diff options
Diffstat (limited to 'tesseract/src/classify')
56 files changed, 17075 insertions, 0 deletions
diff --git a/tesseract/src/classify/adaptive.cpp b/tesseract/src/classify/adaptive.cpp new file mode 100644 index 00000000..92f0d3da --- /dev/null +++ b/tesseract/src/classify/adaptive.cpp @@ -0,0 +1,498 @@ +/****************************************************************************** + ** Filename: adaptive.c + ** Purpose: Adaptive matcher. + ** Author: Dan Johnson + ** History: Fri Mar 8 10:00:21 1991, DSJ, Created. + ** + ** (c) Copyright Hewlett-Packard Company, 1988. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + ******************************************************************************/ + +#include "adaptive.h" + +#include "classify.h" + +#include <cassert> +#include <cstdio> + +namespace tesseract { + +/*---------------------------------------------------------------------------- + Public Code +----------------------------------------------------------------------------*/ +/*---------------------------------------------------------------------------*/ +/** + * This routine adds a new adapted class to an existing + * set of adapted templates. 
+ * + * @param Templates set of templates to add new class to + * @param Class new class to add to templates + * @param ClassId class id to associate with new class + * + * @note Globals: none + */ +void AddAdaptedClass(ADAPT_TEMPLATES Templates, + ADAPT_CLASS Class, + CLASS_ID ClassId) { + INT_CLASS IntClass; + + assert (Templates != nullptr); + assert (Class != nullptr); + assert (LegalClassId (ClassId)); + assert (UnusedClassIdIn (Templates->Templates, ClassId)); + assert (Class->NumPermConfigs == 0); + + IntClass = NewIntClass (1, 1); + AddIntClass (Templates->Templates, ClassId, IntClass); + + assert (Templates->Class[ClassId] == nullptr); + Templates->Class[ClassId] = Class; + +} /* AddAdaptedClass */ + + +/*---------------------------------------------------------------------------*/ +/** + * This routine frees all memory consumed by a temporary + * configuration. + * + * @param Config config to be freed + * + * @note Globals: none + */ +void FreeTempConfig(TEMP_CONFIG Config) { + assert (Config != nullptr); + FreeBitVector (Config->Protos); + free(Config); +} /* FreeTempConfig */ + +/*---------------------------------------------------------------------------*/ +void FreeTempProto(void *arg) { + auto proto = static_cast<PROTO>(arg); + + free(proto); +} + +static void FreePermConfig(PERM_CONFIG Config) { + assert(Config != nullptr); + delete [] Config->Ambigs; + free(Config); +} + +/*---------------------------------------------------------------------------*/ +/** + * This operation allocates and initializes a new adapted + * class data structure and returns a ptr to it. + * + * @return Ptr to new class data structure. 
+ * + * @note Globals: none + */ +ADAPT_CLASS NewAdaptedClass() { + ADAPT_CLASS Class; + + Class = static_cast<ADAPT_CLASS>(malloc (sizeof (ADAPT_CLASS_STRUCT))); + Class->NumPermConfigs = 0; + Class->MaxNumTimesSeen = 0; + Class->TempProtos = NIL_LIST; + + Class->PermProtos = NewBitVector (MAX_NUM_PROTOS); + Class->PermConfigs = NewBitVector (MAX_NUM_CONFIGS); + zero_all_bits (Class->PermProtos, WordsInVectorOfSize (MAX_NUM_PROTOS)); + zero_all_bits (Class->PermConfigs, WordsInVectorOfSize (MAX_NUM_CONFIGS)); + + for (int i = 0; i < MAX_NUM_CONFIGS; i++) + TempConfigFor (Class, i) = nullptr; + + return (Class); + +} /* NewAdaptedClass */ + + +/*-------------------------------------------------------------------------*/ +void free_adapted_class(ADAPT_CLASS adapt_class) { + for (int i = 0; i < MAX_NUM_CONFIGS; i++) { + if (ConfigIsPermanent (adapt_class, i) + && PermConfigFor (adapt_class, i) != nullptr) + FreePermConfig (PermConfigFor (adapt_class, i)); + else if (!ConfigIsPermanent (adapt_class, i) + && TempConfigFor (adapt_class, i) != nullptr) + FreeTempConfig (TempConfigFor (adapt_class, i)); + } + FreeBitVector (adapt_class->PermProtos); + FreeBitVector (adapt_class->PermConfigs); + destroy_nodes (adapt_class->TempProtos, FreeTempProto); + free(adapt_class); +} + + +/*---------------------------------------------------------------------------*/ +/** + * Allocates memory for adapted templates. + * each char in unicharset to the newly created templates + * + * @param InitFromUnicharset if true, add an empty class for + * @return Ptr to new adapted templates. 
+ * + * @note Globals: none + */ +ADAPT_TEMPLATES Classify::NewAdaptedTemplates(bool InitFromUnicharset) { + ADAPT_TEMPLATES Templates; + + Templates = static_cast<ADAPT_TEMPLATES>(malloc (sizeof (ADAPT_TEMPLATES_STRUCT))); + + Templates->Templates = NewIntTemplates (); + Templates->NumPermClasses = 0; + Templates->NumNonEmptyClasses = 0; + + /* Insert an empty class for each unichar id in unicharset */ + for (int i = 0; i < MAX_NUM_CLASSES; i++) { + Templates->Class[i] = nullptr; + if (InitFromUnicharset && i < unicharset.size()) { + AddAdaptedClass(Templates, NewAdaptedClass(), i); + } + } + + return (Templates); + +} /* NewAdaptedTemplates */ + +// Returns FontinfoId of the given config of the given adapted class. +int Classify::GetFontinfoId(ADAPT_CLASS Class, uint8_t ConfigId) { + return (ConfigIsPermanent(Class, ConfigId) ? + PermConfigFor(Class, ConfigId)->FontinfoId : + TempConfigFor(Class, ConfigId)->FontinfoId); +} + +/*----------------------------------------------------------------------------*/ +void free_adapted_templates(ADAPT_TEMPLATES templates) { + + if (templates != nullptr) { + for (int i = 0; i < (templates->Templates)->NumClasses; i++) + free_adapted_class (templates->Class[i]); + free_int_templates (templates->Templates); + free(templates); + } +} + + +/*---------------------------------------------------------------------------*/ +/** + * This routine allocates and returns a new temporary config. + * + * @param MaxProtoId max id of any proto in new config + * @param FontinfoId font information from pre-trained templates + * @return Ptr to new temp config. 
+ * + * @note Globals: none + */ +TEMP_CONFIG NewTempConfig(int MaxProtoId, int FontinfoId) { + int NumProtos = MaxProtoId + 1; + + auto Config = static_cast<TEMP_CONFIG>(malloc(sizeof(TEMP_CONFIG_STRUCT))); + Config->Protos = NewBitVector (NumProtos); + + Config->NumTimesSeen = 1; + Config->MaxProtoId = MaxProtoId; + Config->ProtoVectorSize = WordsInVectorOfSize (NumProtos); + zero_all_bits (Config->Protos, Config->ProtoVectorSize); + Config->FontinfoId = FontinfoId; + + return (Config); + +} /* NewTempConfig */ + + +/*---------------------------------------------------------------------------*/ +/** + * This routine allocates and returns a new temporary proto. + * + * @return Ptr to new temporary proto. + * + * @note Globals: none + */ +TEMP_PROTO NewTempProto() { + return static_cast<TEMP_PROTO>(malloc(sizeof(TEMP_PROTO_STRUCT))); +} /* NewTempProto */ + + +/*---------------------------------------------------------------------------*/ +/** + * This routine prints a summary of the adapted templates + * in Templates to File. 
+ * + * @param File open text file to print Templates to + * @param Templates adapted templates to print to File + * + * @note Globals: none + */ +void Classify::PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates) { + INT_CLASS IClass; + ADAPT_CLASS AClass; + + fprintf (File, "\n\nSUMMARY OF ADAPTED TEMPLATES:\n\n"); + fprintf (File, "Num classes = %d; Num permanent classes = %d\n\n", + Templates->NumNonEmptyClasses, Templates->NumPermClasses); + fprintf (File, " Id NC NPC NP NPP\n"); + fprintf (File, "------------------------\n"); + + for (int i = 0; i < (Templates->Templates)->NumClasses; i++) { + IClass = Templates->Templates->Class[i]; + AClass = Templates->Class[i]; + if (!IsEmptyAdaptedClass (AClass)) { + fprintf (File, "%5d %s %3d %3d %3d %3d\n", + i, unicharset.id_to_unichar(i), + IClass->NumConfigs, AClass->NumPermConfigs, + IClass->NumProtos, + IClass->NumProtos - count (AClass->TempProtos)); + } + } + fprintf (File, "\n"); + +} /* PrintAdaptedTemplates */ + + +/*---------------------------------------------------------------------------*/ +/** + * Read an adapted class description from file and return + * a ptr to the adapted class. + * + * @param fp open file to read adapted class from + * @return Ptr to new adapted class. 
+ * + * @note Globals: none + */ +ADAPT_CLASS ReadAdaptedClass(TFile *fp) { + int NumTempProtos; + int NumConfigs; + int i; + ADAPT_CLASS Class; + + /* first read high level adapted class structure */ + Class = static_cast<ADAPT_CLASS>(malloc (sizeof (ADAPT_CLASS_STRUCT))); + fp->FRead(Class, sizeof(ADAPT_CLASS_STRUCT), 1); + + /* then read in the definitions of the permanent protos and configs */ + Class->PermProtos = NewBitVector (MAX_NUM_PROTOS); + Class->PermConfigs = NewBitVector (MAX_NUM_CONFIGS); + fp->FRead(Class->PermProtos, sizeof(uint32_t), + WordsInVectorOfSize(MAX_NUM_PROTOS)); + fp->FRead(Class->PermConfigs, sizeof(uint32_t), + WordsInVectorOfSize(MAX_NUM_CONFIGS)); + + /* then read in the list of temporary protos */ + fp->FRead(&NumTempProtos, sizeof(int), 1); + Class->TempProtos = NIL_LIST; + for (i = 0; i < NumTempProtos; i++) { + auto TempProto = static_cast<TEMP_PROTO>(malloc(sizeof(TEMP_PROTO_STRUCT))); + fp->FRead(TempProto, sizeof(TEMP_PROTO_STRUCT), 1); + Class->TempProtos = push_last (Class->TempProtos, TempProto); + } + + /* then read in the adapted configs */ + fp->FRead(&NumConfigs, sizeof(int), 1); + for (i = 0; i < NumConfigs; i++) + if (test_bit (Class->PermConfigs, i)) + Class->Config[i].Perm = ReadPermConfig(fp); + else + Class->Config[i].Temp = ReadTempConfig(fp); + + return (Class); + +} /* ReadAdaptedClass */ + + +/*---------------------------------------------------------------------------*/ +/** + * Read a set of adapted templates from file and return + * a ptr to the templates. + * + * @param fp open text file to read adapted templates from + * @return Ptr to adapted templates read from file. 
+ * + * @note Globals: none + */ +ADAPT_TEMPLATES Classify::ReadAdaptedTemplates(TFile *fp) { + ADAPT_TEMPLATES Templates; + + /* first read the high level adaptive template struct */ + Templates = static_cast<ADAPT_TEMPLATES>(malloc (sizeof (ADAPT_TEMPLATES_STRUCT))); + fp->FRead(Templates, sizeof(ADAPT_TEMPLATES_STRUCT), 1); + + /* then read in the basic integer templates */ + Templates->Templates = ReadIntTemplates(fp); + + /* then read in the adaptive info for each class */ + for (int i = 0; i < (Templates->Templates)->NumClasses; i++) { + Templates->Class[i] = ReadAdaptedClass(fp); + } + return (Templates); + +} /* ReadAdaptedTemplates */ + +/*---------------------------------------------------------------------------*/ +/** + * Read a permanent configuration description from file + * and return a ptr to it. + * + * @param fp open file to read permanent config from + * @return Ptr to new permanent configuration description. + * + * @note Globals: none + */ +PERM_CONFIG ReadPermConfig(TFile *fp) { + auto Config = static_cast<PERM_CONFIG>(malloc(sizeof(PERM_CONFIG_STRUCT))); + uint8_t NumAmbigs; + fp->FRead(&NumAmbigs, sizeof(NumAmbigs), 1); + Config->Ambigs = new UNICHAR_ID[NumAmbigs + 1]; + fp->FRead(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs); + Config->Ambigs[NumAmbigs] = -1; + fp->FRead(&(Config->FontinfoId), sizeof(int), 1); + + return (Config); + +} /* ReadPermConfig */ + + +/*---------------------------------------------------------------------------*/ +/** + * Read a temporary configuration description from file + * and return a ptr to it. + * + * @param fp open file to read temporary config from + * @return Ptr to new temporary configuration description. 
+ * + * @note Globals: none + */ +TEMP_CONFIG ReadTempConfig(TFile *fp) { + auto Config = static_cast<TEMP_CONFIG>(malloc(sizeof(TEMP_CONFIG_STRUCT))); + fp->FRead(Config, sizeof(TEMP_CONFIG_STRUCT), 1); + + Config->Protos = NewBitVector (Config->ProtoVectorSize * BITSINLONG); + fp->FRead(Config->Protos, sizeof(uint32_t), Config->ProtoVectorSize); + + return (Config); + +} /* ReadTempConfig */ + + +/*---------------------------------------------------------------------------*/ +/** + * This routine writes a binary representation of Class + * to File. + * + * @param File open file to write Class to + * @param Class adapted class to write to File + * @param NumConfigs number of configs in Class + * + * @note Globals: none + */ +void WriteAdaptedClass(FILE *File, ADAPT_CLASS Class, int NumConfigs) { + int NumTempProtos; + LIST TempProtos; + int i; + + /* first write high level adapted class structure */ + fwrite(Class, sizeof(ADAPT_CLASS_STRUCT), 1, File); + + /* then write out the definitions of the permanent protos and configs */ + fwrite(Class->PermProtos, sizeof(uint32_t), + WordsInVectorOfSize(MAX_NUM_PROTOS), File); + fwrite(Class->PermConfigs, sizeof(uint32_t), + WordsInVectorOfSize(MAX_NUM_CONFIGS), File); + + /* then write out the list of temporary protos */ + NumTempProtos = count (Class->TempProtos); + fwrite(&NumTempProtos, sizeof(int), 1, File); + TempProtos = Class->TempProtos; + iterate (TempProtos) { + void* proto = first_node(TempProtos); + fwrite(proto, sizeof(TEMP_PROTO_STRUCT), 1, File); + } + + /* then write out the adapted configs */ + fwrite(&NumConfigs, sizeof(int), 1, File); + for (i = 0; i < NumConfigs; i++) + if (test_bit (Class->PermConfigs, i)) + WritePermConfig (File, Class->Config[i].Perm); + else + WriteTempConfig (File, Class->Config[i].Temp); + +} /* WriteAdaptedClass */ + + +/*---------------------------------------------------------------------------*/ +/** + * This routine saves Templates to File in a binary format. 
+ * + * @param File open text file to write Templates to + * @param Templates set of adapted templates to write to File + * + * @note Globals: none + */ +void Classify::WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates) { + int i; + + /* first write the high level adaptive template struct */ + fwrite(Templates, sizeof(ADAPT_TEMPLATES_STRUCT), 1, File); + + /* then write out the basic integer templates */ + WriteIntTemplates (File, Templates->Templates, unicharset); + + /* then write out the adaptive info for each class */ + for (i = 0; i < (Templates->Templates)->NumClasses; i++) { + WriteAdaptedClass (File, Templates->Class[i], + Templates->Templates->Class[i]->NumConfigs); + } +} /* WriteAdaptedTemplates */ + + +/*---------------------------------------------------------------------------*/ +/** + * This routine writes a binary representation of a + * permanent configuration to File. + * + * @param File open file to write Config to + * @param Config permanent config to write to File + * + * @note Globals: none + */ +void WritePermConfig(FILE *File, PERM_CONFIG Config) { + uint8_t NumAmbigs = 0; + + assert (Config != nullptr); + while (Config->Ambigs[NumAmbigs] > 0) ++NumAmbigs; + + fwrite(&NumAmbigs, sizeof(uint8_t), 1, File); + fwrite(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs, File); + fwrite(&(Config->FontinfoId), sizeof(int), 1, File); +} /* WritePermConfig */ + + +/*---------------------------------------------------------------------------*/ +/** + * This routine writes a binary representation of a + * temporary configuration to File. 
+ * + * @param File open file to write Config to + * @param Config temporary config to write to File + * + * @note Globals: none + */ +void WriteTempConfig(FILE *File, TEMP_CONFIG Config) { + assert (Config != nullptr); + + fwrite(Config, sizeof (TEMP_CONFIG_STRUCT), 1, File); + fwrite(Config->Protos, sizeof (uint32_t), Config->ProtoVectorSize, File); + +} /* WriteTempConfig */ + +} // namespace tesseract diff --git a/tesseract/src/classify/adaptive.h b/tesseract/src/classify/adaptive.h new file mode 100644 index 00000000..b1bf6a2e --- /dev/null +++ b/tesseract/src/classify/adaptive.h @@ -0,0 +1,128 @@ +/****************************************************************************** + ** Filename: adaptive.h + ** Purpose: Interface to adaptive matcher. + ** Author: Dan Johnson + ** + ** (c) Copyright Hewlett-Packard Company, 1988. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ ******************************************************************************/ +#ifndef ADAPTIVE_H +#define ADAPTIVE_H + +#include "intproto.h" +#include "oldlist.h" + +#include <cstdio> + +namespace tesseract { + +typedef struct { + uint16_t ProtoId; + PROTO_STRUCT Proto; +} + +TEMP_PROTO_STRUCT; +using TEMP_PROTO = TEMP_PROTO_STRUCT*; + +typedef struct { + uint8_t NumTimesSeen; + uint8_t ProtoVectorSize; + PROTO_ID MaxProtoId; + BIT_VECTOR Protos; + int FontinfoId; // font information inferred from pre-trained templates +} TEMP_CONFIG_STRUCT; +using TEMP_CONFIG = TEMP_CONFIG_STRUCT*; + +typedef struct { + UNICHAR_ID* Ambigs; + int FontinfoId; // font information inferred from pre-trained templates +} PERM_CONFIG_STRUCT; +using PERM_CONFIG = PERM_CONFIG_STRUCT*; + +typedef union { + TEMP_CONFIG Temp; + PERM_CONFIG Perm; +} ADAPTED_CONFIG; + +typedef struct { + uint8_t NumPermConfigs; + uint8_t MaxNumTimesSeen; // maximum number of times any TEMP_CONFIG was seen + // (cut at matcher_min_examples_for_prototyping) + BIT_VECTOR PermProtos; + BIT_VECTOR PermConfigs; + LIST TempProtos; + ADAPTED_CONFIG Config[MAX_NUM_CONFIGS]; +} ADAPT_CLASS_STRUCT; +using ADAPT_CLASS = ADAPT_CLASS_STRUCT*; + +typedef struct { + INT_TEMPLATES Templates; + int NumNonEmptyClasses; + uint8_t NumPermClasses; + ADAPT_CLASS Class[MAX_NUM_CLASSES]; +} ADAPT_TEMPLATES_STRUCT; +using ADAPT_TEMPLATES = ADAPT_TEMPLATES_STRUCT*; + +/*---------------------------------------------------------------------------- + Public Function Prototypes +----------------------------------------------------------------------------*/ +#define NumNonEmptyClassesIn(Template) ((Template)->NumNonEmptyClasses) + +#define IsEmptyAdaptedClass(Class) \ + ((Class)->NumPermConfigs == 0 && (Class)->TempProtos == NIL_LIST) + +#define ConfigIsPermanent(Class, ConfigId) \ + (test_bit((Class)->PermConfigs, ConfigId)) + +#define MakeConfigPermanent(Class, ConfigId) \ + (SET_BIT((Class)->PermConfigs, ConfigId)) + +#define 
MakeProtoPermanent(Class, ProtoId) \ + (SET_BIT((Class)->PermProtos, ProtoId)) + +#define TempConfigFor(Class, ConfigId) ((Class)->Config[ConfigId].Temp) + +#define PermConfigFor(Class, ConfigId) ((Class)->Config[ConfigId].Perm) + +#define IncreaseConfidence(TempConfig) ((TempConfig)->NumTimesSeen++) + +void AddAdaptedClass(ADAPT_TEMPLATES Templates, ADAPT_CLASS Class, + CLASS_ID ClassId); + +void FreeTempProto(void* arg); + +void FreeTempConfig(TEMP_CONFIG Config); + +ADAPT_CLASS NewAdaptedClass(); + +void free_adapted_class(ADAPT_CLASS adapt_class); + +void free_adapted_templates(ADAPT_TEMPLATES templates); + +TEMP_CONFIG NewTempConfig(int MaxProtoId, int FontinfoId); + +TEMP_PROTO NewTempProto(); + +ADAPT_CLASS ReadAdaptedClass(tesseract::TFile* File); + +PERM_CONFIG ReadPermConfig(tesseract::TFile* File); + +TEMP_CONFIG ReadTempConfig(tesseract::TFile* File); + +void WriteAdaptedClass(FILE* File, ADAPT_CLASS Class, int NumConfigs); + +void WritePermConfig(FILE* File, PERM_CONFIG Config); + +void WriteTempConfig(FILE* File, TEMP_CONFIG Config); + +} // namespace tesseract + +#endif diff --git a/tesseract/src/classify/adaptmatch.cpp b/tesseract/src/classify/adaptmatch.cpp new file mode 100644 index 00000000..65254b8a --- /dev/null +++ b/tesseract/src/classify/adaptmatch.cpp @@ -0,0 +1,2317 @@ +/****************************************************************************** + ** Filename: adaptmatch.cpp + ** Purpose: High level adaptive matcher. + ** Author: Dan Johnson + ** + ** (c) Copyright Hewlett-Packard Company, 1988. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. 
+ ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + ******************************************************************************/ + +/*----------------------------------------------------------------------------- + Include Files and Type Defines +-----------------------------------------------------------------------------*/ +#ifdef HAVE_CONFIG_H +#include "config_auto.h" +#endif + +#include "adaptive.h" // for ADAPT_CLASS, free_adapted_templates +#include "ambigs.h" // for UnicharIdVector, UnicharAmbigs +#include "bitvec.h" // for FreeBitVector, NewBitVector, BIT_VECTOR +#include "blobs.h" // for TBLOB, TWERD +#include "classify.h" // for Classify, CST_FRAGMENT, CST_WHOLE +#include "dict.h" // for Dict +#include "errcode.h" // for ASSERT_HOST +#include "featdefs.h" // for CharNormDesc +#include "float2int.h" // for BASELINE_Y_SHIFT +#include "fontinfo.h" // for ScoredFont, FontSet +#include "intfx.h" // for BlobToTrainingSample, INT_FX_RESULT_S... +#include "intmatcher.h" // for CP_RESULT_STRUCT, IntegerMatcher +#include "intproto.h" // for INT_FEATURE_STRUCT, (anonymous), Clas... 
+#include "matchdefs.h" // for CLASS_ID, FEATURE_ID, PROTO_ID, NO_PROTO +#include "mfoutline.h" // for baseline, character, MF_SCALE_FACTOR +#include "normalis.h" // for DENORM, kBlnBaselineOffset, kBlnXHeight +#include "normfeat.h" // for ActualOutlineLength, CharNormLength +#include "ocrfeatures.h" // for FEATURE_STRUCT, FreeFeatureSet, FEATURE +#include "oldlist.h" // for push, delete_d +#include "outfeat.h" // for OutlineFeatDir, OutlineFeatLength +#include "pageres.h" // for WERD_RES +#include "params.h" // for IntParam, BoolParam, DoubleParam, Str... +#include "picofeat.h" // for PicoFeatDir, PicoFeatX, PicoFeatY +#include "protos.h" // for PROTO_STRUCT, FillABC, PROTO +#include "ratngs.h" // for BLOB_CHOICE_IT, BLOB_CHOICE_LIST, BLO... +#include "rect.h" // for TBOX +#include "scrollview.h" // for ScrollView, ScrollView::BROWN, Scroll... +#include "seam.h" // for SEAM +#include "shapeclassifier.h" // for ShapeClassifier +#include "shapetable.h" // for UnicharRating, ShapeTable, Shape, Uni... 
+#include "tessclassifier.h" // for TessClassifier +#include "tessdatamanager.h" // for TessdataManager, TESSDATA_INTTEMP +#include "tprintf.h" // for tprintf +#include "trainingsample.h" // for TrainingSample +#include "unicharset.h" // for UNICHARSET, CHAR_FRAGMENT, UNICHAR_SPACE +#include "unicity_table.h" // for UnicityTable + +#include "genericvector.h" // for GenericVector +#include "serialis.h" // for TFile +#include "strngs.h" // for STRING +#include "helpers.h" // for IntCastRounded, ClipToRange +#include <tesseract/unichar.h> // for UNICHAR_ID, INVALID_UNICHAR_ID + +#include <algorithm> // for max, min +#include <cassert> // for assert +#include <cmath> // for fabs +#include <cstdint> // for INT32_MAX, UINT8_MAX +#include <cstdio> // for fflush, fclose, fopen, stdout, FILE +#include <cstdlib> // for malloc +#include <cstring> // for strstr, memset, strcmp + +namespace tesseract { + +#define ADAPT_TEMPLATE_SUFFIX ".a" + +#define MAX_MATCHES 10 +#define UNLIKELY_NUM_FEAT 200 +#define NO_DEBUG 0 +#define MAX_ADAPTABLE_WERD_SIZE 40 + +#define ADAPTABLE_WERD_ADJUSTMENT (0.05) + +#define Y_DIM_OFFSET (Y_SHIFT - BASELINE_Y_SHIFT) + +#define WORST_POSSIBLE_RATING (0.0f) + +struct ADAPT_RESULTS { + int32_t BlobLength; + bool HasNonfragment; + UNICHAR_ID best_unichar_id; + int best_match_index; + float best_rating; + std::vector<UnicharRating> match; + std::vector<CP_RESULT_STRUCT> CPResults; + + /// Initializes data members to the default values. Sets the initial + /// rating of each class to be the worst possible rating (1.0). + inline void Initialize() { + BlobLength = INT32_MAX; + HasNonfragment = false; + ComputeBest(); + } + // Computes best_unichar_id, best_match_index and best_rating. 
+ void ComputeBest() { + best_unichar_id = INVALID_UNICHAR_ID; + best_match_index = -1; + best_rating = WORST_POSSIBLE_RATING; + for (int i = 0; i < match.size(); ++i) { + if (match[i].rating > best_rating) { + best_rating = match[i].rating; + best_unichar_id = match[i].unichar_id; + best_match_index = i; + } + } + } +}; + +struct PROTO_KEY { + ADAPT_TEMPLATES Templates; + CLASS_ID ClassId; + int ConfigId; +}; + +// Sort function to sort ratings appropriately by descending rating. +static bool SortDescendingRating(const UnicharRating &a, const UnicharRating &b) { + if (a.rating != b.rating) { + return a.rating > b.rating; + } else { + return a.unichar_id < b.unichar_id; + } +} + +/*----------------------------------------------------------------------------- + Private Macros +-----------------------------------------------------------------------------*/ +inline bool MarginalMatch(float confidence, float matcher_great_threshold) { + return (1.0f - confidence) > matcher_great_threshold; +} + +/*----------------------------------------------------------------------------- + Private Function Prototypes +-----------------------------------------------------------------------------*/ +// Returns the index of the given id in results, if present, or the size of the +// vector (index it will go at) if not present. +static int FindScoredUnichar(UNICHAR_ID id, const ADAPT_RESULTS& results) { + for (int i = 0; i < results.match.size(); i++) { + if (results.match[i].unichar_id == id) + return i; + } + return results.match.size(); +} + +// Returns the current rating for a unichar id if we have rated it, defaulting +// to WORST_POSSIBLE_RATING. 
+static float ScoredUnichar(UNICHAR_ID id, const ADAPT_RESULTS& results) { + int index = FindScoredUnichar(id, results); + if (index >= results.match.size()) return WORST_POSSIBLE_RATING; + return results.match[index].rating; +} + +void InitMatcherRatings(float *Rating); + +int MakeTempProtoPerm(void *item1, void *item2); + +void SetAdaptiveThreshold(float Threshold); + + +/*----------------------------------------------------------------------------- + Public Code +-----------------------------------------------------------------------------*/ +/** + * This routine calls the adaptive matcher + * which returns (in an array) the class id of each + * class matched. + * + * It also returns the number of classes matched. + * For each class matched it places the best rating + * found for that class into the Ratings array. + * + * Bad matches are then removed so that they don't + * need to be sorted. The remaining good matches are + * then sorted and converted to choices. + * + * This routine also performs some simple speckle + * filtering. + * + * @param Blob blob to be classified + * @param[out] Choices List of choices found by adaptive matcher. + * filled on return with the choices found by the + * class pruner and the ratings therefrom. Also + * contains the detailed results of the integer matcher. + * + */ +void Classify::AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices) { + assert(Choices != nullptr); + auto *Results = new ADAPT_RESULTS; + Results->Initialize(); + + ASSERT_HOST(AdaptedTemplates != nullptr); + + DoAdaptiveMatch(Blob, Results); + + RemoveBadMatches(Results); + std::sort(Results->match.begin(), Results->match.end(), SortDescendingRating); + RemoveExtraPuncs(Results); + Results->ComputeBest(); + ConvertMatchesToChoices(Blob->denorm(), Blob->bounding_box(), Results, + Choices); + + // TODO(rays) Move to before ConvertMatchesToChoices! 
  // Add a speckle result when the blob is speckle-sized, or when there are
  // no other choices at all (guarantees Choices is never left empty).
  if (LargeSpeckle(*Blob) || Choices->length() == 0)
    AddLargeSpeckleTo(Results->BlobLength, Choices);

  if (matcher_debug_level >= 1) {
    tprintf("AD Matches = ");
    PrintAdaptiveMatchResults(*Results);
  }

#ifndef GRAPHICS_DISABLED
  if (classify_enable_adaptive_debugger)
    DebugAdaptiveClassifier(Blob, Results);
#endif

  delete Results;
} /* AdaptiveClassifier */

#ifndef GRAPHICS_DISABLED

// If *win is nullptr, sets it to a new ScrollView() object with title msg.
// Clears the window and draws baselines.
void Classify::RefreshDebugWindow(ScrollView **win, const char *msg,
                                  int y_offset, const TBOX &wbox) {
  const int kSampleSpaceWidth = 500;
  if (*win == nullptr) {
    *win = new ScrollView(msg, 100, y_offset, kSampleSpaceWidth * 2, 200,
                          kSampleSpaceWidth * 2, 200, true);
  }
  (*win)->Clear();
  (*win)->Pen(64, 64, 64);
  // Draw the baseline and the x-height line as horizontal reference lines.
  (*win)->Line(-kSampleSpaceWidth, kBlnBaselineOffset,
               kSampleSpaceWidth, kBlnBaselineOffset);
  (*win)->Line(-kSampleSpaceWidth, kBlnXHeight + kBlnBaselineOffset,
               kSampleSpaceWidth, kBlnXHeight + kBlnBaselineOffset);
  (*win)->ZoomToRectangle(wbox.left(), wbox.top(),
                          wbox.right(), wbox.bottom());
}

#endif  // !GRAPHICS_DISABLED

// Learns the given word using its chopped_word, seam_array, denorm,
// box_word, best_state, and correct_text to learn both correctly and
// incorrectly segmented blobs. If fontname is not nullptr, then LearnBlob
// is called and the data will be saved in an internal buffer.
// Otherwise AdaptToBlob is called for adaption within a document.
void Classify::LearnWord(const char* fontname, WERD_RES* word) {
  int word_len = word->correct_text.size();
  if (word_len == 0) return;

  // Per-character adaption thresholds; only allocated in adaption mode
  // (fontname == nullptr) and freed at the end of this function.
  float* thresholds = nullptr;
  if (fontname == nullptr) {
    // Adaption mode.
    if (!EnableLearning || word->best_choice == nullptr)
      return;  // Can't or won't adapt.

    if (classify_learning_debug_level >= 1)
      tprintf("\n\nAdapting to word = %s\n",
              word->best_choice->debug_string().c_str());
    thresholds = new float[word_len];
    word->ComputeAdaptionThresholds(certainty_scale,
                                    matcher_perfect_threshold,
                                    matcher_good_threshold,
                                    matcher_rating_margin, thresholds);
  }
  int start_blob = 0;

  #ifndef GRAPHICS_DISABLED
  if (classify_debug_character_fragments) {
    if (learn_fragmented_word_debug_win_ != nullptr) {
      learn_fragmented_word_debug_win_->Wait();
    }
    RefreshDebugWindow(&learn_fragments_debug_win_, "LearnPieces", 400,
                       word->chopped_word->bounding_box());
    RefreshDebugWindow(&learn_fragmented_word_debug_win_, "LearnWord", 200,
                       word->chopped_word->bounding_box());
    word->chopped_word->plot(learn_fragmented_word_debug_win_);
    ScrollView::Update();
  }
  #endif  // !GRAPHICS_DISABLED

  // Walk the characters of the word; best_state[ch] is the number of
  // chopped blobs spanned by correct_text[ch] (see start_blob advance below).
  for (int ch = 0; ch < word_len; ++ch) {
    if (classify_debug_character_fragments) {
      tprintf("\nLearning %s\n", word->correct_text[ch].c_str());
    }
    if (word->correct_text[ch].length() > 0) {
      float threshold = thresholds != nullptr ? thresholds[ch] : 0.0f;

      // Learn the whole character first.
      LearnPieces(fontname, start_blob, word->best_state[ch], threshold,
                  CST_WHOLE, word->correct_text[ch].c_str(), word);

      if (word->best_state[ch] > 1 && !disable_character_fragments) {
        // Check that the character breaks into meaningful fragments
        // that each match a whole character with at least
        // classify_character_fragments_garbage_certainty_threshold
        bool garbage = false;
        int frag;
        for (frag = 0; frag < word->best_state[ch]; ++frag) {
          TBLOB* frag_blob = word->chopped_word->blobs[start_blob + frag];
          // NOTE(review): the garbage check only runs when the threshold
          // parameter is negative — presumably a negative value selects the
          // default behavior inside LooksLikeGarbage; confirm.
          if (classify_character_fragments_garbage_certainty_threshold < 0) {
            garbage |= LooksLikeGarbage(frag_blob);
          }
        }
        // Learn the fragments.
        if (!garbage) {
          bool pieces_all_natural = word->PiecesAllNatural(start_blob,
              word->best_state[ch]);
          if (pieces_all_natural || !prioritize_division) {
            for (frag = 0; frag < word->best_state[ch]; ++frag) {
              std::vector<STRING> tokens;
              word->correct_text[ch].split(' ', &tokens);

              // Replace the first token with its fragment representation,
              // e.g. marking this as fragment frag of best_state[ch].
              tokens[0] = CHAR_FRAGMENT::to_string(
                  tokens[0].c_str(), frag, word->best_state[ch],
                  pieces_all_natural);

              // Re-join the tokens into a single space-separated string.
              STRING full_string;
              for (int i = 0; i < tokens.size(); i++) {
                full_string += tokens[i];
                if (i != tokens.size() - 1)
                  full_string += ' ';
              }
              LearnPieces(fontname, start_blob + frag, 1, threshold,
                          CST_FRAGMENT, full_string.c_str(), word);
            }
          }
        }
      }

      // TODO(rays): re-enable this part of the code when we switch to the
      // new classifier that needs to see examples of garbage.
      /*
      if (word->best_state[ch] > 1) {
        // If the next blob is good, make junk with the rightmost fragment.
        if (ch + 1 < word_len && word->correct_text[ch + 1].length() > 0) {
          LearnPieces(fontname, start_blob + word->best_state[ch] - 1,
                      word->best_state[ch + 1] + 1,
                      threshold, CST_IMPROPER, INVALID_UNICHAR, word);
        }
        // If the previous blob is good, make junk with the leftmost fragment.
        if (ch > 0 && word->correct_text[ch - 1].length() > 0) {
          LearnPieces(fontname, start_blob - word->best_state[ch - 1],
                      word->best_state[ch - 1] + 1,
                      threshold, CST_IMPROPER, INVALID_UNICHAR, word);
        }
      }
      // If the next blob is good, make a join with it.
      if (ch + 1 < word_len && word->correct_text[ch + 1].length() > 0) {
        STRING joined_text = word->correct_text[ch];
        joined_text += word->correct_text[ch + 1];
        LearnPieces(fontname, start_blob,
                    word->best_state[ch] + word->best_state[ch + 1],
                    threshold, CST_NGRAM, joined_text.c_str(), word);
      }
      */
    }
    start_blob += word->best_state[ch];
  }
  delete [] thresholds;
}  // LearnWord.

// Builds a blob of length fragments, from the word, starting at start,
// and then learns it, as having the given correct_text.
// If fontname is not nullptr, then LearnBlob is called and the data will be
// saved in an internal buffer for static training.
// Otherwise AdaptToBlob is called for adaption within a document.
// threshold is a magic number required by AdaptToChar and generated by
// ComputeAdaptionThresholds.
// Although it can be partly inferred from the string, segmentation is
// provided to explicitly clarify the character segmentation.
void Classify::LearnPieces(const char* fontname, int start, int length,
                           float threshold, CharSegmentationType segmentation,
                           const char* correct_text, WERD_RES* word) {
  // TODO(daria) Remove/modify this if/when we want
  // to train and/or adapt to n-grams.
  if (segmentation != CST_WHOLE &&
      (segmentation != CST_FRAGMENT || disable_character_fragments))
    return;

  // Temporarily merge blobs [start, start + length - 1] into a single blob;
  // the merge is undone by SEAM::BreakPieces before this function returns.
  if (length > 1) {
    SEAM::JoinPieces(word->seam_array, word->chopped_word->blobs, start,
                     start + length - 1);
  }
  TBLOB* blob = word->chopped_word->blobs[start];
  // Rotate the blob if needed for classification.
  // rotated_blob aliases blob when no rotation is needed; it is only
  // deleted later if it is a distinct object.
  TBLOB* rotated_blob = blob->ClassifyNormalizeIfNeeded();
  if (rotated_blob == nullptr)
    rotated_blob = blob;

  #ifndef GRAPHICS_DISABLED
  // Draw debug windows showing the blob that is being learned if needed.
  if (strcmp(classify_learn_debug_str.c_str(), correct_text) == 0) {
    RefreshDebugWindow(&learn_debug_win_, "LearnPieces", 600,
                       word->chopped_word->bounding_box());
    rotated_blob->plot(learn_debug_win_, ScrollView::GREEN, ScrollView::BROWN);
    learn_debug_win_->Update();
    learn_debug_win_->Wait();
  }
  if (classify_debug_character_fragments && segmentation == CST_FRAGMENT) {
    ASSERT_HOST(learn_fragments_debug_win_ != nullptr);  // set up in LearnWord
    blob->plot(learn_fragments_debug_win_,
               ScrollView::BLUE, ScrollView::BROWN);
    learn_fragments_debug_win_->Update();
  }
  #endif  // !GRAPHICS_DISABLED

  if (fontname != nullptr) {
    // Static training path: extract features and buffer them via LearnBlob.
    classify_norm_method.set_value(character);  // force char norm spc 30/11/93
    tess_bn_matching.set_value(false);  // turn it off
    tess_cn_matching.set_value(false);
    DENORM bl_denorm, cn_denorm;
    INT_FX_RESULT_STRUCT fx_info;
    SetupBLCNDenorms(*rotated_blob, classify_nonlinear_norm,
                     &bl_denorm, &cn_denorm, &fx_info);
    LearnBlob(fontname, rotated_blob, cn_denorm, fx_info, correct_text);
  } else if (unicharset.contains_unichar(correct_text)) {
    // Adaption path: adapt the templates for this unichar within the document.
    UNICHAR_ID class_id = unicharset.unichar_to_id(correct_text);
    int font_id = word->fontinfo != nullptr
                ? fontinfo_table_.get_id(*word->fontinfo)
                : 0;
    if (classify_learning_debug_level >= 1)
      tprintf("Adapting to char = %s, thr= %g font_id= %d\n",
              unicharset.id_to_unichar(class_id), threshold, font_id);
    // If filename is not nullptr we are doing recognition
    // (as opposed to training), so we must have already set word fonts.
    AdaptToChar(rotated_blob, class_id, font_id, threshold, AdaptedTemplates);
    if (BackupAdaptedTemplates != nullptr) {
      // Adapt the backup templates too. They will be used if the primary gets
      // too full.
      AdaptToChar(rotated_blob, class_id, font_id, threshold,
                  BackupAdaptedTemplates);
    }
  } else if (classify_debug_level >= 1) {
    tprintf("Can't adapt to %s not in unicharset\n", correct_text);
  }
  // Only delete the rotated blob if ClassifyNormalizeIfNeeded made a copy.
  if (rotated_blob != blob) {
    delete rotated_blob;
  }

  // Undo the JoinPieces merge done at the top of this function.
  SEAM::BreakPieces(word->seam_array, word->chopped_word->blobs, start,
                    start + length - 1);
}  // LearnPieces.

/*---------------------------------------------------------------------------*/
/**
 * This routine performs cleanup operations
 * on the adaptive classifier.  It should be called
 * before the program is terminated.  Its main function
 * is to save the adapted templates to a file.
 *
 * Globals:
 * - #AdaptedTemplates current set of adapted templates
 * - #classify_save_adapted_templates true if templates should be saved
 * - #classify_enable_adaptive_matcher true if adaptive matcher is enabled
 */
void Classify::EndAdaptiveClassifier() {
  STRING Filename;
  FILE *File;

  // Save adapted templates to <imagefile><ADAPT_TEMPLATE_SUFFIX> if enabled,
  // before tearing down any state. A failed fopen is reported, not fatal.
  if (AdaptedTemplates != nullptr &&
      classify_enable_adaptive_matcher && classify_save_adapted_templates) {
    Filename = imagefile + ADAPT_TEMPLATE_SUFFIX;
    File = fopen (Filename.c_str(), "wb");
    if (File == nullptr)
      tprintf ("Unable to save adapted templates to %s!\n", Filename.c_str());
    else {
      tprintf ("\nSaving adapted templates to %s ...", Filename.c_str());
      fflush(stdout);
      WriteAdaptedTemplates(File, AdaptedTemplates);
      tprintf ("\n");
      fclose(File);
    }
  }

  // Release all classifier state; members are nulled so a later
  // InitAdaptiveClassifier can detect a fresh start.
  if (AdaptedTemplates != nullptr) {
    free_adapted_templates(AdaptedTemplates);
    AdaptedTemplates = nullptr;
  }
  if (BackupAdaptedTemplates != nullptr) {
    free_adapted_templates(BackupAdaptedTemplates);
    BackupAdaptedTemplates = nullptr;
  }

  if (PreTrainedTemplates != nullptr) {
    free_int_templates(PreTrainedTemplates);
    PreTrainedTemplates = nullptr;
  }
  getDict().EndDangerousAmbigs();
  FreeNormProtos();
  // AllProtosOn doubles as the "bit vectors are allocated" flag; all four
  // vectors are allocated together in InitAdaptiveClassifier.
  if (AllProtosOn != nullptr) {
    FreeBitVector(AllProtosOn);
    FreeBitVector(AllConfigsOn);
    FreeBitVector(AllConfigsOff);
    FreeBitVector(TempProtoMask);
    AllProtosOn = nullptr;
    AllConfigsOn = nullptr;
    AllConfigsOff = nullptr;
    TempProtoMask = nullptr;
  }
  delete shape_table_;
  shape_table_ = nullptr;
  delete static_classifier_;
  static_classifier_ = nullptr;
}  /* EndAdaptiveClassifier */


/*---------------------------------------------------------------------------*/
/**
 * This routine reads in the training
 * information needed by the adaptive classifier
 * and saves it into global variables.
 *  Parameters:
 *      load_pre_trained_templates  Indicates whether the pre-trained
 *                     templates (inttemp, normproto and pffmtable components)
 *                     should be loaded. Should only be set to true if the
 *                     necessary classifier components are present in the
 *                     [lang].traineddata file.
 *  Globals:
 *      BuiltInTemplatesFile  file to get built-in temps from
 *      BuiltInCutoffsFile    file to get avg. feat per class from
 *      classify_use_pre_adapted_templates
 *                            enables use of pre-adapted templates
 */
void Classify::InitAdaptiveClassifier(TessdataManager* mgr) {
  if (!classify_enable_adaptive_matcher)
    return;
  if (AllProtosOn != nullptr)
    EndAdaptiveClassifier();  // Don't leak with multiple inits.

  // If there is no language_data_path_prefix, the classifier will be
  // adaptive only.
  if (language_data_path_prefix.length() > 0 && mgr != nullptr) {
    TFile fp;
    // inttemp, pffmtable and normproto are required components; the shape
    // table is optional and a failed deserialize is non-fatal.
    ASSERT_HOST(mgr->GetComponent(TESSDATA_INTTEMP, &fp));
    PreTrainedTemplates = ReadIntTemplates(&fp);

    if (mgr->GetComponent(TESSDATA_SHAPE_TABLE, &fp)) {
      shape_table_ = new ShapeTable(unicharset);
      if (!shape_table_->DeSerialize(&fp)) {
        tprintf("Error loading shape table!\n");
        delete shape_table_;
        shape_table_ = nullptr;
      }
    }

    ASSERT_HOST(mgr->GetComponent(TESSDATA_PFFMTABLE, &fp));
    ReadNewCutoffs(&fp, CharNormCutoffs);

    ASSERT_HOST(mgr->GetComponent(TESSDATA_NORMPROTO, &fp));
    NormProtos = ReadNormProtos(&fp);
    static_classifier_ = new TessClassifier(false, this);
  }

  InitIntegerFX();

  // Allocate the shared proto/config masks; freed in EndAdaptiveClassifier.
  AllProtosOn = NewBitVector(MAX_NUM_PROTOS);
  AllConfigsOn = NewBitVector(MAX_NUM_CONFIGS);
  AllConfigsOff = NewBitVector(MAX_NUM_CONFIGS);
  TempProtoMask = NewBitVector(MAX_NUM_PROTOS);
  set_all_bits(AllProtosOn, WordsInVectorOfSize(MAX_NUM_PROTOS));
  set_all_bits(AllConfigsOn, WordsInVectorOfSize(MAX_NUM_CONFIGS));
  zero_all_bits(AllConfigsOff, WordsInVectorOfSize(MAX_NUM_CONFIGS));

  for (uint16_t& BaselineCutoff : BaselineCutoffs) {
    BaselineCutoff = 0;
  }

  if (classify_use_pre_adapted_templates) {
    TFile fp;
    STRING Filename;

    // Try to resume from templates saved by a previous run; fall back to a
    // fresh set if the file cannot be opened.
    Filename = imagefile;
    Filename += ADAPT_TEMPLATE_SUFFIX;
    if (!fp.Open(Filename.c_str(), nullptr)) {
      AdaptedTemplates = NewAdaptedTemplates(true);
    } else {
      tprintf("\nReading pre-adapted templates from %s ...\n",
              Filename.c_str());
      fflush(stdout);
      AdaptedTemplates = ReadAdaptedTemplates(&fp);
      tprintf("\n");
      PrintAdaptedTemplates(stdout, AdaptedTemplates);

      for (int i = 0; i < AdaptedTemplates->Templates->NumClasses; i++) {
        BaselineCutoffs[i] = CharNormCutoffs[i];
      }
    }
  } else {
    if (AdaptedTemplates != nullptr)
      free_adapted_templates(AdaptedTemplates);
    AdaptedTemplates = NewAdaptedTemplates(true);
  }
}  /* InitAdaptiveClassifier */

// Resets the main adaptive classifier to an empty set of templates.
void Classify::ResetAdaptiveClassifierInternal() {
  if (classify_learning_debug_level > 0) {
    tprintf("Resetting adaptive classifier (NumAdaptationsFailed=%d)\n",
            NumAdaptationsFailed);
  }
  free_adapted_templates(AdaptedTemplates);
  AdaptedTemplates = NewAdaptedTemplates(true);
  if (BackupAdaptedTemplates != nullptr)
    free_adapted_templates(BackupAdaptedTemplates);
  BackupAdaptedTemplates = nullptr;
  NumAdaptationsFailed = 0;
}

// If there are backup adapted templates, switches to those, otherwise resets
// the main adaptive classifier (because it is full.)
void Classify::SwitchAdaptiveClassifier() {
  if (BackupAdaptedTemplates == nullptr) {
    ResetAdaptiveClassifierInternal();
    return;
  }
  if (classify_learning_debug_level > 0) {
    tprintf("Switch to backup adaptive classifier (NumAdaptationsFailed=%d)\n",
            NumAdaptationsFailed);
  }
  // Ownership of the backup templates transfers to AdaptedTemplates.
  free_adapted_templates(AdaptedTemplates);
  AdaptedTemplates = BackupAdaptedTemplates;
  BackupAdaptedTemplates = nullptr;
  NumAdaptationsFailed = 0;
}

// Resets the backup adaptive classifier to empty.
void Classify::StartBackupAdaptiveClassifier() {
  if (BackupAdaptedTemplates != nullptr)
    free_adapted_templates(BackupAdaptedTemplates);
  BackupAdaptedTemplates = NewAdaptedTemplates(true);
}

/*---------------------------------------------------------------------------*/
/**
 * This routine prepares the adaptive
 * matcher for the start
 * of the first pass.  Learning is enabled (unless it
 * is disabled for the whole program).
 *
 * @note this is somewhat redundant, it simply says that if learning is
 * enabled then it will remain enabled on the first pass.  If it is
 * disabled, then it will remain disabled.  This is only put here to
 * make it very clear that learning is controlled directly by the global
 * setting of EnableLearning.
 *
 * Globals:
 * - #EnableLearning
 * set to true by this routine
 */
void Classify::SettupPass1() {
  EnableLearning = classify_enable_learning;

  getDict().SettupStopperPass1();

}  /* SettupPass1 */


/*---------------------------------------------------------------------------*/
/**
 * This routine prepares the adaptive
 * matcher for the start of the second pass.  Further
 * learning is disabled.
 *
 * Globals:
 * - #EnableLearning set to false by this routine
 */
void Classify::SettupPass2() {
  EnableLearning = false;
  getDict().SettupStopperPass2();

}  /* SettupPass2 */


/*---------------------------------------------------------------------------*/
/**
 * This routine creates a new adapted
 * class and uses Blob as the model for the first
 * config in that class.
 *
 * @param Blob blob to model new class after
 * @param ClassId id of the class to be initialized
 * @param FontinfoId font information inferred from pre-trained templates
 * @param Class adapted class to be initialized
 * @param Templates adapted templates to add new class to
 *
 * Globals:
 * - #AllProtosOn dummy mask with all 1's
 * - BaselineCutoffs kludge needed to get cutoffs
 * - #PreTrainedTemplates kludge needed to get cutoffs
 */
void Classify::InitAdaptedClass(TBLOB *Blob,
                                CLASS_ID ClassId,
                                int FontinfoId,
                                ADAPT_CLASS Class,
                                ADAPT_TEMPLATES Templates) {
  FEATURE_SET Features;
  int Fid, Pid;
  FEATURE Feature;
  int NumFeatures;
  TEMP_PROTO TempProto;
  PROTO Proto;
  INT_CLASS IClass;
  TEMP_CONFIG Config;

  classify_norm_method.set_value(baseline);
  Features = ExtractOutlineFeatures(Blob);
  NumFeatures = Features->NumFeatures;
  // Reject degenerate or implausibly complex blobs outright.
  if (NumFeatures > UNLIKELY_NUM_FEAT || NumFeatures <= 0) {
    FreeFeatureSet(Features);
    return;
  }

  Config = NewTempConfig(NumFeatures - 1, FontinfoId);
  TempConfigFor(Class, 0) = Config;

  /* this is a kludge to construct cutoffs for adapted templates */
  if (Templates == AdaptedTemplates)
    BaselineCutoffs[ClassId] = CharNormCutoffs[ClassId];

  IClass = ClassForClassId (Templates->Templates, ClassId);

  // Turn each outline feature into one proto of the new class.
  for (Fid = 0; Fid < Features->NumFeatures; Fid++) {
    Pid = AddIntProto (IClass);
    assert (Pid != NO_PROTO);

    Feature = Features->Features[Fid];
    TempProto = NewTempProto ();
    Proto = &(TempProto->Proto);

    /* compute proto params - NOTE that Y_DIM_OFFSET must be used because
       ConvertProto assumes that the Y dimension varies from -0.5 to 0.5
       instead of the -0.25 to 0.75 used in baseline normalization */
    Proto->Angle = Feature->Params[OutlineFeatDir];
    Proto->X = Feature->Params[OutlineFeatX];
    Proto->Y = Feature->Params[OutlineFeatY] - Y_DIM_OFFSET;
    Proto->Length = Feature->Params[OutlineFeatLength];
    FillABC(Proto);

    TempProto->ProtoId = Pid;
    SET_BIT (Config->Protos, Pid);

    ConvertProto(Proto, Pid, IClass);
    AddProtoToProtoPruner(Proto, Pid, IClass,
                          classify_learning_debug_level >= 2);

    Class->TempProtos = push (Class->TempProtos, TempProto);
  }
  FreeFeatureSet(Features);

  AddIntConfig(IClass);
  ConvertConfig (AllProtosOn, 0, IClass);

  if (classify_learning_debug_level >= 1) {
    tprintf("Added new class '%s' with class id %d and %d protos.\n",
            unicharset.id_to_unichar(ClassId), ClassId, NumFeatures);
#ifndef GRAPHICS_DISABLED
    if (classify_learning_debug_level > 1)
      DisplayAdaptedChar(Blob, IClass);
#endif
  }

  if (IsEmptyAdaptedClass(Class))
    (Templates->NumNonEmptyClasses)++;
}  /* InitAdaptedClass */


/*---------------------------------------------------------------------------*/
/**
 * This routine sets up the feature
 * extractor to extract baseline normalized
 * pico-features.
 *
 * The extracted pico-features are converted
 * to integer form and placed in IntFeatures.  The
 * original floating-pt. features are returned in
 * FloatFeatures.
 *
 * Globals: none
 * @param Blob blob to extract features from
 * @param[out] IntFeatures array to fill with integer features
 * @param[out] FloatFeatures place to return actual floating-pt features
 *
 * @return Number of pico-features returned (0 if
 * an error occurred)
 */
int Classify::GetAdaptiveFeatures(TBLOB *Blob,
                                  INT_FEATURE_ARRAY IntFeatures,
                                  FEATURE_SET *FloatFeatures) {
  FEATURE_SET Features;
  int NumFeatures;

  classify_norm_method.set_value(baseline);
  Features = ExtractPicoFeatures(Blob);

  NumFeatures = Features->NumFeatures;
  // On failure the feature set is freed here; on success the caller takes
  // ownership of *FloatFeatures and must call FreeFeatureSet on it.
  if (NumFeatures == 0 || NumFeatures > UNLIKELY_NUM_FEAT) {
    FreeFeatureSet(Features);
    return 0;
  }

  ComputeIntFeatures(Features, IntFeatures);
  *FloatFeatures = Features;

  return NumFeatures;
}  /* GetAdaptiveFeatures */


/*-----------------------------------------------------------------------------
              Private Code
-----------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/
/**
 * Return true if the specified word is acceptable for adaptation.
 *
 * Globals: none
 *
 * @param word current word
 *
 * @return true or false
 */
bool Classify::AdaptableWord(WERD_RES* word) {
  if (word->best_choice == nullptr) return false;
  int BestChoiceLength = word->best_choice->length();
  float adaptable_score =
    getDict().segment_penalty_dict_case_ok + ADAPTABLE_WERD_ADJUSTMENT;
  return   // rules that apply in general - simplest to compute first
      BestChoiceLength > 0 &&
      BestChoiceLength == word->rebuild_word->NumBlobs() &&
      BestChoiceLength <= MAX_ADAPTABLE_WERD_SIZE &&
      // This basically ensures that the word is at least a dictionary match
      // (freq word, user word, system dawg word, etc).
      // Since all the other adjustments will make adjust factor higher
      // than higher than adaptable_score=1.1+0.05=1.15
      // Since these are other flags that ensure that the word is dict word,
      // this check could be at times redundant.
      word->best_choice->adjust_factor() <= adaptable_score &&
      // Make sure that alternative choices are not dictionary words.
      word->AlternativeChoiceAdjustmentsWorseThan(adaptable_score);
}

/*---------------------------------------------------------------------------*/
/**
 * Adds Blob to the adapted templates for ClassId, either by creating the
 * class from scratch (if empty) or by matching against and reinforcing
 * the existing configs.
 *
 * @param Blob blob to add to templates for ClassId
 * @param ClassId class to add blob to
 * @param FontinfoId font information from pre-trained templates
 * @param Threshold minimum match rating to existing template
 * @param adaptive_templates current set of adapted templates
 *
 * Globals:
 * - AllProtosOn dummy mask to match against all protos
 * - AllConfigsOn dummy mask to match against all configs
 */
void Classify::AdaptToChar(TBLOB* Blob, CLASS_ID ClassId, int FontinfoId,
                           float Threshold,
                           ADAPT_TEMPLATES adaptive_templates) {
  int NumFeatures;
  INT_FEATURE_ARRAY IntFeatures;
  UnicharRating int_result;
  INT_CLASS IClass;
  ADAPT_CLASS Class;
  TEMP_CONFIG TempConfig;
  FEATURE_SET FloatFeatures;
  int NewTempConfigId;

  if (!LegalClassId (ClassId))
    return;

  int_result.unichar_id = ClassId;
  Class = adaptive_templates->Class[ClassId];
  assert(Class != nullptr);
  if (IsEmptyAdaptedClass(Class)) {
    // First example of this class: seed the class from this blob.
    InitAdaptedClass(Blob, ClassId, FontinfoId, Class, adaptive_templates);
  } else {
    IClass = ClassForClassId(adaptive_templates->Templates, ClassId);

    NumFeatures = GetAdaptiveFeatures(Blob, IntFeatures, &FloatFeatures);
    if (NumFeatures <= 0) {
      return;  // Features already freed by GetAdaptiveFeatures.
    }

    // Only match configs with the matching font.
    // Build a config mask containing only configs whose font matches.
    BIT_VECTOR MatchingFontConfigs = NewBitVector(MAX_NUM_PROTOS);
    for (int cfg = 0; cfg < IClass->NumConfigs; ++cfg) {
      if (GetFontinfoId(Class, cfg) == FontinfoId) {
        SET_BIT(MatchingFontConfigs, cfg);
      } else {
        reset_bit(MatchingFontConfigs, cfg);
      }
    }
    im_.Match(IClass, AllProtosOn, MatchingFontConfigs,
              NumFeatures, IntFeatures,
              &int_result, classify_adapt_feature_threshold,
              NO_DEBUG, matcher_debug_separate_windows);
    FreeBitVector(MatchingFontConfigs);

    SetAdaptiveThreshold(Threshold);

    // rating is a similarity; 1 - rating is compared against the adaption
    // threshold as a distance, so <= means "good enough match".
    if (1.0f - int_result.rating <= Threshold) {
      if (ConfigIsPermanent(Class, int_result.config)) {
        // Already permanent: nothing to reinforce.
        if (classify_learning_debug_level >= 1)
          tprintf("Found good match to perm config %d = %4.1f%%.\n",
                  int_result.config, int_result.rating * 100.0);
        FreeFeatureSet(FloatFeatures);
        return;
      }

      // Reinforce the matching temporary config; promote it to permanent
      // once it has been seen reliably often.
      TempConfig = TempConfigFor(Class, int_result.config);
      IncreaseConfidence(TempConfig);
      if (TempConfig->NumTimesSeen > Class->MaxNumTimesSeen) {
        Class->MaxNumTimesSeen = TempConfig->NumTimesSeen;
      }
      if (classify_learning_debug_level >= 1)
        tprintf("Increasing reliability of temp config %d to %d.\n",
                int_result.config, TempConfig->NumTimesSeen);

      if (TempConfigReliable(ClassId, TempConfig)) {
        MakePermanent(adaptive_templates, ClassId, int_result.config, Blob);
        UpdateAmbigsGroup(ClassId, Blob);
      }
    } else {
      // Poor match: start a new temporary config from this blob.
      if (classify_learning_debug_level >= 1) {
        tprintf("Found poor match to temp config %d = %4.1f%%.\n",
                int_result.config, int_result.rating * 100.0);
#ifndef GRAPHICS_DISABLED
        if (classify_learning_debug_level > 2)
          DisplayAdaptedChar(Blob, IClass);
#endif
      }
      NewTempConfigId =
          MakeNewTemporaryConfig(adaptive_templates, ClassId, FontinfoId,
                                 NumFeatures, IntFeatures, FloatFeatures);
      if (NewTempConfigId >= 0 &&
          TempConfigReliable(ClassId, TempConfigFor(Class, NewTempConfigId))) {
        MakePermanent(adaptive_templates, ClassId, NewTempConfigId, Blob);
        UpdateAmbigsGroup(ClassId, Blob);
      }

#ifndef GRAPHICS_DISABLED
      if (classify_learning_debug_level > 1) {
        DisplayAdaptedChar(Blob, IClass);
      }
#endif
    }
    FreeFeatureSet(FloatFeatures);
  }
}  /* AdaptToChar */

#ifndef GRAPHICS_DISABLED

// Debug helper: matches blob against int_class with all protos/configs
// enabled and prints/displays the best matching config.
void Classify::DisplayAdaptedChar(TBLOB* blob, INT_CLASS_STRUCT* int_class) {
  INT_FX_RESULT_STRUCT fx_info;
  std::vector<INT_FEATURE_STRUCT> bl_features;
  TrainingSample* sample =
      BlobToTrainingSample(*blob, classify_nonlinear_norm, &fx_info,
                           &bl_features);
  if (sample == nullptr) return;

  UnicharRating int_result;
  im_.Match(int_class, AllProtosOn, AllConfigsOn,
            bl_features.size(), &bl_features[0],
            &int_result, classify_adapt_feature_threshold,
            NO_DEBUG, matcher_debug_separate_windows);
  tprintf("Best match to temp config %d = %4.1f%%.\n",
          int_result.config, int_result.rating * 100.0);
  if (classify_learning_debug_level >= 2) {
    // Re-match with only the best config enabled, with display debugging on.
    // NOTE(review): the single-config mask is faked by casting a local
    // uint32_t to BIT_VECTOR — relies on BIT_VECTOR's word layout; confirm.
    uint32_t ConfigMask;
    ConfigMask = 1 << int_result.config;
    ShowMatchDisplay();
    im_.Match(int_class, AllProtosOn, static_cast<BIT_VECTOR>(&ConfigMask),
              bl_features.size(), &bl_features[0],
              &int_result, classify_adapt_feature_threshold,
              6 | 0x19, matcher_debug_separate_windows);
    UpdateMatchDisplay();
  }

  delete sample;
}

#endif

/**
 * This routine adds the result of a classification into
 * Results.  If the new rating is much worse than the current
 * best rating, it is not entered into results because it
 * would end up being stripped later anyway.  If the new rating
 * is better than the old rating for the class, it replaces the
 * old rating.  If this is the first rating for the class, the
 * class is added to the list of matched classes in Results.
 * If the new rating is better than the best so far, it
 * becomes the best so far.
 *
 * Globals:
 * - #matcher_bad_match_pad defines limits of an acceptable match
 *
 * @param new_result new result to add
 * @param[out] results results to add new result to
 */
void Classify::AddNewResult(const UnicharRating& new_result,
                            ADAPT_RESULTS *results) {
  // old_match == results->match.size() means the unichar is not yet present.
  int old_match = FindScoredUnichar(new_result.unichar_id, *results);

  if (new_result.rating + matcher_bad_match_pad < results->best_rating ||
      (old_match < results->match.size() &&
       new_result.rating <= results->match[old_match].rating))
    return;  // New one not good enough.

  if (!unicharset.get_fragment(new_result.unichar_id))
    results->HasNonfragment = true;

  if (old_match < results->match.size()) {
    results->match[old_match].rating = new_result.rating;
  } else {
    results->match.push_back(new_result);
  }

  if (new_result.rating > results->best_rating &&
      // Ensure that fragments do not affect best rating, class and config.
      // This is needed so that at least one non-fragmented character is
      // always present in the results.
      // TODO(daria): verify that this helps accuracy and does not
      // hurt performance.
      !unicharset.get_fragment(new_result.unichar_id)) {
    results->best_match_index = old_match;
    results->best_rating = new_result.rating;
    results->best_unichar_id = new_result.unichar_id;
  }
}  /* AddNewResult */


/*---------------------------------------------------------------------------*/
/**
 * This routine is identical to CharNormClassifier()
 * except that it does no class pruning.  It simply matches
 * the unknown blob against the classes listed in
 * Ambiguities.
 *
 * Globals:
 * - #AllProtosOn mask that enables all protos
 * - #AllConfigsOn mask that enables all configs
 *
 * @param blob blob to be classified
 * @param templates built-in templates to classify against
 * @param classes adapted class templates
 * @param ambiguities array of unichar id's to match against
 * @param[out] results place to put match results
 * @param int_features
 * @param fx_info
 */
void Classify::AmbigClassifier(
    const std::vector<INT_FEATURE_STRUCT>& int_features,
    const INT_FX_RESULT_STRUCT& fx_info,
    const TBLOB *blob,
    INT_TEMPLATES templates,
    ADAPT_CLASS *classes,
    UNICHAR_ID *ambiguities,
    ADAPT_RESULTS *results) {
  if (int_features.empty()) return;
  auto* CharNormArray = new uint8_t[unicharset.size()];
  UnicharRating int_result;

  results->BlobLength = GetCharNormFeature(fx_info, templates, nullptr,
                                           CharNormArray);
  bool debug = matcher_debug_level >= 2 || classify_debug_level > 1;
  if (debug)
    tprintf("AM Matches = ");

  int top = blob->bounding_box().top();
  int bottom = blob->bounding_box().bottom();
  // The ambiguities array is terminated by a negative unichar id.
  while (*ambiguities >= 0) {
    CLASS_ID class_id = *ambiguities;

    int_result.unichar_id = class_id;
    im_.Match(ClassForClassId(templates, class_id),
              AllProtosOn, AllConfigsOn,
              int_features.size(), &int_features[0],
              &int_result,
              classify_adapt_feature_threshold, NO_DEBUG,
              matcher_debug_separate_windows);

    ExpandShapesAndApplyCorrections(nullptr, debug, class_id, bottom, top, 0,
                                    results->BlobLength,
                                    classify_integer_matcher_multiplier,
                                    CharNormArray, &int_result, results);
    ambiguities++;
  }
  delete [] CharNormArray;
}  /* AmbigClassifier */

/*---------------------------------------------------------------------------*/
/// Factored-out calls to IntegerMatcher based on class pruner results.
/// Returns integer matcher results inside CLASS_PRUNER_RESULTS structure.
void Classify::MasterMatcher(INT_TEMPLATES templates,
                             int16_t num_features,
                             const INT_FEATURE_STRUCT* features,
                             const uint8_t* norm_factors,
                             ADAPT_CLASS* classes,
                             int debug,
                             int matcher_multiplier,
                             const TBOX& blob_box,
                             const std::vector<CP_RESULT_STRUCT>& results,
                             ADAPT_RESULTS* final_results) {
  int top = blob_box.top();
  int bottom = blob_box.bottom();
  UnicharRating int_result;
  // Run the integer matcher on each class the pruner kept.
  for (int c = 0; c < results.size(); c++) {
    CLASS_ID class_id = results[c].Class;
    // Adapted templates restrict matching to permanent protos/configs;
    // pre-trained templates match against everything.
    BIT_VECTOR protos = classes != nullptr ? classes[class_id]->PermProtos
                                           : AllProtosOn;
    BIT_VECTOR configs = classes != nullptr ? classes[class_id]->PermConfigs
                                            : AllConfigsOn;

    int_result.unichar_id = class_id;
    im_.Match(ClassForClassId(templates, class_id),
              protos, configs,
              num_features, features,
              &int_result, classify_adapt_feature_threshold, debug,
              matcher_debug_separate_windows);
    bool is_debug = matcher_debug_level >= 2 || classify_debug_level > 1;
    ExpandShapesAndApplyCorrections(classes, is_debug, class_id, bottom, top,
                                    results[c].Rating,
                                    final_results->BlobLength,
                                    matcher_multiplier, norm_factors,
                                    &int_result, final_results);
  }
}

// Converts configs to fonts, and if the result is not adapted, and a
// shape_table_ is present, the shape is expanded to include all
// unichar_ids represented, before applying a set of corrections to the
// distance rating in int_result, (see ComputeCorrectedRating.)
// The results are added to the final_results output.
void Classify::ExpandShapesAndApplyCorrections(
    ADAPT_CLASS* classes, bool debug, int class_id, int bottom, int top,
    float cp_rating, int blob_length, int matcher_multiplier,
    const uint8_t* cn_factors,
    UnicharRating* int_result, ADAPT_RESULTS* final_results) {
  if (classes != nullptr) {
    // Adapted result. Convert configs to fontinfo_ids.
    int_result->adapted = true;
    for (int f = 0; f < int_result->fonts.size(); ++f) {
      int_result->fonts[f].fontinfo_id =
          GetFontinfoId(classes[class_id], int_result->fonts[f].fontinfo_id);
    }
  } else {
    // Pre-trained result. Map fonts using font_sets_.
    int_result->adapted = false;
    for (int f = 0; f < int_result->fonts.size(); ++f) {
      int_result->fonts[f].fontinfo_id =
          ClassAndConfigIDToFontOrShapeID(class_id,
                                          int_result->fonts[f].fontinfo_id);
    }
    if (shape_table_ != nullptr) {
      // Two possible cases:
      // 1. Flat shapetable. All unichar-ids of the shapes referenced by
      // int_result->fonts are the same. In this case build a new vector of
      // mapped fonts and replace the fonts in int_result.
      // 2. Multi-unichar shapetable. Variable unichars in the shapes referenced
      // by int_result. In this case, build a vector of UnicharRating to
      // gather together different font-ids for each unichar. Also covers case1.
      GenericVector<UnicharRating> mapped_results;
      for (int f = 0; f < int_result->fonts.size(); ++f) {
        int shape_id = int_result->fonts[f].fontinfo_id;
        const Shape& shape = shape_table_->GetShape(shape_id);
        for (int c = 0; c < shape.size(); ++c) {
          int unichar_id = shape[c].unichar_id;
          if (!unicharset.get_enabled(unichar_id)) continue;
          // Find the mapped_result for unichar_id.
          // Linear search for an existing entry; append a fresh one (cloned
          // from int_result, with fonts cleared) if none exists.
          int r = 0;
          for (r = 0; r < mapped_results.size() &&
               mapped_results[r].unichar_id != unichar_id; ++r) {}
          if (r == mapped_results.size()) {
            mapped_results.push_back(*int_result);
            mapped_results[r].unichar_id = unichar_id;
            mapped_results[r].fonts.clear();
          }
          for (int i = 0; i < shape[c].font_ids.size(); ++i) {
            mapped_results[r].fonts.push_back(
                ScoredFont(shape[c].font_ids[i], int_result->fonts[f].score));
          }
        }
      }
      // Correct and emit one result per distinct unichar in the shapes.
      for (int m = 0; m < mapped_results.size(); ++m) {
        mapped_results[m].rating =
            ComputeCorrectedRating(debug, mapped_results[m].unichar_id,
                                   cp_rating, int_result->rating,
                                   int_result->feature_misses, bottom, top,
                                   blob_length, matcher_multiplier, cn_factors);
        AddNewResult(mapped_results[m], final_results);
      }
      return;
    }
  }
  if (unicharset.get_enabled(class_id)) {
    int_result->rating = ComputeCorrectedRating(debug, class_id, cp_rating,
                                                int_result->rating,
                                                int_result->feature_misses,
                                                bottom, top, blob_length,
                                                matcher_multiplier, cn_factors);
    AddNewResult(*int_result, final_results);
  }
}

// Applies a set of corrections to the confidence im_rating,
// including the cn_correction, miss penalty and additional penalty
// for non-alnums being vertical misfits. Returns the corrected confidence.
double Classify::ComputeCorrectedRating(bool debug, int unichar_id,
                                        double cp_rating, double im_rating,
                                        int feature_misses,
                                        int bottom, int top,
                                        int blob_length, int matcher_multiplier,
                                        const uint8_t* cn_factors) {
  // Compute class feature corrections.
  double cn_corrected = im_.ApplyCNCorrection(1.0 - im_rating, blob_length,
                                              cn_factors[unichar_id],
                                              matcher_multiplier);
  double miss_penalty = tessedit_class_miss_scale * feature_misses;
  double vertical_penalty = 0.0;
  // Penalize non-alnums for being vertical misfits.
  if (!unicharset.get_isalpha(unichar_id) &&
      !unicharset.get_isdigit(unichar_id) &&
      cn_factors[unichar_id] != 0 && classify_misfit_junk_penalty > 0.0) {
    int min_bottom, max_bottom, min_top, max_top;
    unicharset.get_top_bottom(unichar_id, &min_bottom, &max_bottom,
                              &min_top, &max_top);
    if (debug) {
      tprintf("top=%d, vs [%d, %d], bottom=%d, vs [%d, %d]\n",
              top, min_top, max_top, bottom, min_bottom, max_bottom);
    }
    if (top < min_top || top > max_top ||
        bottom < min_bottom || bottom > max_bottom) {
      vertical_penalty = classify_misfit_junk_penalty;
    }
  }
  // Combine the penalties and clip to the worst allowed rating.
  double result = 1.0 - (cn_corrected + miss_penalty + vertical_penalty);
  if (result < WORST_POSSIBLE_RATING)
    result = WORST_POSSIBLE_RATING;
  if (debug) {
    tprintf("%s: %2.1f%%(CP%2.1f, IM%2.1f + CN%.2f(%d) + MP%2.1f + VP%2.1f)\n",
            unicharset.id_to_unichar(unichar_id),
            result * 100.0,
            cp_rating * 100.0,
            (1.0 - im_rating) * 100.0,
            (cn_corrected - (1.0 - im_rating)) * 100.0,
            cn_factors[unichar_id],
            miss_penalty * 100.0,
            vertical_penalty * 100.0);
  }
  return result;
}

/*---------------------------------------------------------------------------*/
/**
 * This routine extracts baseline normalized features
 * from the unknown character and matches them against the
 * specified set of templates.  The classes which match
 * are added to Results.
 *
 * Globals:
 * - BaselineCutoffs expected num features for each class
 *
 * @param Blob blob to be classified
 * @param Templates current set of adapted templates
 * @param Results place to put match results
 * @param int_features
 * @param fx_info
 *
 * @return Array of possible ambiguous chars that should be checked.
+ */ +UNICHAR_ID *Classify::BaselineClassifier( + TBLOB *Blob, const std::vector<INT_FEATURE_STRUCT>& int_features, + const INT_FX_RESULT_STRUCT& fx_info, + ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results) { + if (int_features.empty()) return nullptr; + auto* CharNormArray = new uint8_t[unicharset.size()]; + ClearCharNormArray(CharNormArray); + + Results->BlobLength = IntCastRounded(fx_info.Length / kStandardFeatureLength); + PruneClasses(Templates->Templates, int_features.size(), -1, &int_features[0], + CharNormArray, BaselineCutoffs, &Results->CPResults); + + if (matcher_debug_level >= 2 || classify_debug_level > 1) + tprintf("BL Matches = "); + + MasterMatcher(Templates->Templates, int_features.size(), &int_features[0], + CharNormArray, + Templates->Class, matcher_debug_flags, 0, + Blob->bounding_box(), Results->CPResults, Results); + + delete [] CharNormArray; + CLASS_ID ClassId = Results->best_unichar_id; + if (ClassId == INVALID_UNICHAR_ID || Results->best_match_index < 0) + return nullptr; + + return Templates->Class[ClassId]-> + Config[Results->match[Results->best_match_index].config].Perm->Ambigs; +} /* BaselineClassifier */ + + +/*---------------------------------------------------------------------------*/ +/** + * This routine extracts character normalized features + * from the unknown character and matches them against the + * specified set of templates. The classes which match + * are added to Results. + * + * @param blob blob to be classified + * @param sample templates to classify unknown against + * @param adapt_results place to put match results + * + * Globals: + * - CharNormCutoffs expected num features for each class + * - AllProtosOn mask that enables all protos + * - AllConfigsOn mask that enables all configs + */ +int Classify::CharNormClassifier(TBLOB *blob, + const TrainingSample& sample, + ADAPT_RESULTS *adapt_results) { + // This is the length that is used for scaling ratings vs certainty. 
+ adapt_results->BlobLength = + IntCastRounded(sample.outline_length() / kStandardFeatureLength); + std::vector<UnicharRating> unichar_results; + static_classifier_->UnicharClassifySample(sample, blob->denorm().pix(), 0, + -1, &unichar_results); + // Convert results to the format used internally by AdaptiveClassifier. + for (int r = 0; r < unichar_results.size(); ++r) { + AddNewResult(unichar_results[r], adapt_results); + } + return sample.num_features(); +} /* CharNormClassifier */ + +// As CharNormClassifier, but operates on a TrainingSample and outputs to +// a GenericVector of ShapeRating without conversion to classes. +int Classify::CharNormTrainingSample(bool pruner_only, + int keep_this, + const TrainingSample& sample, + std::vector<UnicharRating>* results) { + results->clear(); + auto* adapt_results = new ADAPT_RESULTS(); + adapt_results->Initialize(); + // Compute the bounding box of the features. + uint32_t num_features = sample.num_features(); + // Only the top and bottom of the blob_box are used by MasterMatcher, so + // fabricate right and left using top and bottom. + TBOX blob_box(sample.geo_feature(GeoBottom), sample.geo_feature(GeoBottom), + sample.geo_feature(GeoTop), sample.geo_feature(GeoTop)); + // Compute the char_norm_array from the saved cn_feature. + FEATURE norm_feature = sample.GetCNFeature(); + auto* char_norm_array = new uint8_t[unicharset.size()]; + int num_pruner_classes = std::max(unicharset.size(), + PreTrainedTemplates->NumClasses); + auto* pruner_norm_array = new uint8_t[num_pruner_classes]; + adapt_results->BlobLength = + static_cast<int>(ActualOutlineLength(norm_feature) * 20 + 0.5); + ComputeCharNormArrays(norm_feature, PreTrainedTemplates, char_norm_array, + pruner_norm_array); + + PruneClasses(PreTrainedTemplates, num_features, keep_this, sample.features(), + pruner_norm_array, + shape_table_ != nullptr ? 
&shapetable_cutoffs_[0] : CharNormCutoffs, + &adapt_results->CPResults); + delete [] pruner_norm_array; + if (keep_this >= 0) { + adapt_results->CPResults[0].Class = keep_this; + adapt_results->CPResults.resize(1); + } + if (pruner_only) { + // Convert pruner results to output format. + for (int i = 0; i < adapt_results->CPResults.size(); ++i) { + int class_id = adapt_results->CPResults[i].Class; + results->push_back( + UnicharRating(class_id, 1.0f - adapt_results->CPResults[i].Rating)); + } + } else { + MasterMatcher(PreTrainedTemplates, num_features, sample.features(), + char_norm_array, + nullptr, matcher_debug_flags, + classify_integer_matcher_multiplier, + blob_box, adapt_results->CPResults, adapt_results); + // Convert master matcher results to output format. + for (int i = 0; i < adapt_results->match.size(); i++) { + results->push_back(adapt_results->match[i]); + } + if (results->size() > 1) { + std::sort(results->begin(), results->end(), SortDescendingRating); + } + } + delete [] char_norm_array; + delete adapt_results; + return num_features; +} /* CharNormTrainingSample */ + + +/*---------------------------------------------------------------------------*/ +/** + * This routine computes a rating which reflects the + * likelihood that the blob being classified is a noise + * blob. NOTE: assumes that the blob length has already been + * computed and placed into Results. + * + * @param results results to add noise classification to + * + * Globals: + * - matcher_avg_noise_size avg. length of a noise blob + */ +void Classify::ClassifyAsNoise(ADAPT_RESULTS *results) { + float rating = results->BlobLength / matcher_avg_noise_size; + rating *= rating; + rating /= 1.0 + rating; + + AddNewResult(UnicharRating(UNICHAR_SPACE, 1.0f - rating), results); +} /* ClassifyAsNoise */ + +/// The function converts the given match ratings to the list of blob +/// choices with ratings and certainties (used by the context checkers). 
+/// If character fragments are present in the results, this function also makes +/// sure that there is at least one non-fragmented classification included. +/// For each classification result check the unicharset for "definite" +/// ambiguities and modify the resulting Choices accordingly. +void Classify::ConvertMatchesToChoices(const DENORM& denorm, const TBOX& box, + ADAPT_RESULTS *Results, + BLOB_CHOICE_LIST *Choices) { + assert(Choices != nullptr); + float Rating; + float Certainty; + BLOB_CHOICE_IT temp_it; + bool contains_nonfrag = false; + temp_it.set_to_list(Choices); + int choices_length = 0; + // With no shape_table_ maintain the previous MAX_MATCHES as the maximum + // number of returned results, but with a shape_table_ we want to have room + // for at least the biggest shape (which might contain hundreds of Indic + // grapheme fragments) and more, so use double the size of the biggest shape + // if that is more than the default. + int max_matches = MAX_MATCHES; + if (shape_table_ != nullptr) { + max_matches = shape_table_->MaxNumUnichars() * 2; + if (max_matches < MAX_MATCHES) + max_matches = MAX_MATCHES; + } + + float best_certainty = -FLT_MAX; + for (int i = 0; i < Results->match.size(); i++) { + const UnicharRating& result = Results->match[i]; + bool adapted = result.adapted; + bool current_is_frag = (unicharset.get_fragment(result.unichar_id) != nullptr); + if (temp_it.length()+1 == max_matches && + !contains_nonfrag && current_is_frag) { + continue; // look for a non-fragmented character to fill the + // last spot in Choices if only fragments are present + } + // BlobLength can never be legally 0, this means recognition failed. + // But we must return a classification result because some invoking + // functions (chopper/permuter) do not anticipate a null blob choice. + // So we need to assign a poor, but not infinitely bad score. 
+ if (Results->BlobLength == 0) { + Certainty = -20; + Rating = 100; // should be -certainty * real_blob_length + } else { + Rating = Certainty = (1.0f - result.rating); + Rating *= rating_scale * Results->BlobLength; + Certainty *= -(getDict().certainty_scale); + } + // Adapted results, by their very nature, should have good certainty. + // Those that don't are at best misleading, and often lead to errors, + // so don't accept adapted results that are too far behind the best result, + // whether adapted or static. + // TODO(rays) find some way of automatically tuning these constants. + if (Certainty > best_certainty) { + best_certainty = std::min(Certainty, static_cast<float>(classify_adapted_pruning_threshold)); + } else if (adapted && + Certainty / classify_adapted_pruning_factor < best_certainty) { + continue; // Don't accept bad adapted results. + } + + float min_xheight, max_xheight, yshift; + denorm.XHeightRange(result.unichar_id, unicharset, box, + &min_xheight, &max_xheight, &yshift); + auto* choice = + new BLOB_CHOICE(result.unichar_id, Rating, Certainty, + unicharset.get_script(result.unichar_id), + min_xheight, max_xheight, yshift, + adapted ? 
BCC_ADAPTED_CLASSIFIER + : BCC_STATIC_CLASSIFIER); + choice->set_fonts(result.fonts); + temp_it.add_to_end(choice); + contains_nonfrag |= !current_is_frag; // update contains_nonfrag + choices_length++; + if (choices_length >= max_matches) break; + } + Results->match.resize(choices_length); +} // ConvertMatchesToChoices + + +/*---------------------------------------------------------------------------*/ +#ifndef GRAPHICS_DISABLED +/** + * + * @param blob blob whose classification is being debugged + * @param Results results of match being debugged + * + * Globals: none + */ +void Classify::DebugAdaptiveClassifier(TBLOB *blob, + ADAPT_RESULTS *Results) { + if (static_classifier_ == nullptr) return; + INT_FX_RESULT_STRUCT fx_info; + std::vector<INT_FEATURE_STRUCT> bl_features; + TrainingSample* sample = + BlobToTrainingSample(*blob, false, &fx_info, &bl_features); + if (sample == nullptr) return; + static_classifier_->DebugDisplay(*sample, blob->denorm().pix(), + Results->best_unichar_id); +} /* DebugAdaptiveClassifier */ +#endif + +/*---------------------------------------------------------------------------*/ +/** + * This routine performs an adaptive classification. + * If we have not yet adapted to enough classes, a simple + * classification to the pre-trained templates is performed. + * Otherwise, we match the blob against the adapted templates. + * If the adapted templates do not match well, we try a + * match against the pre-trained templates. If an adapted + * template match is found, we do a match to any pre-trained + * templates which could be ambiguous. The results from all + * of these classifications are merged together into Results. 
+ * + * @param Blob blob to be classified + * @param Results place to put match results + * + * Globals: + * - PreTrainedTemplates built-in training templates + * - AdaptedTemplates templates adapted for this page + * - matcher_reliable_adaptive_result rating limit for a great match + */ +void Classify::DoAdaptiveMatch(TBLOB *Blob, ADAPT_RESULTS *Results) { + UNICHAR_ID *Ambiguities; + + INT_FX_RESULT_STRUCT fx_info; + std::vector<INT_FEATURE_STRUCT> bl_features; + TrainingSample* sample = + BlobToTrainingSample(*Blob, classify_nonlinear_norm, &fx_info, + &bl_features); + if (sample == nullptr) return; + + // TODO: With LSTM, static_classifier_ is nullptr. + // Return to avoid crash in CharNormClassifier. + if (static_classifier_ == nullptr) { + delete sample; + return; + } + + if (AdaptedTemplates->NumPermClasses < matcher_permanent_classes_min || + tess_cn_matching) { + CharNormClassifier(Blob, *sample, Results); + } else { + Ambiguities = BaselineClassifier(Blob, bl_features, fx_info, + AdaptedTemplates, Results); + if ((!Results->match.empty() && + MarginalMatch(Results->best_rating, + matcher_reliable_adaptive_result) && + !tess_bn_matching) || + Results->match.empty()) { + CharNormClassifier(Blob, *sample, Results); + } else if (Ambiguities && *Ambiguities >= 0 && !tess_bn_matching) { + AmbigClassifier(bl_features, fx_info, Blob, + PreTrainedTemplates, + AdaptedTemplates->Class, + Ambiguities, + Results); + } + } + + // Force the blob to be classified as noise + // if the results contain only fragments. + // TODO(daria): verify that this is better than + // just adding a nullptr classification. 
+ if (!Results->HasNonfragment || Results->match.empty())
+ ClassifyAsNoise(Results);
+ delete sample;
+} /* DoAdaptiveMatch */
+
+/*---------------------------------------------------------------------------*/
+/**
+ * This routine matches blob to the built-in templates
+ * to find out if there are any classes other than the correct
+ * class which are potential ambiguities.
+ *
+ * @param Blob blob to get classification ambiguities for
+ * @param CorrectClass correct class for Blob
+ *
+ * Globals:
+ * - CurrentRatings used by qsort compare routine
+ * - PreTrainedTemplates built-in templates
+ *
+ * @return String containing all possible ambiguous classes.
+ */
+UNICHAR_ID *Classify::GetAmbiguities(TBLOB *Blob,
+ CLASS_ID CorrectClass) {
+ auto *Results = new ADAPT_RESULTS();
+ UNICHAR_ID *Ambiguities;
+ int i;
+
+ Results->Initialize();
+ INT_FX_RESULT_STRUCT fx_info;
+ std::vector<INT_FEATURE_STRUCT> bl_features;
+ TrainingSample* sample =
+ BlobToTrainingSample(*Blob, classify_nonlinear_norm, &fx_info,
+ &bl_features);
+ // Feature extraction can fail (e.g. degenerate blob); report no ambiguities.
+ if (sample == nullptr) {
+ delete Results;
+ return nullptr;
+ }
+
+ // Classify against the static (pre-trained) templates only.
+ CharNormClassifier(Blob, *sample, Results);
+ delete sample;
+ RemoveBadMatches(Results);
+ std::sort(Results->match.begin(), Results->match.end(), SortDescendingRating);
+
+ /* copy the class id's into an string of ambiguities - don't copy if
+ the correct class is the only class id matched */
+ // The returned array is -1 terminated; the caller owns the new[] memory.
+ Ambiguities = new UNICHAR_ID[Results->match.size() + 1];
+ if (Results->match.size() > 1 ||
+ (Results->match.size() == 1 &&
+ Results->match[0].unichar_id != CorrectClass)) {
+ for (i = 0; i < Results->match.size(); i++)
+ Ambiguities[i] = Results->match[i].unichar_id;
+ Ambiguities[i] = -1;
+ } else {
+ // Only the correct class matched, so there are no ambiguities.
+ Ambiguities[0] = -1;
+ }
+
+ delete Results;
+ return Ambiguities;
+} /* GetAmbiguities */
+
+// Returns true if the given blob looks too dissimilar to any character
+// present in the classifier templates.
+bool Classify::LooksLikeGarbage(TBLOB *blob) { + auto *ratings = new BLOB_CHOICE_LIST(); + AdaptiveClassifier(blob, ratings); + BLOB_CHOICE_IT ratings_it(ratings); + const UNICHARSET &unicharset = getDict().getUnicharset(); + if (classify_debug_character_fragments) { + print_ratings_list("======================\nLooksLikeGarbage() got ", + ratings, unicharset); + } + for (ratings_it.mark_cycle_pt(); !ratings_it.cycled_list(); + ratings_it.forward()) { + if (unicharset.get_fragment(ratings_it.data()->unichar_id()) != nullptr) { + continue; + } + float certainty = ratings_it.data()->certainty(); + delete ratings; + return certainty < + classify_character_fragments_garbage_certainty_threshold; + } + delete ratings; + return true; // no whole characters in ratings +} + +/*---------------------------------------------------------------------------*/ +/** + * This routine calls the integer (Hardware) feature + * extractor if it has not been called before for this blob. + * + * The results from the feature extractor are placed into + * globals so that they can be used in other routines without + * re-extracting the features. + * + * It then copies the char norm features into the IntFeatures + * array provided by the caller. + * + * @param templates used to compute char norm adjustments + * @param pruner_norm_array Array of factors from blob normalization + * process + * @param char_norm_array array to fill with dummy char norm adjustments + * @param fx_info + * + * Globals: + * + * @return Number of features extracted or 0 if an error occurred. 
+ */ +int Classify::GetCharNormFeature(const INT_FX_RESULT_STRUCT& fx_info, + INT_TEMPLATES templates, + uint8_t* pruner_norm_array, + uint8_t* char_norm_array) { + FEATURE norm_feature = NewFeature(&CharNormDesc); + float baseline = kBlnBaselineOffset; + float scale = MF_SCALE_FACTOR; + norm_feature->Params[CharNormY] = (fx_info.Ymean - baseline) * scale; + norm_feature->Params[CharNormLength] = + fx_info.Length * scale / LENGTH_COMPRESSION; + norm_feature->Params[CharNormRx] = fx_info.Rx * scale; + norm_feature->Params[CharNormRy] = fx_info.Ry * scale; + // Deletes norm_feature. + ComputeCharNormArrays(norm_feature, templates, char_norm_array, + pruner_norm_array); + return IntCastRounded(fx_info.Length / kStandardFeatureLength); +} /* GetCharNormFeature */ + +// Computes the char_norm_array for the unicharset and, if not nullptr, the +// pruner_array as appropriate according to the existence of the shape_table. +void Classify::ComputeCharNormArrays(FEATURE_STRUCT* norm_feature, + INT_TEMPLATES_STRUCT* templates, + uint8_t* char_norm_array, + uint8_t* pruner_array) { + ComputeIntCharNormArray(*norm_feature, char_norm_array); + if (pruner_array != nullptr) { + if (shape_table_ == nullptr) { + ComputeIntCharNormArray(*norm_feature, pruner_array); + } else { + memset(pruner_array, UINT8_MAX, + templates->NumClasses * sizeof(pruner_array[0])); + // Each entry in the pruner norm array is the MIN of all the entries of + // the corresponding unichars in the CharNormArray. 
+ for (int id = 0; id < templates->NumClasses; ++id) { + int font_set_id = templates->Class[id]->font_set_id; + const FontSet &fs = fontset_table_.get(font_set_id); + for (int config = 0; config < fs.size; ++config) { + const Shape& shape = shape_table_->GetShape(fs.configs[config]); + for (int c = 0; c < shape.size(); ++c) { + if (char_norm_array[shape[c].unichar_id] < pruner_array[id]) + pruner_array[id] = char_norm_array[shape[c].unichar_id]; + } + } + } + } + } + FreeFeature(norm_feature); +} + +/*---------------------------------------------------------------------------*/ +/** + * + * @param Templates adapted templates to add new config to + * @param ClassId class id to associate with new config + * @param FontinfoId font information inferred from pre-trained templates + * @param NumFeatures number of features in IntFeatures + * @param Features features describing model for new config + * @param FloatFeatures floating-pt representation of features + * + * @return The id of the new config created, a negative integer in + * case of error. 
+ */ +int Classify::MakeNewTemporaryConfig(ADAPT_TEMPLATES Templates, + CLASS_ID ClassId, + int FontinfoId, + int NumFeatures, + INT_FEATURE_ARRAY Features, + FEATURE_SET FloatFeatures) { + INT_CLASS IClass; + ADAPT_CLASS Class; + PROTO_ID OldProtos[MAX_NUM_PROTOS]; + FEATURE_ID BadFeatures[MAX_NUM_INT_FEATURES]; + int NumOldProtos; + int NumBadFeatures; + int MaxProtoId, OldMaxProtoId; + int MaskSize; + int ConfigId; + TEMP_CONFIG Config; + int i; + int debug_level = NO_DEBUG; + + if (classify_learning_debug_level >= 3) + debug_level = + PRINT_MATCH_SUMMARY | PRINT_FEATURE_MATCHES | PRINT_PROTO_MATCHES; + + IClass = ClassForClassId(Templates->Templates, ClassId); + Class = Templates->Class[ClassId]; + + if (IClass->NumConfigs >= MAX_NUM_CONFIGS) { + ++NumAdaptationsFailed; + if (classify_learning_debug_level >= 1) + tprintf("Cannot make new temporary config: maximum number exceeded.\n"); + return -1; + } + + OldMaxProtoId = IClass->NumProtos - 1; + + NumOldProtos = im_.FindGoodProtos(IClass, AllProtosOn, AllConfigsOff, + NumFeatures, Features, + OldProtos, classify_adapt_proto_threshold, + debug_level); + + MaskSize = WordsInVectorOfSize(MAX_NUM_PROTOS); + zero_all_bits(TempProtoMask, MaskSize); + for (i = 0; i < NumOldProtos; i++) + SET_BIT(TempProtoMask, OldProtos[i]); + + NumBadFeatures = im_.FindBadFeatures(IClass, TempProtoMask, AllConfigsOn, + NumFeatures, Features, + BadFeatures, + classify_adapt_feature_threshold, + debug_level); + + MaxProtoId = MakeNewTempProtos(FloatFeatures, NumBadFeatures, BadFeatures, + IClass, Class, TempProtoMask); + if (MaxProtoId == NO_PROTO) { + ++NumAdaptationsFailed; + if (classify_learning_debug_level >= 1) + tprintf("Cannot make new temp protos: maximum number exceeded.\n"); + return -1; + } + + ConfigId = AddIntConfig(IClass); + ConvertConfig(TempProtoMask, ConfigId, IClass); + Config = NewTempConfig(MaxProtoId, FontinfoId); + TempConfigFor(Class, ConfigId) = Config; + copy_all_bits(TempProtoMask, Config->Protos, 
Config->ProtoVectorSize); + + if (classify_learning_debug_level >= 1) + tprintf("Making new temp config %d fontinfo id %d" + " using %d old and %d new protos.\n", + ConfigId, Config->FontinfoId, + NumOldProtos, MaxProtoId - OldMaxProtoId); + + return ConfigId; +} /* MakeNewTemporaryConfig */ + +/*---------------------------------------------------------------------------*/ +/** + * This routine finds sets of sequential bad features + * that all have the same angle and converts each set into + * a new temporary proto. The temp proto is added to the + * proto pruner for IClass, pushed onto the list of temp + * protos in Class, and added to TempProtoMask. + * + * @param Features floating-pt features describing new character + * @param NumBadFeat number of bad features to turn into protos + * @param BadFeat feature id's of bad features + * @param IClass integer class templates to add new protos to + * @param Class adapted class templates to add new protos to + * @param TempProtoMask proto mask to add new protos to + * + * Globals: none + * + * @return Max proto id in class after all protos have been added. 
+ */ +PROTO_ID Classify::MakeNewTempProtos(FEATURE_SET Features, + int NumBadFeat, + FEATURE_ID BadFeat[], + INT_CLASS IClass, + ADAPT_CLASS Class, + BIT_VECTOR TempProtoMask) { + FEATURE_ID *ProtoStart; + FEATURE_ID *ProtoEnd; + FEATURE_ID *LastBad; + TEMP_PROTO TempProto; + PROTO Proto; + FEATURE F1, F2; + float X1, X2, Y1, Y2; + float A1, A2, AngleDelta; + float SegmentLength; + PROTO_ID Pid; + + for (ProtoStart = BadFeat, LastBad = ProtoStart + NumBadFeat; + ProtoStart < LastBad; ProtoStart = ProtoEnd) { + F1 = Features->Features[*ProtoStart]; + X1 = F1->Params[PicoFeatX]; + Y1 = F1->Params[PicoFeatY]; + A1 = F1->Params[PicoFeatDir]; + + for (ProtoEnd = ProtoStart + 1, + SegmentLength = GetPicoFeatureLength(); + ProtoEnd < LastBad; + ProtoEnd++, SegmentLength += GetPicoFeatureLength()) { + F2 = Features->Features[*ProtoEnd]; + X2 = F2->Params[PicoFeatX]; + Y2 = F2->Params[PicoFeatY]; + A2 = F2->Params[PicoFeatDir]; + + AngleDelta = fabs(A1 - A2); + if (AngleDelta > 0.5) + AngleDelta = 1.0 - AngleDelta; + + if (AngleDelta > matcher_clustering_max_angle_delta || + fabs(X1 - X2) > SegmentLength || + fabs(Y1 - Y2) > SegmentLength) + break; + } + + F2 = Features->Features[*(ProtoEnd - 1)]; + X2 = F2->Params[PicoFeatX]; + Y2 = F2->Params[PicoFeatY]; + A2 = F2->Params[PicoFeatDir]; + + Pid = AddIntProto(IClass); + if (Pid == NO_PROTO) + return (NO_PROTO); + + TempProto = NewTempProto(); + Proto = &(TempProto->Proto); + + /* compute proto params - NOTE that Y_DIM_OFFSET must be used because + ConvertProto assumes that the Y dimension varies from -0.5 to 0.5 + instead of the -0.25 to 0.75 used in baseline normalization */ + Proto->Length = SegmentLength; + Proto->Angle = A1; + Proto->X = (X1 + X2) / 2.0; + Proto->Y = (Y1 + Y2) / 2.0 - Y_DIM_OFFSET; + FillABC(Proto); + + TempProto->ProtoId = Pid; + SET_BIT(TempProtoMask, Pid); + + ConvertProto(Proto, Pid, IClass); + AddProtoToProtoPruner(Proto, Pid, IClass, + classify_learning_debug_level >= 2); + + Class->TempProtos = 
push(Class->TempProtos, TempProto); + } + return IClass->NumProtos - 1; +} /* MakeNewTempProtos */ + +/*---------------------------------------------------------------------------*/ +/** + * + * @param Templates current set of adaptive templates + * @param ClassId class containing config to be made permanent + * @param ConfigId config to be made permanent + * @param Blob current blob being adapted to + * + * Globals: none + */ +void Classify::MakePermanent(ADAPT_TEMPLATES Templates, + CLASS_ID ClassId, + int ConfigId, + TBLOB *Blob) { + UNICHAR_ID *Ambigs; + TEMP_CONFIG Config; + ADAPT_CLASS Class; + PROTO_KEY ProtoKey; + + Class = Templates->Class[ClassId]; + Config = TempConfigFor(Class, ConfigId); + + MakeConfigPermanent(Class, ConfigId); + if (Class->NumPermConfigs == 0) + Templates->NumPermClasses++; + Class->NumPermConfigs++; + + // Initialize permanent config. + Ambigs = GetAmbiguities(Blob, ClassId); + auto Perm = static_cast<PERM_CONFIG>(malloc(sizeof(PERM_CONFIG_STRUCT))); + Perm->Ambigs = Ambigs; + Perm->FontinfoId = Config->FontinfoId; + + // Free memory associated with temporary config (since ADAPTED_CONFIG + // is a union we need to clean up before we record permanent config). + ProtoKey.Templates = Templates; + ProtoKey.ClassId = ClassId; + ProtoKey.ConfigId = ConfigId; + Class->TempProtos = delete_d(Class->TempProtos, &ProtoKey, MakeTempProtoPerm); + FreeTempConfig(Config); + + // Record permanent config. 
+ PermConfigFor(Class, ConfigId) = Perm;
+
+ if (classify_learning_debug_level >= 1) {
+ tprintf("Making config %d for %s (ClassId %d) permanent:"
+ " fontinfo id %d, ambiguities '",
+ ConfigId, getDict().getUnicharset().debug_str(ClassId).c_str(),
+ ClassId, PermConfigFor(Class, ConfigId)->FontinfoId);
+ for (UNICHAR_ID *AmbigsPointer = Ambigs;
+ *AmbigsPointer >= 0; ++AmbigsPointer)
+ tprintf("%s", unicharset.id_to_unichar(*AmbigsPointer));
+ tprintf("'.\n");
+ }
+} /* MakePermanent */
+
+/*---------------------------------------------------------------------------*/
+/**
+ * This routine converts TempProto to be permanent if
+ * its proto id is used by the configuration specified in
+ * ProtoKey.
+ *
+ * @param item1 (TEMP_PROTO) temporary proto to compare to key
+ * @param item2 (PROTO_KEY) defines which protos to make permanent
+ *
+ * Globals: none
+ *
+ * @return true if TempProto is converted, false otherwise
+ */
+int MakeTempProtoPerm(void *item1, void *item2) {
+ ADAPT_CLASS Class;
+ TEMP_CONFIG Config;
+ TEMP_PROTO TempProto;
+ PROTO_KEY *ProtoKey;
+
+ // Callback signature forces void*; item1/item2 types are documented above.
+ TempProto = static_cast<TEMP_PROTO>(item1);
+ ProtoKey = static_cast<PROTO_KEY *>(item2);
+
+ Class = ProtoKey->Templates->Class[ProtoKey->ClassId];
+ Config = TempConfigFor(Class, ProtoKey->ConfigId);
+
+ // Leave protos alone that are not referenced by the config being
+ // made permanent.
+ if (TempProto->ProtoId > Config->MaxProtoId ||
+ !test_bit (Config->Protos, TempProto->ProtoId))
+ return false;
+
+ // Promote the proto, register it with the class pruner, and free the
+ // temporary wrapper (the caller's delete_d removes it from the list).
+ MakeProtoPermanent(Class, TempProto->ProtoId);
+ AddProtoToClassPruner(&(TempProto->Proto), ProtoKey->ClassId,
+ ProtoKey->Templates->Templates);
+ FreeTempProto(TempProto);
+
+ return true;
+} /* MakeTempProtoPerm */
+
+/*---------------------------------------------------------------------------*/
+/**
+ * This routine writes the matches in Results to File.
+ * + * @param results match results to write to File + * + * Globals: none + */ +void Classify::PrintAdaptiveMatchResults(const ADAPT_RESULTS& results) { + for (int i = 0; i < results.match.size(); ++i) { + tprintf("%s ", unicharset.debug_str(results.match[i].unichar_id).c_str()); + results.match[i].Print(); + } +} /* PrintAdaptiveMatchResults */ + +/*---------------------------------------------------------------------------*/ +/** + * This routine steps through each matching class in Results + * and removes it from the match list if its rating + * is worse than the BestRating plus a pad. In other words, + * all good matches get moved to the front of the classes + * array. + * + * @param Results contains matches to be filtered + * + * Globals: + * - matcher_bad_match_pad defines a "bad match" + */ +void Classify::RemoveBadMatches(ADAPT_RESULTS *Results) { + int Next, NextGood; + float BadMatchThreshold; + static const char* romans = "i v x I V X"; + BadMatchThreshold = Results->best_rating - matcher_bad_match_pad; + + if (classify_bln_numeric_mode) { + UNICHAR_ID unichar_id_one = unicharset.contains_unichar("1") ? + unicharset.unichar_to_id("1") : -1; + UNICHAR_ID unichar_id_zero = unicharset.contains_unichar("0") ? 
+ unicharset.unichar_to_id("0") : -1; + float scored_one = ScoredUnichar(unichar_id_one, *Results); + float scored_zero = ScoredUnichar(unichar_id_zero, *Results); + + for (Next = NextGood = 0; Next < Results->match.size(); Next++) { + const UnicharRating& match = Results->match[Next]; + if (match.rating >= BadMatchThreshold) { + if (!unicharset.get_isalpha(match.unichar_id) || + strstr(romans, + unicharset.id_to_unichar(match.unichar_id)) != nullptr) { + } else if (unicharset.eq(match.unichar_id, "l") && + scored_one < BadMatchThreshold) { + Results->match[Next].unichar_id = unichar_id_one; + } else if (unicharset.eq(match.unichar_id, "O") && + scored_zero < BadMatchThreshold) { + Results->match[Next].unichar_id = unichar_id_zero; + } else { + Results->match[Next].unichar_id = INVALID_UNICHAR_ID; // Don't copy. + } + if (Results->match[Next].unichar_id != INVALID_UNICHAR_ID) { + if (NextGood == Next) { + ++NextGood; + } else { + Results->match[NextGood++] = Results->match[Next]; + } + } + } + } + } else { + for (Next = NextGood = 0; Next < Results->match.size(); Next++) { + if (Results->match[Next].rating >= BadMatchThreshold) { + if (NextGood == Next) { + ++NextGood; + } else { + Results->match[NextGood++] = Results->match[Next]; + } + } + } + } + Results->match.resize(NextGood); +} /* RemoveBadMatches */ + +/*----------------------------------------------------------------------------*/ +/** + * This routine discards extra digits or punctuation from the results. + * We keep only the top 2 punctuation answers and the top 1 digit answer if + * present. + * + * @param Results contains matches to be filtered + */ +void Classify::RemoveExtraPuncs(ADAPT_RESULTS *Results) { + int Next, NextGood; + int punc_count; /*no of garbage characters */ + int digit_count; + /*garbage characters */ + static char punc_chars[] = ". , ; : / ` ~ ' - = \\ | \" ! 
_ ^";
+ static char digit_chars[] = "0 1 2 3 4 5 6 7 8 9";
+
+ punc_count = 0;
+ digit_count = 0;
+ // Compact the kept matches in place; punc_count/digit_count enforce the
+ // "top 2 punctuation / top 1 digit" limits described above.
+ for (Next = NextGood = 0; Next < Results->match.size(); Next++) {
+ const UnicharRating& match = Results->match[Next];
+ bool keep = true;
+ if (strstr(punc_chars,
+ unicharset.id_to_unichar(match.unichar_id)) != nullptr) {
+ if (punc_count >= 2)
+ keep = false;
+ punc_count++;
+ } else {
+ if (strstr(digit_chars,
+ unicharset.id_to_unichar(match.unichar_id)) != nullptr) {
+ if (digit_count >= 1)
+ keep = false;
+ digit_count++;
+ }
+ }
+ if (keep) {
+ if (NextGood == Next) {
+ // Nothing dropped yet; no copy needed.
+ ++NextGood;
+ } else {
+ Results->match[NextGood++] = match;
+ }
+ }
+ }
+ Results->match.resize(NextGood);
+} /* RemoveExtraPuncs */
+
+/*---------------------------------------------------------------------------*/
+/**
+ * This routine resets the internal thresholds inside
+ * the integer matcher to correspond to the specified
+ * threshold.
+ *
+ * @param Threshold threshold for creating new templates
+ *
+ * Globals:
+ * - matcher_good_threshold default good match rating
+ */
+void Classify::SetAdaptiveThreshold(float Threshold) {
+ // The default good-match threshold maps to a fixed 0.9; any other value
+ // is inverted so that a better (lower) threshold gives a larger value.
+ Threshold = (Threshold == matcher_good_threshold) ? 0.9: (1.0 - Threshold);
+ // Scale to the integer matcher's 0..255 range for both parameters.
+ classify_adapt_proto_threshold.set_value(
+ ClipToRange<int>(255 * Threshold, 0, 255));
+ classify_adapt_feature_threshold.set_value(
+ ClipToRange<int>(255 * Threshold, 0, 255));
+} /* SetAdaptiveThreshold */
+
+#ifndef GRAPHICS_DISABLED
+
+/*---------------------------------------------------------------------------*/
+/**
+ * This routine displays debug information for the best config
+ * of the given shape_id for the given set of features.
+ *
+ * @param shape_id classifier id to work with
+ * @param features features of the unknown character
+ * @param num_features Number of features in the features array.
 */
+
+void Classify::ShowBestMatchFor(int shape_id,
+ const INT_FEATURE_STRUCT* features,
+ int num_features) {
+ uint32_t config_mask;
+ if (UnusedClassIdIn(PreTrainedTemplates, shape_id)) {
+ tprintf("No built-in templates for class/shape %d\n", shape_id);
+ return;
+ }
+ if (num_features <= 0) {
+ tprintf("Illegal blob (char norm features)!\n");
+ return;
+ }
+ UnicharRating cn_result;
+ classify_norm_method.set_value(character);
+ // First pass: match with all protos/configs enabled to find the best config.
+ im_.Match(ClassForClassId(PreTrainedTemplates, shape_id),
+ AllProtosOn, AllConfigsOn,
+ num_features, features, &cn_result,
+ classify_adapt_feature_threshold, NO_DEBUG,
+ matcher_debug_separate_windows);
+ tprintf("\n");
+ config_mask = 1 << cn_result.config;
+
+ tprintf("Static Shape ID: %d\n", shape_id);
+ ShowMatchDisplay();
+ // Second pass: re-match with only the best config enabled and debug flags
+ // on, so the display shows just that configuration.
+ im_.Match(ClassForClassId(PreTrainedTemplates, shape_id), AllProtosOn,
+ &config_mask, num_features, features, &cn_result,
+ classify_adapt_feature_threshold, matcher_debug_flags,
+ matcher_debug_separate_windows);
+ UpdateMatchDisplay();
+} /* ShowBestMatchFor */
+
+#endif // !GRAPHICS_DISABLED
+
+// Returns a string for the classifier class_id: either the corresponding
+// unicharset debug_str or the shape_table_ debug str.
+STRING Classify::ClassIDToDebugStr(const INT_TEMPLATES_STRUCT* templates,
+ int class_id, int config_id) const {
+ STRING class_string;
+ // Only the pre-trained templates are indexed by the shape table.
+ if (templates == PreTrainedTemplates && shape_table_ != nullptr) {
+ int shape_id = ClassAndConfigIDToFontOrShapeID(class_id, config_id);
+ class_string = shape_table_->DebugStr(shape_id);
+ } else {
+ class_string = unicharset.debug_str(class_id);
+ }
+ return class_string;
+}
+
+// Converts a classifier class_id index to a shape_table_ index
+int Classify::ClassAndConfigIDToFontOrShapeID(int class_id,
+ int int_result_config) const {
+ int font_set_id = PreTrainedTemplates->Class[class_id]->font_set_id;
+ // Older inttemps have no font_ids.
// Converts a classifier class_id index with a config ID to:
// shape_table_ present: a shape_table_ index OR
// No shape_table_: a font ID.
int Classify::ClassAndConfigIDToFontOrShapeID(int class_id,
                                              int int_result_config) const {
  int font_set_id = PreTrainedTemplates->Class[class_id]->font_set_id;
  // Older inttemps have no font_ids.
  if (font_set_id < 0)
    return kBlankFontinfoId;
  const FontSet &fs = fontset_table_.get(font_set_id);
  ASSERT_HOST(int_result_config >= 0 && int_result_config < fs.size);
  return fs.configs[int_result_config];
}

// Converts a shape_table_ index to a classifier class_id index (not a
// unichar-id!). Uses a search, so not fast.
int Classify::ShapeIDToClassID(int shape_id) const {
  for (int id = 0; id < PreTrainedTemplates->NumClasses; ++id) {
    int font_set_id = PreTrainedTemplates->Class[id]->font_set_id;
    ASSERT_HOST(font_set_id >= 0);
    const FontSet &fs = fontset_table_.get(font_set_id);
    // Each config of a class stores the shape index it was trained for;
    // return the first class that references shape_id.
    for (int config = 0; config < fs.size; ++config) {
      if (fs.configs[config] == shape_id)
        return id;
    }
  }
  tprintf("Shape %d not found\n", shape_id);
  return -1;
}

// Returns true if the given TEMP_CONFIG is good enough to make it
// a permanent config.
bool Classify::TempConfigReliable(CLASS_ID class_id,
                                  const TEMP_CONFIG &config) {
  if (classify_learning_debug_level >= 1) {
    tprintf("NumTimesSeen for config of %s is %d\n",
            getDict().getUnicharset().debug_str(class_id).c_str(),
            config->NumTimesSeen);
  }
  // Seen at least the "sufficient" count: reliable unconditionally.
  if (config->NumTimesSeen >= matcher_sufficient_examples_for_prototyping) {
    return true;
  } else if (config->NumTimesSeen < matcher_min_examples_for_prototyping) {
    // Below the bare minimum: never reliable.
    return false;
  } else if (use_ambigs_for_adaption) {
    // Go through the ambigs vector and see whether we have already seen
    // enough times all the characters represented by the ambigs vector.
    const UnicharIdVector *ambigs =
        getDict().getUnicharAmbigs().AmbigsForAdaption(class_id);
    int ambigs_size = (ambigs == nullptr) ? 0 : ambigs->size();
    for (int ambig = 0; ambig < ambigs_size; ++ambig) {
      ADAPT_CLASS ambig_class = AdaptedTemplates->Class[(*ambigs)[ambig]];
      assert(ambig_class != nullptr);
      // An ambig class with no permanent config and too few sightings means
      // this config might be confusable with it; hold off making permanent.
      if (ambig_class->NumPermConfigs == 0 &&
          ambig_class->MaxNumTimesSeen <
          matcher_min_examples_for_prototyping) {
        if (classify_learning_debug_level >= 1) {
          tprintf("Ambig %s has not been seen enough times,"
                  " not making config for %s permanent\n",
                  getDict().getUnicharset().debug_str(
                      (*ambigs)[ambig]).c_str(),
                  getDict().getUnicharset().debug_str(class_id).c_str());
        }
        return false;
      }
    }
  }
  return true;
}

// Walks the reverse-ambigs of class_id and promotes to permanent any of
// their temporary configs that have now become reliable.
void Classify::UpdateAmbigsGroup(CLASS_ID class_id, TBLOB *Blob) {
  const UnicharIdVector *ambigs =
      getDict().getUnicharAmbigs().ReverseAmbigsForAdaption(class_id);
  int ambigs_size = (ambigs == nullptr) ? 0 : ambigs->size();
  if (classify_learning_debug_level >= 1) {
    tprintf("Running UpdateAmbigsGroup for %s class_id=%d\n",
            getDict().getUnicharset().debug_str(class_id).c_str(), class_id);
  }
  for (int ambig = 0; ambig < ambigs_size; ++ambig) {
    CLASS_ID ambig_class_id = (*ambigs)[ambig];
    const ADAPT_CLASS ambigs_class = AdaptedTemplates->Class[ambig_class_id];
    for (int cfg = 0; cfg < MAX_NUM_CONFIGS; ++cfg) {
      // Already-permanent configs need no promotion.
      if (ConfigIsPermanent(ambigs_class, cfg)) continue;
      const TEMP_CONFIG config =
          TempConfigFor(AdaptedTemplates->Class[ambig_class_id], cfg);
      if (config != nullptr && TempConfigReliable(ambig_class_id, config)) {
        if (classify_learning_debug_level >= 1) {
          tprintf("Making config %d of %s permanent\n", cfg,
                  getDict().getUnicharset().debug_str(
                      ambig_class_id).c_str());
        }
        MakePermanent(AdaptedTemplates, ambig_class_id, cfg, Blob);
      }
    }
  }
}
+/****************************************************************************** + ** Filename: blobclass.c + ** Purpose: High level blob classification and training routines. + ** Author: Dan Johnson + ** + ** (c) Copyright Hewlett-Packard Company, 1988. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + ******************************************************************************/ + +#include "blobclass.h" + +#include <cstdio> + +#include "classify.h" +#include "featdefs.h" +#include "mf.h" +#include "normfeat.h" + +namespace tesseract { + +static const char kUnknownFontName[] = "UnknownFont"; + +static STRING_VAR(classify_font_name, kUnknownFontName, + "Default font name to be used in training"); + +/**---------------------------------------------------------------------------- + Public Code +----------------------------------------------------------------------------**/ + +// Finds the name of the training font and returns it in fontname, by cutting +// it out based on the expectation that the filename is of the form: +// /path/to/dir/[lang].[fontname].exp[num] +// The [lang], [fontname] and [num] fields should not have '.' characters. +// If the global parameter classify_font_name is set, its value is used instead. 
// Finds the name of the training font and returns it in fontname, by cutting
// it out based on the expectation that the filename is of the form:
// /path/to/dir/[lang].[fontname].exp[num]
// The [lang], [fontname] and [num] fields should not have '.' characters.
// If the global parameter classify_font_name is set, its value is used instead.
void ExtractFontName(const char* filename, STRING* fontname) {
  *fontname = classify_font_name;
  if (*fontname == kUnknownFontName) {
    // filename is expected to be of the form [lang].[fontname].exp[num]
    // The [lang], [fontname] and [num] fields should not have '.' characters.
    // Dots in the directory part are skipped by searching from the basename.
    const char *basename = strrchr(filename, '/');
    const char *firstdot = strchr(basename ? basename : filename, '.');
    const char *lastdot = strrchr(filename, '.');
    // Need two distinct dots to bracket the font name; otherwise keep
    // kUnknownFontName.
    if (firstdot != lastdot && firstdot != nullptr && lastdot != nullptr) {
      ++firstdot;
      *fontname = firstdot;
      fontname->truncate_at(lastdot - firstdot);
    }
  }
}


/*---------------------------------------------------------------------------*/

// Extracts features from the given blob and saves them in the tr_file_data_
// member variable.
// fontname: Name of font that this blob was printed in.
// cn_denorm: Character normalization transformation to apply to the blob.
// fx_info: Character normalization parameters computed with cn_denorm.
// blob_text: Ground truth text for the blob.
void Classify::LearnBlob(const STRING& fontname, TBLOB* blob,
                         const DENORM& cn_denorm,
                         const INT_FX_RESULT_STRUCT& fx_info,
                         const char* blob_text) {
  CHAR_DESC CharDesc = NewCharDescription(feature_defs_);
  // Fill the four feature-set slots: micro features, char-norm features,
  // int cn features and int geo features (slot order fixed by feature_defs_
  // — TODO confirm against featdefs.h).
  CharDesc->FeatureSets[0] = ExtractMicros(blob, cn_denorm);
  CharDesc->FeatureSets[1] = ExtractCharNormFeatures(fx_info);
  CharDesc->FeatureSets[2] = ExtractIntCNFeatures(*blob, fx_info);
  CharDesc->FeatureSets[3] = ExtractIntGeoFeatures(*blob, fx_info);

  if (ValidCharDescription(feature_defs_, CharDesc)) {
    // Label the features with a class name and font name.
    tr_file_data_ += "\n";
    tr_file_data_ += fontname;
    tr_file_data_ += " ";
    tr_file_data_ += blob_text;
    tr_file_data_ += "\n";

    // write micro-features to file and clean up
    WriteCharDescription(feature_defs_, CharDesc, &tr_file_data_);
  } else {
    tprintf("Blob learned was invalid!\n");
  }
  FreeCharDescription(CharDesc);
}  // LearnBlob

// Writes stored training data to a .tr file based on the given filename.
// Returns false on error.
bool Classify::WriteTRFile(const char* filename) {
  bool result = false;
  std::string tr_filename = filename;
  tr_filename += ".tr";
  FILE* fp = fopen(tr_filename.c_str(), "wb");
  if (fp) {
    result =
        tesseract::Serialize(fp, &tr_file_data_[0], tr_file_data_.length());
    fclose(fp);
  }
  // The buffer is discarded whether or not the write succeeded.
  tr_file_data_.truncate_at(0);
  return result;
}
+ ******************************************************************************/ + +#ifndef BLOBCLASS_H +#define BLOBCLASS_H + +/**---------------------------------------------------------------------------- + Include Files and Type Defines +----------------------------------------------------------------------------**/ +#include "strngs.h" + +/**---------------------------------------------------------------------------- + Public Function Prototypes +----------------------------------------------------------------------------**/ +namespace tesseract { +// Finds the name of the training font and returns it in fontname, by cutting +// it out based on the expectation that the filename is of the form: +// /path/to/dir/[lang].[fontname].exp[num] +// The [lang], [fontname] and [num] fields should not have '.' characters. +// If the global parameter classify_font_name is set, its value is used instead. +void ExtractFontName(const char* filename, STRING* fontname); + +} // namespace tesseract. + +#endif diff --git a/tesseract/src/classify/classify.cpp b/tesseract/src/classify/classify.cpp new file mode 100644 index 00000000..939036d0 --- /dev/null +++ b/tesseract/src/classify/classify.cpp @@ -0,0 +1,230 @@ +/////////////////////////////////////////////////////////////////////// +// File: classify.cpp +// Description: classify class. +// Author: Samuel Charron +// +// (C) Copyright 2006, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
// Stub Classify used when the legacy (static/adaptive) classifier is
// compiled out: only registers the parameters still read by other modules.
Classify::Classify()
    :
    INT_MEMBER(classify_debug_level, 0, "Classify debug level",
               this->params()),

    BOOL_MEMBER(classify_bln_numeric_mode, 0,
                "Assume the input is numbers [0-9].", this->params()),

    double_MEMBER(classify_max_rating_ratio, 1.5,
                  "Veto ratio between classifier ratings", this->params()),

    double_MEMBER(classify_max_certainty_margin, 5.5,
                  "Veto difference between classifier certainties",
                  this->params()),

    // The dictionary is owned by (and back-references) this Classify.
    dict_(this) {}

Classify::~Classify() {}
BOOL_MEMBER(classify_enable_adaptive_matcher, 1, + "Enable adaptive classifier", this->params()), + BOOL_MEMBER(classify_use_pre_adapted_templates, 0, + "Use pre-adapted classifier templates", this->params()), + BOOL_MEMBER(classify_save_adapted_templates, 0, + "Save adapted templates to a file", this->params()), + BOOL_MEMBER(classify_enable_adaptive_debugger, 0, "Enable match debugger", + this->params()), + BOOL_MEMBER(classify_nonlinear_norm, 0, + "Non-linear stroke-density normalization", this->params()), + INT_MEMBER(matcher_debug_level, 0, "Matcher Debug Level", this->params()), + INT_MEMBER(matcher_debug_flags, 0, "Matcher Debug Flags", this->params()), + INT_MEMBER(classify_learning_debug_level, 0, "Learning Debug Level: ", + this->params()), + double_MEMBER(matcher_good_threshold, 0.125, "Good Match (0-1)", + this->params()), + double_MEMBER(matcher_reliable_adaptive_result, 0.0, "Great Match (0-1)", + this->params()), + double_MEMBER(matcher_perfect_threshold, 0.02, "Perfect Match (0-1)", + this->params()), + double_MEMBER(matcher_bad_match_pad, 0.15, "Bad Match Pad (0-1)", + this->params()), + double_MEMBER(matcher_rating_margin, 0.1, "New template margin (0-1)", + this->params()), + double_MEMBER(matcher_avg_noise_size, 12.0, "Avg. 
noise blob length", + this->params()), + INT_MEMBER(matcher_permanent_classes_min, 1, "Min # of permanent classes", + this->params()), + INT_MEMBER(matcher_min_examples_for_prototyping, 3, + "Reliable Config Threshold", this->params()), + INT_MEMBER(matcher_sufficient_examples_for_prototyping, 5, + "Enable adaption even if the ambiguities have not been seen", + this->params()), + double_MEMBER(matcher_clustering_max_angle_delta, 0.015, + "Maximum angle delta for prototype clustering", + this->params()), + double_MEMBER(classify_misfit_junk_penalty, 0.0, + "Penalty to apply when a non-alnum is vertically out of " + "its expected textline position", + this->params()), + double_MEMBER(rating_scale, 1.5, "Rating scaling factor", this->params()), + double_MEMBER(certainty_scale, 20.0, "Certainty scaling factor", + this->params()), + double_MEMBER(tessedit_class_miss_scale, 0.00390625, + "Scale factor for features not used", this->params()), + double_MEMBER( + classify_adapted_pruning_factor, 2.5, + "Prune poor adapted results this much worse than best result", + this->params()), + double_MEMBER(classify_adapted_pruning_threshold, -1.0, + "Threshold at which classify_adapted_pruning_factor starts", + this->params()), + INT_MEMBER(classify_adapt_proto_threshold, 230, + "Threshold for good protos during adaptive 0-255", + this->params()), + INT_MEMBER(classify_adapt_feature_threshold, 230, + "Threshold for good features during adaptive 0-255", + this->params()), + BOOL_MEMBER(disable_character_fragments, true, + "Do not include character fragments in the" + " results of the classifier", + this->params()), + double_MEMBER(classify_character_fragments_garbage_certainty_threshold, + -3.0, + "Exclude fragments that do not look like whole" + " characters from training and adaption", + this->params()), + BOOL_MEMBER(classify_debug_character_fragments, false, + "Bring up graphical debugging windows for fragments training", + this->params()), + 
BOOL_MEMBER(matcher_debug_separate_windows, false, + "Use two different windows for debugging the matching: " + "One for the protos and one for the features.", + this->params()), + STRING_MEMBER(classify_learn_debug_str, "", "Class str to debug learning", + this->params()), + INT_MEMBER(classify_class_pruner_threshold, 229, + "Class Pruner Threshold 0-255", this->params()), + INT_MEMBER(classify_class_pruner_multiplier, 15, + "Class Pruner Multiplier 0-255: ", this->params()), + INT_MEMBER(classify_cp_cutoff_strength, 7, + "Class Pruner CutoffStrength: ", this->params()), + INT_MEMBER(classify_integer_matcher_multiplier, 10, + "Integer Matcher Multiplier 0-255: ", this->params()), + BOOL_MEMBER(classify_bln_numeric_mode, 0, + "Assume the input is numbers [0-9].", this->params()), + double_MEMBER(speckle_large_max_size, 0.30, "Max large speckle size", + this->params()), + double_MEMBER(speckle_rating_penalty, 10.0, + "Penalty to add to worst rating for noise", this->params()), + im_(&classify_debug_level), + dict_(this) { + using namespace std::placeholders; // for _1, _2 + fontinfo_table_.set_clear_callback(std::bind(FontInfoDeleteCallback, _1)); + fontset_table_.set_clear_callback(std::bind(FontSetDeleteCallback, _1)); + + InitFeatureDefs(&feature_defs_); +} + +Classify::~Classify() { + EndAdaptiveClassifier(); + delete learn_debug_win_; + delete learn_fragmented_word_debug_win_; + delete learn_fragments_debug_win_; +} + + +// Takes ownership of the given classifier, and uses it for future calls +// to CharNormClassifier. +void Classify::SetStaticClassifier(ShapeClassifier* static_classifier) { + delete static_classifier_; + static_classifier_ = static_classifier; +} + +// Moved from speckle.cpp +// Adds a noise classification result that is a bit worse than the worst +// current result, or the worst possible result if no current results. 
+void Classify::AddLargeSpeckleTo(int blob_length, BLOB_CHOICE_LIST *choices) { + BLOB_CHOICE_IT bc_it(choices); + // If there is no classifier result, we will use the worst possible certainty + // and corresponding rating. + float certainty = -getDict().certainty_scale; + float rating = rating_scale * blob_length; + if (!choices->empty() && blob_length > 0) { + bc_it.move_to_last(); + BLOB_CHOICE* worst_choice = bc_it.data(); + // Add speckle_rating_penalty to worst rating, matching old value. + rating = worst_choice->rating() + speckle_rating_penalty; + // Compute the rating to correspond to the certainty. (Used to be kept + // the same, but that messes up the language model search.) + certainty = -rating * getDict().certainty_scale / + (rating_scale * blob_length); + } + auto* blob_choice = new BLOB_CHOICE(UNICHAR_SPACE, rating, certainty, + -1, 0.0f, FLT_MAX, 0, + BCC_SPECKLE_CLASSIFIER); + bc_it.add_to_end(blob_choice); +} + +// Returns true if the blob is small enough to be a large speckle. +bool Classify::LargeSpeckle(const TBLOB &blob) { + double speckle_size = kBlnXHeight * speckle_large_max_size; + TBOX bbox = blob.bounding_box(); + return bbox.width() < speckle_size && bbox.height() < speckle_size; +} + +} // namespace tesseract + +#endif // def DISABLED_LEGACY_ENGINE diff --git a/tesseract/src/classify/classify.h b/tesseract/src/classify/classify.h new file mode 100644 index 00000000..44e0a77b --- /dev/null +++ b/tesseract/src/classify/classify.h @@ -0,0 +1,583 @@ +/////////////////////////////////////////////////////////////////////// +// File: classify.h +// Description: classify class. +// Author: Samuel Charron +// +// (C) Copyright 2006, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
// Minimal Classify declaration used when DISABLED_LEGACY_ENGINE is defined:
// retains only the dictionary accessor and the parameters that other
// modules still reference.
class Classify : public CCStruct {
 public:
  Classify();
  virtual ~Classify();
  // Returns the dictionary owned by this classifier instance.
  virtual Dict& getDict() {
    return dict_;
  }

  // Member variables.

  INT_VAR_H(classify_debug_level, 0, "Classify debug level");

  BOOL_VAR_H(classify_bln_numeric_mode, 0,
             "Assume the input is numbers [0-9].");

  double_VAR_H(classify_max_rating_ratio, 1.5,
               "Veto ratio between classifier ratings");

  double_VAR_H(classify_max_certainty_margin, 5.5,
               "Veto difference between classifier certainties");

 private:
  Dict dict_;
};
UnicharRating; + +// How segmented is a blob. In this enum, character refers to a classifiable +// unit, but that is too long and character is usually easier to understand. +enum CharSegmentationType { + CST_FRAGMENT, // A partial character. + CST_WHOLE, // A correctly segmented character. + CST_IMPROPER, // More than one but less than 2 characters. + CST_NGRAM // Multiple characters. +}; + +class TESS_API Classify : public CCStruct { + public: + Classify(); + ~Classify() override; + virtual Dict& getDict() { + return dict_; + } + + const ShapeTable* shape_table() const { + return shape_table_; + } + + // Takes ownership of the given classifier, and uses it for future calls + // to CharNormClassifier. + void SetStaticClassifier(ShapeClassifier* static_classifier); + + // Adds a noise classification result that is a bit worse than the worst + // current result, or the worst possible result if no current results. + void AddLargeSpeckleTo(int blob_length, BLOB_CHOICE_LIST *choices); + + // Returns true if the blob is small enough to be a large speckle. + bool LargeSpeckle(const TBLOB &blob); + + /* adaptive.cpp ************************************************************/ + ADAPT_TEMPLATES NewAdaptedTemplates(bool InitFromUnicharset); + int GetFontinfoId(ADAPT_CLASS Class, uint8_t ConfigId); + // Runs the class pruner from int_templates on the given features, returning + // the number of classes output in results. + // int_templates Class pruner tables + // num_features Number of features in blob + // features Array of features + // normalization_factors (input) Array of int_templates->NumClasses fudge + // factors from blob normalization process. + // (Indexed by CLASS_INDEX) + // expected_num_features (input) Array of int_templates->NumClasses + // expected number of features for each class. + // (Indexed by CLASS_INDEX) + // results (output) Sorted Array of pruned classes. 
+ // Array must be sized to take the maximum possible + // number of outputs : int_templates->NumClasses. + int PruneClasses(const INT_TEMPLATES_STRUCT* int_templates, int num_features, + int keep_this, const INT_FEATURE_STRUCT* features, + const uint8_t* normalization_factors, + const uint16_t* expected_num_features, + std::vector<CP_RESULT_STRUCT>* results); + void ReadNewCutoffs(TFile* fp, uint16_t* Cutoffs); + void PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates); + void WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates); + ADAPT_TEMPLATES ReadAdaptedTemplates(TFile* File); + /* normmatch.cpp ************************************************************/ + float ComputeNormMatch(CLASS_ID ClassId, + const FEATURE_STRUCT& feature, bool DebugMatch); + void FreeNormProtos(); + NORM_PROTOS* ReadNormProtos(TFile* fp); + /* protos.cpp ***************************************************************/ + void ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class); + INT_TEMPLATES CreateIntTemplates(CLASSES FloatProtos, + const UNICHARSET& target_unicharset); + /* adaptmatch.cpp ***********************************************************/ + + // Learns the given word using its chopped_word, seam_array, denorm, + // box_word, best_state, and correct_text to learn both correctly and + // incorrectly segmented blobs. If fontname is not nullptr, then LearnBlob + // is called and the data will be saved in an internal buffer. + // Otherwise AdaptToBlob is called for adaption within a document. + void LearnWord(const char* fontname, WERD_RES* word); + + // Builds a blob of length fragments, from the word, starting at start, + // and then learns it, as having the given correct_text. + // If fontname is not nullptr, then LearnBlob is called and the data will be + // saved in an internal buffer for static training. + // Otherwise AdaptToBlob is called for adaption within a document. 
+ // threshold is a magic number required by AdaptToChar and generated by + // ComputeAdaptionThresholds. + // Although it can be partly inferred from the string, segmentation is + // provided to explicitly clarify the character segmentation. + void LearnPieces(const char* fontname, int start, int length, float threshold, + CharSegmentationType segmentation, const char* correct_text, + WERD_RES* word); + void InitAdaptiveClassifier(TessdataManager* mgr); + void InitAdaptedClass(TBLOB *Blob, + CLASS_ID ClassId, + int FontinfoId, + ADAPT_CLASS Class, + ADAPT_TEMPLATES Templates); + void AmbigClassifier(const std::vector<INT_FEATURE_STRUCT>& int_features, + const INT_FX_RESULT_STRUCT& fx_info, + const TBLOB *blob, + INT_TEMPLATES templates, + ADAPT_CLASS *classes, + UNICHAR_ID *ambiguities, + ADAPT_RESULTS *results); + void MasterMatcher(INT_TEMPLATES templates, + int16_t num_features, + const INT_FEATURE_STRUCT* features, + const uint8_t* norm_factors, + ADAPT_CLASS* classes, + int debug, + int matcher_multiplier, + const TBOX& blob_box, + const std::vector<CP_RESULT_STRUCT>& results, + ADAPT_RESULTS* final_results); + // Converts configs to fonts, and if the result is not adapted, and a + // shape_table_ is present, the shape is expanded to include all + // unichar_ids represented, before applying a set of corrections to the + // distance rating in int_result, (see ComputeCorrectedRating.) + // The results are added to the final_results output. + void ExpandShapesAndApplyCorrections(ADAPT_CLASS* classes, + bool debug, + int class_id, + int bottom, int top, + float cp_rating, + int blob_length, + int matcher_multiplier, + const uint8_t* cn_factors, + UnicharRating* int_result, + ADAPT_RESULTS* final_results); + // Applies a set of corrections to the distance im_rating, + // including the cn_correction, miss penalty and additional penalty + // for non-alnums being vertical misfits. Returns the corrected distance. 
+ double ComputeCorrectedRating(bool debug, int unichar_id, double cp_rating, + double im_rating, int feature_misses, + int bottom, int top, + int blob_length, int matcher_multiplier, + const uint8_t* cn_factors); + void ConvertMatchesToChoices(const DENORM& denorm, const TBOX& box, + ADAPT_RESULTS *Results, + BLOB_CHOICE_LIST *Choices); + void AddNewResult(const UnicharRating& new_result, ADAPT_RESULTS *results); + int GetAdaptiveFeatures(TBLOB *Blob, + INT_FEATURE_ARRAY IntFeatures, + FEATURE_SET *FloatFeatures); + +#ifndef GRAPHICS_DISABLED + void DebugAdaptiveClassifier(TBLOB *Blob, + ADAPT_RESULTS *Results); +#endif + PROTO_ID MakeNewTempProtos(FEATURE_SET Features, + int NumBadFeat, + FEATURE_ID BadFeat[], + INT_CLASS IClass, + ADAPT_CLASS Class, + BIT_VECTOR TempProtoMask); + int MakeNewTemporaryConfig(ADAPT_TEMPLATES Templates, + CLASS_ID ClassId, + int FontinfoId, + int NumFeatures, + INT_FEATURE_ARRAY Features, + FEATURE_SET FloatFeatures); + void MakePermanent(ADAPT_TEMPLATES Templates, + CLASS_ID ClassId, + int ConfigId, + TBLOB *Blob); + void PrintAdaptiveMatchResults(const ADAPT_RESULTS& results); + void RemoveExtraPuncs(ADAPT_RESULTS *Results); + void RemoveBadMatches(ADAPT_RESULTS *Results); + void SetAdaptiveThreshold(float Threshold); + void ShowBestMatchFor(int shape_id, + const INT_FEATURE_STRUCT* features, + int num_features); + // Returns a string for the classifier class_id: either the corresponding + // unicharset debug_str or the shape_table_ debug str. + STRING ClassIDToDebugStr(const INT_TEMPLATES_STRUCT* templates, + int class_id, int config_id) const; + // Converts a classifier class_id index with a config ID to: + // shape_table_ present: a shape_table_ index OR + // No shape_table_: a font ID. + // Without shape training, each class_id, config pair represents a single + // unichar id/font combination, so this function looks up the corresponding + // font id. 
+ // With shape training, each class_id, config pair represents a single + // shape table index, so the fontset_table stores the shape table index, + // and the shape_table_ must be consulted to obtain the actual unichar_id/ + // font combinations that the shape represents. + int ClassAndConfigIDToFontOrShapeID(int class_id, + int int_result_config) const; + // Converts a shape_table_ index to a classifier class_id index (not a + // unichar-id!). Uses a search, so not fast. + int ShapeIDToClassID(int shape_id) const; + UNICHAR_ID *BaselineClassifier( + TBLOB *Blob, const std::vector<INT_FEATURE_STRUCT>& int_features, + const INT_FX_RESULT_STRUCT& fx_info, + ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results); + int CharNormClassifier(TBLOB *blob, + const TrainingSample& sample, + ADAPT_RESULTS *adapt_results); + + // As CharNormClassifier, but operates on a TrainingSample and outputs to + // a GenericVector of ShapeRating without conversion to classes. + int CharNormTrainingSample(bool pruner_only, int keep_this, + const TrainingSample& sample, + std::vector<UnicharRating>* results); + UNICHAR_ID *GetAmbiguities(TBLOB *Blob, CLASS_ID CorrectClass); + void DoAdaptiveMatch(TBLOB *Blob, ADAPT_RESULTS *Results); + void AdaptToChar(TBLOB* Blob, CLASS_ID ClassId, int FontinfoId, + float Threshold, ADAPT_TEMPLATES adaptive_templates); + void DisplayAdaptedChar(TBLOB* blob, INT_CLASS_STRUCT* int_class); + bool AdaptableWord(WERD_RES* word); + void EndAdaptiveClassifier(); + void SettupPass1(); + void SettupPass2(); + void AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices); + void ClassifyAsNoise(ADAPT_RESULTS *Results); + void ResetAdaptiveClassifierInternal(); + void SwitchAdaptiveClassifier(); + void StartBackupAdaptiveClassifier(); + + int GetCharNormFeature(const INT_FX_RESULT_STRUCT& fx_info, + INT_TEMPLATES templates, + uint8_t* pruner_norm_array, + uint8_t* char_norm_array); + // Computes the char_norm_array for the unicharset and, if not nullptr, the + // 
pruner_array as appropriate according to the existence of the shape_table. + // The norm_feature is deleted as it is almost certainly no longer needed. + void ComputeCharNormArrays(FEATURE_STRUCT* norm_feature, + INT_TEMPLATES_STRUCT* templates, + uint8_t* char_norm_array, + uint8_t* pruner_array); + + bool TempConfigReliable(CLASS_ID class_id, const TEMP_CONFIG &config); + void UpdateAmbigsGroup(CLASS_ID class_id, TBLOB *Blob); + + bool AdaptiveClassifierIsFull() const { return NumAdaptationsFailed > 0; } + bool AdaptiveClassifierIsEmpty() const { + return AdaptedTemplates->NumPermClasses == 0; + } + bool LooksLikeGarbage(TBLOB *blob); + void RefreshDebugWindow(ScrollView **win, const char *msg, + int y_offset, const TBOX &wbox); + // intfx.cpp + // Computes the DENORMS for bl(baseline) and cn(character) normalization + // during feature extraction. The input denorm describes the current state + // of the blob, which is usually a baseline-normalized word. + // The Transforms setup are as follows: + // Baseline Normalized (bl) Output: + // We center the grapheme by aligning the x-coordinate of its centroid with + // x=128 and leaving the already-baseline-normalized y as-is. + // + // Character Normalized (cn) Output: + // We align the grapheme's centroid at the origin and scale it + // asymmetrically in x and y so that the 2nd moments are a standard value + // (51.2) ie the result is vaguely square. + // If classify_nonlinear_norm is true: + // A non-linear normalization is setup that attempts to evenly distribute + // edges across x and y. + // + // Some of the fields of fx_info are also setup: + // Length: Total length of outline. + // Rx: Rounded y second moment. (Reversed by convention.) + // Ry: rounded x second moment. + // Xmean: Rounded x center of mass of the blob. + // Ymean: Rounded y center of mass of the blob. 
+ static void SetupBLCNDenorms(const TBLOB& blob, bool nonlinear_norm, + DENORM* bl_denorm, DENORM* cn_denorm, + INT_FX_RESULT_STRUCT* fx_info); + + // Extracts sets of 3-D features of length kStandardFeatureLength (=12.8), as + // (x,y) position and angle as measured counterclockwise from the vector + // <-1, 0>, from blob using two normalizations defined by bl_denorm and + // cn_denorm. See SetpuBLCNDenorms for definitions. + // If outline_cn_counts is not nullptr, on return it contains the cumulative + // number of cn features generated for each outline in the blob (in order). + // Thus after the first outline, there were (*outline_cn_counts)[0] features, + // after the second outline, there were (*outline_cn_counts)[1] features etc. + static void ExtractFeatures(const TBLOB& blob, + bool nonlinear_norm, + std::vector<INT_FEATURE_STRUCT>* bl_features, + std::vector<INT_FEATURE_STRUCT>* cn_features, + INT_FX_RESULT_STRUCT* results, + GenericVector<int>* outline_cn_counts); + /* float2int.cpp ************************************************************/ + void ClearCharNormArray(uint8_t* char_norm_array); + void ComputeIntCharNormArray(const FEATURE_STRUCT& norm_feature, + uint8_t* char_norm_array); + void ComputeIntFeatures(FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures); + /* intproto.cpp *************************************************************/ + INT_TEMPLATES ReadIntTemplates(TFile* fp); + void WriteIntTemplates(FILE *File, INT_TEMPLATES Templates, + const UNICHARSET& target_unicharset); + CLASS_ID GetClassToDebug(const char *Prompt, bool* adaptive_on, + bool* pretrained_on, int* shape_id); + void ShowMatchDisplay(); + /* font detection ***********************************************************/ + UnicityTable<FontInfo>& get_fontinfo_table() { + return fontinfo_table_; + } + const UnicityTable<FontInfo>& get_fontinfo_table() const { + return fontinfo_table_; + } + UnicityTable<FontSet>& get_fontset_table() { + return fontset_table_; + } + /* 
mfoutline.cpp ***********************************************************/ + void NormalizeOutlines(LIST Outlines, float *XScale, float *YScale); + /* outfeat.cpp ***********************************************************/ + FEATURE_SET ExtractOutlineFeatures(TBLOB *Blob); + /* picofeat.cpp ***********************************************************/ + FEATURE_SET ExtractPicoFeatures(TBLOB *Blob); + FEATURE_SET ExtractIntCNFeatures(const TBLOB& blob, + const INT_FX_RESULT_STRUCT& fx_info); + FEATURE_SET ExtractIntGeoFeatures(const TBLOB& blob, + const INT_FX_RESULT_STRUCT& fx_info); + /* blobclass.cpp ***********************************************************/ + // Extracts features from the given blob and saves them in the tr_file_data_ + // member variable. + // fontname: Name of font that this blob was printed in. + // cn_denorm: Character normalization transformation to apply to the blob. + // fx_info: Character normalization parameters computed with cn_denorm. + // blob_text: Ground truth text for the blob. + void LearnBlob(const STRING& fontname, TBLOB* Blob, const DENORM& cn_denorm, + const INT_FX_RESULT_STRUCT& fx_info, const char* blob_text); + // Writes stored training data to a .tr file based on the given filename. + // Returns false on error. + bool WriteTRFile(const char* filename); + + // Member variables. + + // Parameters. + // Set during training (in lang.config) to indicate whether the divisible + // blobs chopper should be used (true for latin script.) + BOOL_VAR_H(allow_blob_division, true, "Use divisible blobs chopping"); + // Set during training (in lang.config) to indicate whether the divisible + // blobs chopper should be used in preference to chopping. Set to true for + // southern Indic scripts. 
+ BOOL_VAR_H(prioritize_division, false, + "Prioritize blob division over chopping"); + BOOL_VAR_H(classify_enable_learning, true, "Enable adaptive classifier"); + INT_VAR_H(classify_debug_level, 0, "Classify debug level"); + + /* mfoutline.cpp ***********************************************************/ + /* control knobs used to control normalization of outlines */ + INT_VAR_H(classify_norm_method, character, "Normalization Method ..."); + double_VAR_H(classify_char_norm_range, 0.2, + "Character Normalization Range ..."); + double_VAR_H(classify_max_rating_ratio, 1.5, + "Veto ratio between classifier ratings"); + double_VAR_H(classify_max_certainty_margin, 5.5, + "Veto difference between classifier certainties"); + + /* adaptmatch.cpp ***********************************************************/ + BOOL_VAR_H(tess_cn_matching, 0, "Character Normalized Matching"); + BOOL_VAR_H(tess_bn_matching, 0, "Baseline Normalized Matching"); + BOOL_VAR_H(classify_enable_adaptive_matcher, 1, "Enable adaptive classifier"); + BOOL_VAR_H(classify_use_pre_adapted_templates, 0, + "Use pre-adapted classifier templates"); + BOOL_VAR_H(classify_save_adapted_templates, 0, + "Save adapted templates to a file"); + BOOL_VAR_H(classify_enable_adaptive_debugger, 0, "Enable match debugger"); + BOOL_VAR_H(classify_nonlinear_norm, 0, + "Non-linear stroke-density normalization"); + INT_VAR_H(matcher_debug_level, 0, "Matcher Debug Level"); + INT_VAR_H(matcher_debug_flags, 0, "Matcher Debug Flags"); + INT_VAR_H(classify_learning_debug_level, 0, "Learning Debug Level: "); + double_VAR_H(matcher_good_threshold, 0.125, "Good Match (0-1)"); + double_VAR_H(matcher_reliable_adaptive_result, 0.0, "Great Match (0-1)"); + double_VAR_H(matcher_perfect_threshold, 0.02, "Perfect Match (0-1)"); + double_VAR_H(matcher_bad_match_pad, 0.15, "Bad Match Pad (0-1)"); + double_VAR_H(matcher_rating_margin, 0.1, "New template margin (0-1)"); + double_VAR_H(matcher_avg_noise_size, 12.0, "Avg. 
noise blob length: "); + INT_VAR_H(matcher_permanent_classes_min, 1, "Min # of permanent classes"); + INT_VAR_H(matcher_min_examples_for_prototyping, 3, + "Reliable Config Threshold"); + INT_VAR_H(matcher_sufficient_examples_for_prototyping, 5, + "Enable adaption even if the ambiguities have not been seen"); + double_VAR_H(matcher_clustering_max_angle_delta, 0.015, + "Maximum angle delta for prototype clustering"); + double_VAR_H(classify_misfit_junk_penalty, 0.0, + "Penalty to apply when a non-alnum is vertically out of " + "its expected textline position"); + double_VAR_H(rating_scale, 1.5, "Rating scaling factor"); + double_VAR_H(certainty_scale, 20.0, "Certainty scaling factor"); + double_VAR_H(tessedit_class_miss_scale, 0.00390625, + "Scale factor for features not used"); + double_VAR_H(classify_adapted_pruning_factor, 2.5, + "Prune poor adapted results this much worse than best result"); + double_VAR_H(classify_adapted_pruning_threshold, -1.0, + "Threshold at which classify_adapted_pruning_factor starts"); + INT_VAR_H(classify_adapt_proto_threshold, 230, + "Threshold for good protos during adaptive 0-255"); + INT_VAR_H(classify_adapt_feature_threshold, 230, + "Threshold for good features during adaptive 0-255"); + BOOL_VAR_H(disable_character_fragments, true, + "Do not include character fragments in the" + " results of the classifier"); + double_VAR_H(classify_character_fragments_garbage_certainty_threshold, -3.0, + "Exclude fragments that do not match any whole character" + " with at least this certainty"); + BOOL_VAR_H(classify_debug_character_fragments, false, + "Bring up graphical debugging windows for fragments training"); + BOOL_VAR_H(matcher_debug_separate_windows, false, + "Use two different windows for debugging the matching: " + "One for the protos and one for the features."); + STRING_VAR_H(classify_learn_debug_str, "", "Class str to debug learning"); + + /* intmatcher.cpp **********************************************************/ + 
INT_VAR_H(classify_class_pruner_threshold, 229, + "Class Pruner Threshold 0-255"); + INT_VAR_H(classify_class_pruner_multiplier, 15, + "Class Pruner Multiplier 0-255: "); + INT_VAR_H(classify_cp_cutoff_strength, 7, + "Class Pruner CutoffStrength: "); + INT_VAR_H(classify_integer_matcher_multiplier, 10, + "Integer Matcher Multiplier 0-255: "); + + BOOL_VAR_H(classify_bln_numeric_mode, 0, + "Assume the input is numbers [0-9]."); + double_VAR_H(speckle_large_max_size, 0.30, "Max large speckle size"); + double_VAR_H(speckle_rating_penalty, 10.0, + "Penalty to add to worst rating for noise"); + + // Use class variables to hold onto built-in templates and adapted templates. + INT_TEMPLATES PreTrainedTemplates = nullptr; + ADAPT_TEMPLATES AdaptedTemplates = nullptr; + // The backup adapted templates are created from the previous page (only) + // so they are always ready and reasonably well trained if the primary + // adapted templates become full. + ADAPT_TEMPLATES BackupAdaptedTemplates = nullptr; + + // Create dummy proto and config masks for use with the built-in templates. + BIT_VECTOR AllProtosOn = nullptr; + BIT_VECTOR AllConfigsOn = nullptr; + BIT_VECTOR AllConfigsOff = nullptr; + BIT_VECTOR TempProtoMask = nullptr; + /* normmatch.cpp */ + NORM_PROTOS* NormProtos = nullptr; + /* font detection ***********************************************************/ + UnicityTable<FontInfo> fontinfo_table_; + // Without shape training, each class_id, config pair represents a single + // unichar id/font combination, so each fontset_table_ entry holds font ids + // for each config in the class. + // With shape training, each class_id, config pair represents a single + // shape_table_ index, so the fontset_table_ stores the shape_table_ index, + // and the shape_table_ must be consulted to obtain the actual unichar_id/ + // font combinations that the shape represents. 
+ UnicityTable<FontSet> fontset_table_; + + protected: + IntegerMatcher im_; + FEATURE_DEFS_STRUCT feature_defs_; + // If a shape_table_ is present, it is used to remap classifier output in + // ExpandShapesAndApplyCorrections. font_ids referenced by configs actually + // mean an index to the shape_table_ and the choices returned are *all* the + // shape_table_ entries at that index. + ShapeTable* shape_table_ = nullptr; + + private: + // The currently active static classifier. + ShapeClassifier* static_classifier_ = nullptr; + ScrollView* learn_debug_win_ = nullptr; + ScrollView* learn_fragmented_word_debug_win_ = nullptr; + ScrollView* learn_fragments_debug_win_ = nullptr; + + // Training data gathered here for all the images in a document. + STRING tr_file_data_; + + Dict dict_; + + GenericVector<uint16_t> shapetable_cutoffs_; + + /* variables used to hold performance statistics */ + int NumAdaptationsFailed = 0; + + // Expected number of features in the class pruner, used to penalize + // unknowns that have too few features (like a c being classified as e) so + // it doesn't recognize everything as '@' or '#'. + // CharNormCutoffs is for the static classifier (with no shapetable). + // BaselineCutoffs gets a copy of CharNormCutoffs as an estimate of the real + // value in the adaptive classifier. Both are indexed by unichar_id. 
+ // shapetable_cutoffs_ provides a similar value for each shape in the + // shape_table_ + uint16_t CharNormCutoffs[MAX_NUM_CLASSES]; + uint16_t BaselineCutoffs[MAX_NUM_CLASSES]; + + public: + bool EnableLearning = true; +}; + +} // namespace tesseract + +#endif // DISABLED_LEGACY_ENGINE + +#endif // TESSERACT_CLASSIFY_CLASSIFY_H_ diff --git a/tesseract/src/classify/cluster.cpp b/tesseract/src/classify/cluster.cpp new file mode 100644 index 00000000..25b2776d --- /dev/null +++ b/tesseract/src/classify/cluster.cpp @@ -0,0 +1,2425 @@ +/****************************************************************************** + ** Filename: cluster.cpp + ** Purpose: Routines for clustering points in N-D space + ** Author: Dan Johnson + ** + ** (c) Copyright Hewlett-Packard Company, 1988. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + *****************************************************************************/ + +#define _USE_MATH_DEFINES // for M_PI + +#include "cluster.h" + +#include "genericheap.h" +#include "kdpair.h" +#include "matrix.h" +#include "tprintf.h" + +#include "helpers.h" + +#include <cfloat> // for FLT_MAX +#include <cmath> // for M_PI +#include <vector> // for std::vector + +namespace tesseract { + +#define HOTELLING 1 // If true use Hotelling's test to decide where to split. +#define FTABLE_X 10 // Size of FTable. +#define FTABLE_Y 100 // Size of FTable. + +// Table of values approximating the cumulative F-distribution for a confidence of 1%. 
+const double FTable[FTABLE_Y][FTABLE_X] = { + {4052.19, 4999.52, 5403.34, 5624.62, 5763.65, 5858.97, 5928.33, 5981.10, 6022.50, 6055.85,}, + {98.502, 99.000, 99.166, 99.249, 99.300, 99.333, 99.356, 99.374, 99.388, 99.399,}, + {34.116, 30.816, 29.457, 28.710, 28.237, 27.911, 27.672, 27.489, 27.345, 27.229,}, + {21.198, 18.000, 16.694, 15.977, 15.522, 15.207, 14.976, 14.799, 14.659, 14.546,}, + {16.258, 13.274, 12.060, 11.392, 10.967, 10.672, 10.456, 10.289, 10.158, 10.051,}, + {13.745, 10.925, 9.780, 9.148, 8.746, 8.466, 8.260, 8.102, 7.976, 7.874,}, + {12.246, 9.547, 8.451, 7.847, 7.460, 7.191, 6.993, 6.840, 6.719, 6.620,}, + {11.259, 8.649, 7.591, 7.006, 6.632, 6.371, 6.178, 6.029, 5.911, 5.814,}, + {10.561, 8.022, 6.992, 6.422, 6.057, 5.802, 5.613, 5.467, 5.351, 5.257,}, + {10.044, 7.559, 6.552, 5.994, 5.636, 5.386, 5.200, 5.057, 4.942, 4.849,}, + { 9.646, 7.206, 6.217, 5.668, 5.316, 5.069, 4.886, 4.744, 4.632, 4.539,}, + { 9.330, 6.927, 5.953, 5.412, 5.064, 4.821, 4.640, 4.499, 4.388, 4.296,}, + { 9.074, 6.701, 5.739, 5.205, 4.862, 4.620, 4.441, 4.302, 4.191, 4.100,}, + { 8.862, 6.515, 5.564, 5.035, 4.695, 4.456, 4.278, 4.140, 4.030, 3.939,}, + { 8.683, 6.359, 5.417, 4.893, 4.556, 4.318, 4.142, 4.004, 3.895, 3.805,}, + { 8.531, 6.226, 5.292, 4.773, 4.437, 4.202, 4.026, 3.890, 3.780, 3.691,}, + { 8.400, 6.112, 5.185, 4.669, 4.336, 4.102, 3.927, 3.791, 3.682, 3.593,}, + { 8.285, 6.013, 5.092, 4.579, 4.248, 4.015, 3.841, 3.705, 3.597, 3.508,}, + { 8.185, 5.926, 5.010, 4.500, 4.171, 3.939, 3.765, 3.631, 3.523, 3.434,}, + { 8.096, 5.849, 4.938, 4.431, 4.103, 3.871, 3.699, 3.564, 3.457, 3.368,}, + { 8.017, 5.780, 4.874, 4.369, 4.042, 3.812, 3.640, 3.506, 3.398, 3.310,}, + { 7.945, 5.719, 4.817, 4.313, 3.988, 3.758, 3.587, 3.453, 3.346, 3.258,}, + { 7.881, 5.664, 4.765, 4.264, 3.939, 3.710, 3.539, 3.406, 3.299, 3.211,}, + { 7.823, 5.614, 4.718, 4.218, 3.895, 3.667, 3.496, 3.363, 3.256, 3.168,}, + { 7.770, 5.568, 4.675, 4.177, 3.855, 3.627, 3.457, 3.324, 3.217, 
3.129,}, + { 7.721, 5.526, 4.637, 4.140, 3.818, 3.591, 3.421, 3.288, 3.182, 3.094,}, + { 7.677, 5.488, 4.601, 4.106, 3.785, 3.558, 3.388, 3.256, 3.149, 3.062,}, + { 7.636, 5.453, 4.568, 4.074, 3.754, 3.528, 3.358, 3.226, 3.120, 3.032,}, + { 7.598, 5.420, 4.538, 4.045, 3.725, 3.499, 3.330, 3.198, 3.092, 3.005,}, + { 7.562, 5.390, 4.510, 4.018, 3.699, 3.473, 3.305, 3.173, 3.067, 2.979,}, + { 7.530, 5.362, 4.484, 3.993, 3.675, 3.449, 3.281, 3.149, 3.043, 2.955,}, + { 7.499, 5.336, 4.459, 3.969, 3.652, 3.427, 3.258, 3.127, 3.021, 2.934,}, + { 7.471, 5.312, 4.437, 3.948, 3.630, 3.406, 3.238, 3.106, 3.000, 2.913,}, + { 7.444, 5.289, 4.416, 3.927, 3.611, 3.386, 3.218, 3.087, 2.981, 2.894,}, + { 7.419, 5.268, 4.396, 3.908, 3.592, 3.368, 3.200, 3.069, 2.963, 2.876,}, + { 7.396, 5.248, 4.377, 3.890, 3.574, 3.351, 3.183, 3.052, 2.946, 2.859,}, + { 7.373, 5.229, 4.360, 3.873, 3.558, 3.334, 3.167, 3.036, 2.930, 2.843,}, + { 7.353, 5.211, 4.343, 3.858, 3.542, 3.319, 3.152, 3.021, 2.915, 2.828,}, + { 7.333, 5.194, 4.327, 3.843, 3.528, 3.305, 3.137, 3.006, 2.901, 2.814,}, + { 7.314, 5.179, 4.313, 3.828, 3.514, 3.291, 3.124, 2.993, 2.888, 2.801,}, + { 7.296, 5.163, 4.299, 3.815, 3.501, 3.278, 3.111, 2.980, 2.875, 2.788,}, + { 7.280, 5.149, 4.285, 3.802, 3.488, 3.266, 3.099, 2.968, 2.863, 2.776,}, + { 7.264, 5.136, 4.273, 3.790, 3.476, 3.254, 3.087, 2.957, 2.851, 2.764,}, + { 7.248, 5.123, 4.261, 3.778, 3.465, 3.243, 3.076, 2.946, 2.840, 2.754,}, + { 7.234, 5.110, 4.249, 3.767, 3.454, 3.232, 3.066, 2.935, 2.830, 2.743,}, + { 7.220, 5.099, 4.238, 3.757, 3.444, 3.222, 3.056, 2.925, 2.820, 2.733,}, + { 7.207, 5.087, 4.228, 3.747, 3.434, 3.213, 3.046, 2.916, 2.811, 2.724,}, + { 7.194, 5.077, 4.218, 3.737, 3.425, 3.204, 3.037, 2.907, 2.802, 2.715,}, + { 7.182, 5.066, 4.208, 3.728, 3.416, 3.195, 3.028, 2.898, 2.793, 2.706,}, + { 7.171, 5.057, 4.199, 3.720, 3.408, 3.186, 3.020, 2.890, 2.785, 2.698,}, + { 7.159, 5.047, 4.191, 3.711, 3.400, 3.178, 3.012, 2.882, 2.777, 2.690,}, + { 7.149, 
5.038, 4.182, 3.703, 3.392, 3.171, 3.005, 2.874, 2.769, 2.683,}, + { 7.139, 5.030, 4.174, 3.695, 3.384, 3.163, 2.997, 2.867, 2.762, 2.675,}, + { 7.129, 5.021, 4.167, 3.688, 3.377, 3.156, 2.990, 2.860, 2.755, 2.668,}, + { 7.119, 5.013, 4.159, 3.681, 3.370, 3.149, 2.983, 2.853, 2.748, 2.662,}, + { 7.110, 5.006, 4.152, 3.674, 3.363, 3.143, 2.977, 2.847, 2.742, 2.655,}, + { 7.102, 4.998, 4.145, 3.667, 3.357, 3.136, 2.971, 2.841, 2.736, 2.649,}, + { 7.093, 4.991, 4.138, 3.661, 3.351, 3.130, 2.965, 2.835, 2.730, 2.643,}, + { 7.085, 4.984, 4.132, 3.655, 3.345, 3.124, 2.959, 2.829, 2.724, 2.637,}, + { 7.077, 4.977, 4.126, 3.649, 3.339, 3.119, 2.953, 2.823, 2.718, 2.632,}, + { 7.070, 4.971, 4.120, 3.643, 3.333, 3.113, 2.948, 2.818, 2.713, 2.626,}, + { 7.062, 4.965, 4.114, 3.638, 3.328, 3.108, 2.942, 2.813, 2.708, 2.621,}, + { 7.055, 4.959, 4.109, 3.632, 3.323, 3.103, 2.937, 2.808, 2.703, 2.616,}, + { 7.048, 4.953, 4.103, 3.627, 3.318, 3.098, 2.932, 2.803, 2.698, 2.611,}, + { 7.042, 4.947, 4.098, 3.622, 3.313, 3.093, 2.928, 2.798, 2.693, 2.607,}, + { 7.035, 4.942, 4.093, 3.618, 3.308, 3.088, 2.923, 2.793, 2.689, 2.602,}, + { 7.029, 4.937, 4.088, 3.613, 3.304, 3.084, 2.919, 2.789, 2.684, 2.598,}, + { 7.023, 4.932, 4.083, 3.608, 3.299, 3.080, 2.914, 2.785, 2.680, 2.593,}, + { 7.017, 4.927, 4.079, 3.604, 3.295, 3.075, 2.910, 2.781, 2.676, 2.589,}, + { 7.011, 4.922, 4.074, 3.600, 3.291, 3.071, 2.906, 2.777, 2.672, 2.585,}, + { 7.006, 4.917, 4.070, 3.596, 3.287, 3.067, 2.902, 2.773, 2.668, 2.581,}, + { 7.001, 4.913, 4.066, 3.591, 3.283, 3.063, 2.898, 2.769, 2.664, 2.578,}, + { 6.995, 4.908, 4.062, 3.588, 3.279, 3.060, 2.895, 2.765, 2.660, 2.574,}, + { 6.990, 4.904, 4.058, 3.584, 3.275, 3.056, 2.891, 2.762, 2.657, 2.570,}, + { 6.985, 4.900, 4.054, 3.580, 3.272, 3.052, 2.887, 2.758, 2.653, 2.567,}, + { 6.981, 4.896, 4.050, 3.577, 3.268, 3.049, 2.884, 2.755, 2.650, 2.563,}, + { 6.976, 4.892, 4.047, 3.573, 3.265, 3.046, 2.881, 2.751, 2.647, 2.560,}, + { 6.971, 4.888, 4.043, 3.570, 
3.261, 3.042, 2.877, 2.748, 2.644, 2.557,}, + { 6.967, 4.884, 4.040, 3.566, 3.258, 3.039, 2.874, 2.745, 2.640, 2.554,}, + { 6.963, 4.881, 4.036, 3.563, 3.255, 3.036, 2.871, 2.742, 2.637, 2.551,}, + { 6.958, 4.877, 4.033, 3.560, 3.252, 3.033, 2.868, 2.739, 2.634, 2.548,}, + { 6.954, 4.874, 4.030, 3.557, 3.249, 3.030, 2.865, 2.736, 2.632, 2.545,}, + { 6.950, 4.870, 4.027, 3.554, 3.246, 3.027, 2.863, 2.733, 2.629, 2.542,}, + { 6.947, 4.867, 4.024, 3.551, 3.243, 3.025, 2.860, 2.731, 2.626, 2.539,}, + { 6.943, 4.864, 4.021, 3.548, 3.240, 3.022, 2.857, 2.728, 2.623, 2.537,}, + { 6.939, 4.861, 4.018, 3.545, 3.238, 3.019, 2.854, 2.725, 2.621, 2.534,}, + { 6.935, 4.858, 4.015, 3.543, 3.235, 3.017, 2.852, 2.723, 2.618, 2.532,}, + { 6.932, 4.855, 4.012, 3.540, 3.233, 3.014, 2.849, 2.720, 2.616, 2.529,}, + { 6.928, 4.852, 4.010, 3.538, 3.230, 3.012, 2.847, 2.718, 2.613, 2.527,}, + { 6.925, 4.849, 4.007, 3.535, 3.228, 3.009, 2.845, 2.715, 2.611, 2.524,}, + { 6.922, 4.846, 4.004, 3.533, 3.225, 3.007, 2.842, 2.713, 2.609, 2.522,}, + { 6.919, 4.844, 4.002, 3.530, 3.223, 3.004, 2.840, 2.711, 2.606, 2.520,}, + { 6.915, 4.841, 3.999, 3.528, 3.221, 3.002, 2.838, 2.709, 2.604, 2.518,}, + { 6.912, 4.838, 3.997, 3.525, 3.218, 3.000, 2.835, 2.706, 2.602, 2.515,}, + { 6.909, 4.836, 3.995, 3.523, 3.216, 2.998, 2.833, 2.704, 2.600, 2.513,}, + { 6.906, 4.833, 3.992, 3.521, 3.214, 2.996, 2.831, 2.702, 2.598, 2.511,}, + { 6.904, 4.831, 3.990, 3.519, 3.212, 2.994, 2.829, 2.700, 2.596, 2.509,}, + { 6.901, 4.829, 3.988, 3.517, 3.210, 2.992, 2.827, 2.698, 2.594, 2.507,}, + { 6.898, 4.826, 3.986, 3.515, 3.208, 2.990, 2.825, 2.696, 2.592, 2.505,}, + { 6.895, 4.824, 3.984, 3.513, 3.206, 2.988, 2.823, 2.694, 2.590, 2.503} +}; + +/** define the variance which will be used as a minimum variance for any + dimension of any feature. 
    Since most features are calculated from numbers with a precision no
    better than 1 in 128, the variance should never be less than the
    square of this number for parameters whose range is 1. */
#define MINVARIANCE 0.0004

/** define the absolute minimum number of samples which must be present in
    order to accurately test hypotheses about underlying probability
    distributions. Define separately the minimum samples that are needed
    before a statistical analysis is attempted; this number should be
    equal to MINSAMPLES but can be set to a lower number for early testing
    when very few samples are available. */
#define MINSAMPLESPERBUCKET 5
#define MINSAMPLES (MINBUCKETS * MINSAMPLESPERBUCKET)
#define MINSAMPLESNEEDED 1

/** define the size of the table which maps normalized samples to
    histogram buckets. Also define the number of standard deviations
    in a normal distribution which are considered to be significant.
    The mapping table will be defined in such a way that it covers
    the specified number of standard deviations on either side of
    the mean. BUCKETTABLESIZE should always be even. */
#define BUCKETTABLESIZE 1024
#define NORMALEXTENT 3.0

// Pair of clusters that are candidates for merging into one new cluster.
struct TEMPCLUSTER {
  CLUSTER *Cluster;
  CLUSTER *Neighbor;
};

using ClusterPair = tesseract::KDPairInc<float, TEMPCLUSTER*>;
using ClusterHeap = tesseract::GenericHeap<ClusterPair>;

// Per-cluster statistics gathered by ComputeStatistics.
struct STATISTICS {
  float AvgVariance;
  float *CoVariance;
  float *Min; // largest negative distance from the mean
  float *Max; // largest positive distance from the mean
};

// Histogram used to chi-square-test a sample set against a distribution.
struct BUCKETS {
  DISTRIBUTION Distribution; // distribution being tested for
  uint32_t SampleCount; // # of samples in histogram
  double Confidence; // confidence level of test
  double ChiSquared; // test threshold
  uint16_t NumberOfBuckets; // number of cells in histogram
  uint16_t Bucket[BUCKETTABLESIZE]; // mapping to histogram buckets
  uint32_t *Count; // frequency of occurrence histogram
  float *ExpectedCount; // expected histogram
};

// Key/value record for the cached chi-squared values solved by Solve().
struct CHISTRUCT{
  uint16_t DegreesOfFreedom;
  double Alpha;
  double ChiSquared;
};

// For use with KDWalk / MakePotentialClusters
struct ClusteringContext {
  ClusterHeap *heap; // heap used to hold temp clusters, "best" on top
  TEMPCLUSTER *candidates; // array of potential clusters
  KDTREE *tree; // kd-tree to be searched for neighbors
  int32_t next; // next candidate to be used
};

using DENSITYFUNC = double (*)(int32_t);
using SOLVEFUNC = double (*)(CHISTRUCT*, double);

// Small integer helpers used by the bucket-mapping code below.
#define Odd(N) ((N)%2)
#define Mirror(N,R) ((R) - (N) - 1)
#define Abs(N) (((N) < 0) ? (-(N)) : (N))

//--------------Global Data Definitions and Declarations----------------------
/** the following variables describe a discrete normal distribution
    which is used by NormalDensity() and NormalBucket(). The
    constant NORMALEXTENT determines how many standard
    deviations of the distribution are mapped onto the fixed
    discrete range of x. x=0 is mapped to -NORMALEXTENT standard
    deviations and x=BUCKETTABLESIZE is mapped to
    +NORMALEXTENT standard deviations. */
#define SqrtOf2Pi 2.506628275
static const double kNormalStdDev = BUCKETTABLESIZE / (2.0 * NORMALEXTENT);
static const double kNormalVariance =
    (BUCKETTABLESIZE * BUCKETTABLESIZE) / (4.0 * NORMALEXTENT * NORMALEXTENT);
static const double kNormalMagnitude =
    (2.0 * NORMALEXTENT) / (SqrtOf2Pi * BUCKETTABLESIZE);
static const double kNormalMean = BUCKETTABLESIZE / 2;

/** define lookup tables used to compute the number of histogram buckets
    that should be used for a given number of samples. */
#define LOOKUPTABLESIZE 8
#define MAXDEGREESOFFREEDOM MAXBUCKETS

static const uint32_t kCountTable[LOOKUPTABLESIZE] = {
  MINSAMPLES, 200, 400, 600, 800, 1000, 1500, 2000
}; // number of samples

static const uint16_t kBucketsTable[LOOKUPTABLESIZE] = {
  MINBUCKETS, 16, 20, 24, 27, 30, 35, MAXBUCKETS
}; // number of buckets

/*-------------------------------------------------------------------------
          Private Function Prototypes
--------------------------------------------------------------------------*/
static void CreateClusterTree(CLUSTERER* Clusterer);

static void MakePotentialClusters(ClusteringContext* context, CLUSTER* Cluster,
                                  int32_t Level);

static CLUSTER* FindNearestNeighbor(KDTREE*Tree, CLUSTER* Cluster,
                                    float* Distance);

static CLUSTER* MakeNewCluster(CLUSTERER* Clusterer, TEMPCLUSTER* TempCluster);

static void ComputePrototypes(CLUSTERER* Clusterer, CLUSTERCONFIG* Config);

static PROTOTYPE* MakePrototype(CLUSTERER* Clusterer, CLUSTERCONFIG* Config,
                                CLUSTER* Cluster);

static PROTOTYPE* MakeDegenerateProto(uint16_t N,
                                      CLUSTER* Cluster, STATISTICS* Statistics,
                                      PROTOSTYLE Style, int32_t MinSamples);

static PROTOTYPE* TestEllipticalProto(CLUSTERER* Clusterer,
                                      CLUSTERCONFIG* Config, CLUSTER* Cluster,
                                      STATISTICS* Statistics);

static PROTOTYPE* MakeSphericalProto(CLUSTERER* Clusterer,
                                     CLUSTER* Cluster, STATISTICS* Statistics,
                                     BUCKETS* Buckets);

static PROTOTYPE* MakeEllipticalProto(CLUSTERER* Clusterer,
                                      CLUSTER* Cluster, STATISTICS* Statistics,
                                      BUCKETS* Buckets);

static PROTOTYPE* MakeMixedProto(CLUSTERER* Clusterer,
                                 CLUSTER* Cluster, STATISTICS* Statistics,
                                 BUCKETS* NormalBuckets, double Confidence);

static void MakeDimRandom(uint16_t i, PROTOTYPE* Proto, PARAM_DESC* ParamDesc);

static void MakeDimUniform(uint16_t i, PROTOTYPE* Proto, STATISTICS* Statistics);

static STATISTICS* ComputeStatistics(int16_t N, PARAM_DESC ParamDesc[],
                                     CLUSTER* Cluster);

static PROTOTYPE* NewSphericalProto(uint16_t N, CLUSTER* Cluster,
                                    STATISTICS* Statistics);

static PROTOTYPE* NewEllipticalProto(int16_t N, CLUSTER* Cluster,
                                     STATISTICS* Statistics);

static PROTOTYPE* NewMixedProto(int16_t N, CLUSTER *Cluster, STATISTICS *Statistics);

static PROTOTYPE* NewSimpleProto(int16_t N, CLUSTER *Cluster);

static bool Independent(PARAM_DESC* ParamDesc,
                        int16_t N, float* CoVariance, float Independence);

static BUCKETS *GetBuckets(CLUSTERER* clusterer,
                           DISTRIBUTION Distribution,
                           uint32_t SampleCount,
                           double Confidence);

static BUCKETS *MakeBuckets(DISTRIBUTION Distribution,
                            uint32_t SampleCount,
                            double Confidence);

static uint16_t OptimumNumberOfBuckets(uint32_t SampleCount);

static double ComputeChiSquared(uint16_t DegreesOfFreedom, double Alpha);

static double NormalDensity(int32_t x);

static double UniformDensity(int32_t x);

static double Integral(double f1, double f2, double Dx);

static void FillBuckets(BUCKETS *Buckets,
                        CLUSTER *Cluster,
                        uint16_t Dim,
                        PARAM_DESC *ParamDesc,
                        float Mean,
                        float StdDev);

static uint16_t NormalBucket(PARAM_DESC *ParamDesc,
                             float x,
                             float Mean,
                             float StdDev);

static uint16_t UniformBucket(PARAM_DESC *ParamDesc,
                              float x,
                              float Mean,
                              float StdDev);

static bool DistributionOK(BUCKETS* Buckets);

static void FreeStatistics(STATISTICS *Statistics);

static void FreeBuckets(BUCKETS *Buckets);

static void FreeCluster(CLUSTER *Cluster);

static
uint16_t DegreesOfFreedom(DISTRIBUTION Distribution, uint16_t HistogramBuckets); + +static void AdjustBuckets(BUCKETS *Buckets, uint32_t NewSampleCount); + +static void InitBuckets(BUCKETS *Buckets); + +static int AlphaMatch(void *arg1, // CHISTRUCT *ChiStruct, + void *arg2); // CHISTRUCT *SearchKey); + +static CHISTRUCT *NewChiStruct(uint16_t DegreesOfFreedom, double Alpha); + +static double Solve(SOLVEFUNC Function, + void *FunctionParams, + double InitialGuess, + double Accuracy); + +static double ChiArea(CHISTRUCT *ChiParams, double x); + +static bool MultipleCharSamples(CLUSTERER* Clusterer, + CLUSTER* Cluster, + float MaxIllegal); + +static double InvertMatrix(const float* input, int size, float* inv); + +//--------------------------Public Code-------------------------------------- +/** + * This routine creates a new clusterer data structure, + * initializes it, and returns a pointer to it. + * + * @param SampleSize number of dimensions in feature space + * @param ParamDesc description of each dimension + * @return pointer to the new clusterer data structure + */ +CLUSTERER * +MakeClusterer (int16_t SampleSize, const PARAM_DESC ParamDesc[]) { + CLUSTERER *Clusterer; + int i; + + // allocate main clusterer data structure and init simple fields + Clusterer = static_cast<CLUSTERER *>(malloc (sizeof (CLUSTERER))); + Clusterer->SampleSize = SampleSize; + Clusterer->NumberOfSamples = 0; + Clusterer->NumChar = 0; + + // init fields which will not be used initially + Clusterer->Root = nullptr; + Clusterer->ProtoList = NIL_LIST; + + // maintain a copy of param descriptors in the clusterer data structure + Clusterer->ParamDesc = + static_cast<PARAM_DESC *>(malloc (SampleSize * sizeof (PARAM_DESC))); + for (i = 0; i < SampleSize; i++) { + Clusterer->ParamDesc[i].Circular = ParamDesc[i].Circular; + Clusterer->ParamDesc[i].NonEssential = ParamDesc[i].NonEssential; + Clusterer->ParamDesc[i].Min = ParamDesc[i].Min; + Clusterer->ParamDesc[i].Max = ParamDesc[i].Max; + 
Clusterer->ParamDesc[i].Range = ParamDesc[i].Max - ParamDesc[i].Min; + Clusterer->ParamDesc[i].HalfRange = Clusterer->ParamDesc[i].Range / 2; + Clusterer->ParamDesc[i].MidRange = + (ParamDesc[i].Max + ParamDesc[i].Min) / 2; + } + + // allocate a kd tree to hold the samples + Clusterer->KDTree = MakeKDTree (SampleSize, ParamDesc); + + // Initialize cache of histogram buckets to minimize recomputing them. + for (auto & d : Clusterer->bucket_cache) { + for (auto & c : d) + c = nullptr; + } + + return Clusterer; +} // MakeClusterer + +/** + * This routine creates a new sample data structure to hold + * the specified feature. This sample is added to the clusterer + * data structure (so that it knows which samples are to be + * clustered later), and a pointer to the sample is returned to + * the caller. + * + * @param Clusterer clusterer data structure to add sample to + * @param Feature feature to be added to clusterer + * @param CharID unique ident. of char that sample came from + * + * @return Pointer to the new sample data structure + */ +SAMPLE* MakeSample(CLUSTERER * Clusterer, const float* Feature, + int32_t CharID) { + SAMPLE *Sample; + int i; + + // see if the samples have already been clustered - if so trap an error + // Can't add samples after they have been clustered. 
+ ASSERT_HOST(Clusterer->Root == nullptr); + + // allocate the new sample and initialize it + Sample = static_cast<SAMPLE *>(malloc (sizeof (SAMPLE) + + (Clusterer->SampleSize - + 1) * sizeof (float))); + Sample->Clustered = false; + Sample->Prototype = false; + Sample->SampleCount = 1; + Sample->Left = nullptr; + Sample->Right = nullptr; + Sample->CharID = CharID; + + for (i = 0; i < Clusterer->SampleSize; i++) + Sample->Mean[i] = Feature[i]; + + // add the sample to the KD tree - keep track of the total # of samples + Clusterer->NumberOfSamples++; + KDStore(Clusterer->KDTree, Sample->Mean, Sample); + if (CharID >= Clusterer->NumChar) + Clusterer->NumChar = CharID + 1; + + // execute hook for monitoring clustering operation + // (*SampleCreationHook)(Sample); + + return (Sample); +} // MakeSample + +/** + * This routine first checks to see if the samples in this + * clusterer have already been clustered before; if so, it does + * not bother to recreate the cluster tree. It simply recomputes + * the prototypes based on the new Config info. + * + * If the samples have not been clustered before, the + * samples in the KD tree are formed into a cluster tree and then + * the prototypes are computed from the cluster tree. + * + * In either case this routine returns a pointer to a + * list of prototypes that best represent the samples given + * the constraints specified in Config. 
+ * + * @param Clusterer data struct containing samples to be clustered + * @param Config parameters which control clustering process + * + * @return Pointer to a list of prototypes + */ +LIST ClusterSamples(CLUSTERER *Clusterer, CLUSTERCONFIG *Config) { + //only create cluster tree if samples have never been clustered before + if (Clusterer->Root == nullptr) + CreateClusterTree(Clusterer); + + //deallocate the old prototype list if one exists + FreeProtoList (&Clusterer->ProtoList); + Clusterer->ProtoList = NIL_LIST; + + //compute prototypes starting at the root node in the tree + ComputePrototypes(Clusterer, Config); + // We don't need the cluster pointers in the protos any more, so null them + // out, which makes it safe to delete the clusterer. + LIST proto_list = Clusterer->ProtoList; + iterate(proto_list) { + auto *proto = reinterpret_cast<PROTOTYPE *>(first_node(proto_list)); + proto->Cluster = nullptr; + } + return Clusterer->ProtoList; +} // ClusterSamples + +/** + * This routine frees all of the memory allocated to the + * specified data structure. It will not, however, free + * the memory used by the prototype list. The pointers to + * the clusters for each prototype in the list will be set + * to nullptr to indicate that the cluster data structures no + * longer exist. Any sample lists that have been obtained + * via calls to GetSamples are no longer valid. + * @param Clusterer pointer to data structure to be freed + */ +void FreeClusterer(CLUSTERER *Clusterer) { + if (Clusterer != nullptr) { + free(Clusterer->ParamDesc); + if (Clusterer->KDTree != nullptr) + FreeKDTree (Clusterer->KDTree); + if (Clusterer->Root != nullptr) + FreeCluster (Clusterer->Root); + // Free up all used buckets structures. + for (auto & d : Clusterer->bucket_cache) { + for (auto & c : d) + if (c != nullptr) + FreeBuckets(c); + } + + free(Clusterer); + } +} // FreeClusterer + +/** + * This routine frees all of the memory allocated to the + * specified list of prototypes. 
The clusters which are + * pointed to by the prototypes are not freed. + * @param ProtoList pointer to list of prototypes to be freed + */ +void FreeProtoList(LIST *ProtoList) { + destroy_nodes(*ProtoList, FreePrototype); +} // FreeProtoList + +/** + * This routine deallocates the memory consumed by the specified + * prototype and modifies the corresponding cluster so that it + * is no longer marked as a prototype. The cluster is NOT + * deallocated by this routine. + * @param arg prototype data structure to be deallocated + */ +void FreePrototype(void *arg) { //PROTOTYPE *Prototype) + auto *Prototype = static_cast<PROTOTYPE *>(arg); + + // unmark the corresponding cluster (if there is one + if (Prototype->Cluster != nullptr) + Prototype->Cluster->Prototype = false; + + // deallocate the prototype statistics and then the prototype itself + free(Prototype->Distrib); + free(Prototype->Mean); + if (Prototype->Style != spherical) { + free(Prototype->Variance.Elliptical); + free(Prototype->Magnitude.Elliptical); + free(Prototype->Weight.Elliptical); + } + free(Prototype); +} // FreePrototype + +/** + * This routine is used to find all of the samples which + * belong to a cluster. It starts by removing the top + * cluster on the cluster list (SearchState). If this cluster is + * a leaf it is returned. Otherwise, the right subcluster + * is pushed on the list and we continue the search in the + * left subcluster. This continues until a leaf is found. + * If all samples have been found, nullptr is returned. + * InitSampleSearch() must be called + * before NextSample() to initialize the search. + * @param SearchState ptr to list containing clusters to be searched + * @return Pointer to the next leaf cluster (sample) or nullptr. 
+ */ +CLUSTER *NextSample(LIST *SearchState) { + CLUSTER *Cluster; + + if (*SearchState == NIL_LIST) + return (nullptr); + Cluster = reinterpret_cast<CLUSTER *>first_node (*SearchState); + *SearchState = pop (*SearchState); + for (;;) { + if (Cluster->Left == nullptr) + return (Cluster); + *SearchState = push (*SearchState, Cluster->Right); + Cluster = Cluster->Left; + } +} // NextSample + +/** + * This routine returns the mean of the specified + * prototype in the indicated dimension. + * @param Proto prototype to return mean of + * @param Dimension dimension whose mean is to be returned + * @return Mean of Prototype in Dimension + */ +float Mean(PROTOTYPE *Proto, uint16_t Dimension) { + return (Proto->Mean[Dimension]); +} // Mean + +/** + * This routine returns the standard deviation of the + * prototype in the indicated dimension. + * @param Proto prototype to return standard deviation of + * @param Dimension dimension whose stddev is to be returned + * @return Standard deviation of Prototype in Dimension + */ +float StandardDeviation(PROTOTYPE *Proto, uint16_t Dimension) { + switch (Proto->Style) { + case spherical: + return (static_cast<float>(sqrt (static_cast<double>(Proto->Variance.Spherical)))); + case elliptical: + return (static_cast<float>(sqrt (static_cast<double>(Proto->Variance.Elliptical[Dimension])))); + case mixed: + switch (Proto->Distrib[Dimension]) { + case normal: + return (static_cast<float>(sqrt (static_cast<double>(Proto->Variance.Elliptical[Dimension])))); + case uniform: + case D_random: + return (Proto->Variance.Elliptical[Dimension]); + case DISTRIBUTION_COUNT: + ASSERT_HOST(!"Distribution count not allowed!"); + } + } + return 0.0f; +} // StandardDeviation + + +/*--------------------------------------------------------------------------- + Private Code +----------------------------------------------------------------------------*/ +/** + * This routine performs a bottoms-up clustering on the samples + * held in the kd-tree of the 
Clusterer data structure. The + * result is a cluster tree. Each node in the tree represents + * a cluster which conceptually contains a subset of the samples. + * More precisely, the cluster contains all of the samples which + * are contained in its two sub-clusters. The leaves of the + * tree are the individual samples themselves; they have no + * sub-clusters. The root node of the tree conceptually contains + * all of the samples. + * The Clusterer data structure is changed. + * @param Clusterer data structure holdings samples to be clustered + */ +static void CreateClusterTree(CLUSTERER *Clusterer) { + ClusteringContext context; + ClusterPair HeapEntry; + TEMPCLUSTER *PotentialCluster; + + // each sample and its nearest neighbor form a "potential" cluster + // save these in a heap with the "best" potential clusters on top + context.tree = Clusterer->KDTree; + context.candidates = static_cast<TEMPCLUSTER *>(malloc(Clusterer->NumberOfSamples * sizeof(TEMPCLUSTER))); + context.next = 0; + context.heap = new ClusterHeap(Clusterer->NumberOfSamples); + KDWalk(context.tree, reinterpret_cast<void_proc>(MakePotentialClusters), &context); + + // form potential clusters into actual clusters - always do "best" first + while (context.heap->Pop(&HeapEntry)) { + PotentialCluster = HeapEntry.data(); + + // if main cluster of potential cluster is already in another cluster + // then we don't need to worry about it + if (PotentialCluster->Cluster->Clustered) { + continue; + } + + // if main cluster is not yet clustered, but its nearest neighbor is + // then we must find a new nearest neighbor + else if (PotentialCluster->Neighbor->Clustered) { + PotentialCluster->Neighbor = + FindNearestNeighbor(context.tree, PotentialCluster->Cluster, + &HeapEntry.key()); + if (PotentialCluster->Neighbor != nullptr) { + context.heap->Push(&HeapEntry); + } + } + + // if neither cluster is already clustered, form permanent cluster + else { + PotentialCluster->Cluster = + MakeNewCluster(Clusterer, 
PotentialCluster); + PotentialCluster->Neighbor = + FindNearestNeighbor(context.tree, PotentialCluster->Cluster, + &HeapEntry.key()); + if (PotentialCluster->Neighbor != nullptr) { + context.heap->Push(&HeapEntry); + } + } + } + + // the root node in the cluster tree is now the only node in the kd-tree + Clusterer->Root = static_cast<CLUSTER *>RootOf(Clusterer->KDTree); + + // free up the memory used by the K-D tree, heap, and temp clusters + FreeKDTree(context.tree); + Clusterer->KDTree = nullptr; + delete context.heap; + free(context.candidates); +} // CreateClusterTree + +/** + * This routine is designed to be used in concert with the + * KDWalk routine. It will create a potential cluster for + * each sample in the kd-tree that is being walked. This + * potential cluster will then be pushed on the heap. + * @param context ClusteringContext (see definition above) + * @param Cluster current cluster being visited in kd-tree walk + * @param Level level of this cluster in the kd-tree + */ +static void MakePotentialClusters(ClusteringContext* context, + CLUSTER* Cluster, int32_t /*Level*/) { + ClusterPair HeapEntry; + int next = context->next; + context->candidates[next].Cluster = Cluster; + HeapEntry.data() = &(context->candidates[next]); + context->candidates[next].Neighbor = + FindNearestNeighbor(context->tree, + context->candidates[next].Cluster, + &HeapEntry.key()); + if (context->candidates[next].Neighbor != nullptr) { + context->heap->Push(&HeapEntry); + context->next++; + } +} // MakePotentialClusters + +/** + * This routine searches the specified kd-tree for the nearest + * neighbor of the specified cluster. It actually uses the + * kd routines to find the 2 nearest neighbors since one of them + * will be the original cluster. A pointer to the nearest + * neighbor is returned, if it can be found, otherwise nullptr is + * returned. The distance between the 2 nodes is placed + * in the specified variable. 
+ * @param Tree kd-tree to search in for nearest neighbor + * @param Cluster cluster whose nearest neighbor is to be found + * @param Distance ptr to variable to report distance found + * @return Pointer to the nearest neighbor of Cluster, or nullptr + */ +static CLUSTER* +FindNearestNeighbor(KDTREE* Tree, CLUSTER* Cluster, float* Distance) +#define MAXNEIGHBORS 2 +#define MAXDISTANCE FLT_MAX +{ + CLUSTER *Neighbor[MAXNEIGHBORS]; + float Dist[MAXNEIGHBORS]; + int NumberOfNeighbors; + int32_t i; + CLUSTER *BestNeighbor; + + // find the 2 nearest neighbors of the cluster + KDNearestNeighborSearch(Tree, Cluster->Mean, MAXNEIGHBORS, MAXDISTANCE, + &NumberOfNeighbors, reinterpret_cast<void **>(Neighbor), Dist); + + // search for the nearest neighbor that is not the cluster itself + *Distance = MAXDISTANCE; + BestNeighbor = nullptr; + for (i = 0; i < NumberOfNeighbors; i++) { + if ((Dist[i] < *Distance) && (Neighbor[i] != Cluster)) { + *Distance = Dist[i]; + BestNeighbor = Neighbor[i]; + } + } + return BestNeighbor; +} // FindNearestNeighbor + +/** + * This routine creates a new permanent cluster from the + * clusters specified in TempCluster. The 2 clusters in + * TempCluster are marked as "clustered" and deleted from + * the kd-tree. The new cluster is then added to the kd-tree. 
+ * @param Clusterer current clustering environment + * @param TempCluster potential cluster to make permanent + * @return Pointer to the new permanent cluster + */ +static CLUSTER* MakeNewCluster(CLUSTERER* Clusterer, + TEMPCLUSTER* TempCluster) { + CLUSTER *Cluster; + + // allocate the new cluster and initialize it + Cluster = static_cast<CLUSTER *>(malloc( + sizeof(CLUSTER) + (Clusterer->SampleSize - 1) * sizeof(float))); + Cluster->Clustered = false; + Cluster->Prototype = false; + Cluster->Left = TempCluster->Cluster; + Cluster->Right = TempCluster->Neighbor; + Cluster->CharID = -1; + + // mark the old clusters as "clustered" and delete them from the kd-tree + Cluster->Left->Clustered = true; + Cluster->Right->Clustered = true; + KDDelete(Clusterer->KDTree, Cluster->Left->Mean, Cluster->Left); + KDDelete(Clusterer->KDTree, Cluster->Right->Mean, Cluster->Right); + + // compute the mean and sample count for the new cluster + Cluster->SampleCount = + MergeClusters(Clusterer->SampleSize, Clusterer->ParamDesc, + Cluster->Left->SampleCount, Cluster->Right->SampleCount, + Cluster->Mean, Cluster->Left->Mean, Cluster->Right->Mean); + + // add the new cluster to the KD tree + KDStore(Clusterer->KDTree, Cluster->Mean, Cluster); + return Cluster; +} // MakeNewCluster + +/** + * This routine merges two clusters into one larger cluster. + * To do this it computes the number of samples in the new + * cluster and the mean of the new cluster. The ParamDesc + * information is used to ensure that circular dimensions + * are handled correctly. + * @param N # of dimensions (size of arrays) + * @param ParamDesc array of dimension descriptions + * @param n1, n2 number of samples in each old cluster + * @param m array to hold mean of new cluster + * @param m1, m2 arrays containing means of old clusters + * @return The number of samples in the new cluster. 
+ */ +int32_t MergeClusters(int16_t N, + PARAM_DESC ParamDesc[], + int32_t n1, + int32_t n2, + float m[], + float m1[], float m2[]) { + int32_t i, n; + + n = n1 + n2; + for (i = N; i > 0; i--, ParamDesc++, m++, m1++, m2++) { + if (ParamDesc->Circular) { + // if distance between means is greater than allowed + // reduce upper point by one "rotation" to compute mean + // then normalize the mean back into the accepted range + if ((*m2 - *m1) > ParamDesc->HalfRange) { + *m = (n1 * *m1 + n2 * (*m2 - ParamDesc->Range)) / n; + if (*m < ParamDesc->Min) + *m += ParamDesc->Range; + } + else if ((*m1 - *m2) > ParamDesc->HalfRange) { + *m = (n1 * (*m1 - ParamDesc->Range) + n2 * *m2) / n; + if (*m < ParamDesc->Min) + *m += ParamDesc->Range; + } + else + *m = (n1 * *m1 + n2 * *m2) / n; + } + else + *m = (n1 * *m1 + n2 * *m2) / n; + } + return n; +} // MergeClusters + +/** + * This routine decides which clusters in the cluster tree + * should be represented by prototypes, forms a list of these + * prototypes, and places the list in the Clusterer data + * structure. 
+ * @param Clusterer data structure holding cluster tree + * @param Config parameters used to control prototype generation + */ +static void ComputePrototypes(CLUSTERER* Clusterer, CLUSTERCONFIG* Config) { + LIST ClusterStack = NIL_LIST; + CLUSTER *Cluster; + PROTOTYPE *Prototype; + + // use a stack to keep track of clusters waiting to be processed + // initially the only cluster on the stack is the root cluster + if (Clusterer->Root != nullptr) + ClusterStack = push (NIL_LIST, Clusterer->Root); + + // loop until we have analyzed all clusters which are potential prototypes + while (ClusterStack != NIL_LIST) { + // remove the next cluster to be analyzed from the stack + // try to make a prototype from the cluster + // if successful, put it on the proto list, else split the cluster + Cluster = reinterpret_cast<CLUSTER *>first_node (ClusterStack); + ClusterStack = pop (ClusterStack); + Prototype = MakePrototype(Clusterer, Config, Cluster); + if (Prototype != nullptr) { + Clusterer->ProtoList = push (Clusterer->ProtoList, Prototype); + } + else { + ClusterStack = push (ClusterStack, Cluster->Right); + ClusterStack = push (ClusterStack, Cluster->Left); + } + } +} // ComputePrototypes + +/** + * This routine attempts to create a prototype from the + * specified cluster that conforms to the distribution + * specified in Config. If there are too few samples in the + * cluster to perform a statistical analysis, then a prototype + * is generated but labelled as insignificant. If the + * dimensions of the cluster are not independent, no prototype + * is generated and nullptr is returned. If a prototype can be + * found that matches the desired distribution then a pointer + * to it is returned, otherwise nullptr is returned. 
+ * @param Clusterer data structure holding cluster tree + * @param Config parameters used to control prototype generation + * @param Cluster cluster to be made into a prototype + * @return Pointer to new prototype or nullptr + */ +static PROTOTYPE* MakePrototype(CLUSTERER* Clusterer, CLUSTERCONFIG* Config, + CLUSTER* Cluster) { + STATISTICS *Statistics; + PROTOTYPE *Proto; + BUCKETS *Buckets; + + // filter out clusters which contain samples from the same character + if (MultipleCharSamples (Clusterer, Cluster, Config->MaxIllegal)) + return nullptr; + + // compute the covariance matrix and ranges for the cluster + Statistics = + ComputeStatistics(Clusterer->SampleSize, Clusterer->ParamDesc, Cluster); + + // check for degenerate clusters which need not be analyzed further + // note that the MinSamples test assumes that all clusters with multiple + // character samples have been removed (as above) + Proto = MakeDegenerateProto( + Clusterer->SampleSize, Cluster, Statistics, Config->ProtoStyle, + static_cast<int32_t>(Config->MinSamples * Clusterer->NumChar)); + if (Proto != nullptr) { + FreeStatistics(Statistics); + return Proto; + } + // check to ensure that all dimensions are independent + if (!Independent(Clusterer->ParamDesc, Clusterer->SampleSize, + Statistics->CoVariance, Config->Independence)) { + FreeStatistics(Statistics); + return nullptr; + } + + if (HOTELLING && Config->ProtoStyle == elliptical) { + Proto = TestEllipticalProto(Clusterer, Config, Cluster, Statistics); + if (Proto != nullptr) { + FreeStatistics(Statistics); + return Proto; + } + } + + // create a histogram data structure used to evaluate distributions + Buckets = GetBuckets(Clusterer, normal, Cluster->SampleCount, + Config->Confidence); + + // create a prototype based on the statistics and test it + switch (Config->ProtoStyle) { + case spherical: + Proto = MakeSphericalProto(Clusterer, Cluster, Statistics, Buckets); + break; + case elliptical: + Proto = MakeEllipticalProto(Clusterer, Cluster, 
Statistics, Buckets); + break; + case mixed: + Proto = MakeMixedProto(Clusterer, Cluster, Statistics, Buckets, + Config->Confidence); + break; + case automatic: + Proto = MakeSphericalProto(Clusterer, Cluster, Statistics, Buckets); + if (Proto != nullptr) + break; + Proto = MakeEllipticalProto(Clusterer, Cluster, Statistics, Buckets); + if (Proto != nullptr) + break; + Proto = MakeMixedProto(Clusterer, Cluster, Statistics, Buckets, + Config->Confidence); + break; + } + FreeStatistics(Statistics); + return Proto; +} // MakePrototype + +/** + * This routine checks for clusters which are degenerate and + * therefore cannot be analyzed in a statistically valid way. + * A cluster is defined as degenerate if it does not have at + * least MINSAMPLESNEEDED samples in it. If the cluster is + * found to be degenerate, a prototype of the specified style + * is generated and marked as insignificant. A cluster is + * also degenerate if it does not have at least MinSamples + * samples in it. + * + * If the cluster is not degenerate, nullptr is returned. + * + * @param N number of dimensions + * @param Cluster cluster being analyzed + * @param Statistics statistical info about cluster + * @param Style type of prototype to be generated + * @param MinSamples minimum number of samples in a cluster + * @return Pointer to degenerate prototype or nullptr. 
+ */ +static PROTOTYPE* MakeDegenerateProto( //this was MinSample + uint16_t N, + CLUSTER *Cluster, + STATISTICS *Statistics, + PROTOSTYLE Style, + int32_t MinSamples) { + PROTOTYPE *Proto = nullptr; + + if (MinSamples < MINSAMPLESNEEDED) + MinSamples = MINSAMPLESNEEDED; + + if (Cluster->SampleCount < MinSamples) { + switch (Style) { + case spherical: + Proto = NewSphericalProto (N, Cluster, Statistics); + break; + case elliptical: + case automatic: + Proto = NewEllipticalProto (N, Cluster, Statistics); + break; + case mixed: + Proto = NewMixedProto (N, Cluster, Statistics); + break; + } + Proto->Significant = false; + } + return (Proto); +} // MakeDegenerateProto + +/** + * This routine tests the specified cluster to see if ** + * there is a statistically significant difference between + * the sub-clusters that would be made if the cluster were to + * be split. If not, then a new prototype is formed and + * returned to the caller. If there is, then nullptr is returned + * to the caller. + * @param Clusterer data struct containing samples being clustered + * @param Config provides the magic number of samples that make a good cluster + * @param Cluster cluster to be made into an elliptical prototype + * @param Statistics statistical info about cluster + * @return Pointer to new elliptical prototype or nullptr. + */ +static PROTOTYPE* TestEllipticalProto(CLUSTERER* Clusterer, + CLUSTERCONFIG *Config, CLUSTER* Cluster, + STATISTICS* Statistics) { + // Fraction of the number of samples used as a range around 1 within + // which a cluster has the magic size that allows a boost to the + // FTable by kFTableBoostMargin, thus allowing clusters near the + // magic size (equal to the number of sample characters) to be more + // likely to stay together. 
+ const double kMagicSampleMargin = 0.0625; + const double kFTableBoostMargin = 2.0; + + int N = Clusterer->SampleSize; + CLUSTER* Left = Cluster->Left; + CLUSTER* Right = Cluster->Right; + if (Left == nullptr || Right == nullptr) + return nullptr; + int TotalDims = Left->SampleCount + Right->SampleCount; + if (TotalDims < N + 1 || TotalDims < 2) + return nullptr; + std::vector<float> Covariance(static_cast<size_t>(N) * N); + std::vector<float> Inverse(static_cast<size_t>(N) * N); + std::vector<float> Delta(N); + // Compute a new covariance matrix that only uses essential features. + for (int i = 0; i < N; ++i) { + int row_offset = i * N; + if (!Clusterer->ParamDesc[i].NonEssential) { + for (int j = 0; j < N; ++j) { + if (!Clusterer->ParamDesc[j].NonEssential) + Covariance[j + row_offset] = Statistics->CoVariance[j + row_offset]; + else + Covariance[j + row_offset] = 0.0f; + } + } else { + for (int j = 0; j < N; ++j) { + if (i == j) + Covariance[j + row_offset] = 1.0f; + else + Covariance[j + row_offset] = 0.0f; + } + } + } + double err = InvertMatrix(&Covariance[0], N, &Inverse[0]); + if (err > 1) { + tprintf("Clustering error: Matrix inverse failed with error %g\n", err); + } + int EssentialN = 0; + for (int dim = 0; dim < N; ++dim) { + if (!Clusterer->ParamDesc[dim].NonEssential) { + Delta[dim] = Left->Mean[dim] - Right->Mean[dim]; + ++EssentialN; + } else { + Delta[dim] = 0.0f; + } + } + // Compute Hotelling's T-squared. + double Tsq = 0.0; + for (int x = 0; x < N; ++x) { + double temp = 0.0; + for (int y = 0; y < N; ++y) { + temp += static_cast<double>(Inverse[y + N * x]) * Delta[y]; + } + Tsq += Delta[x] * temp; + } + // Changed this function to match the formula in + // Statistical Methods in Medical Research p 473 + // By Peter Armitage, Geoffrey Berry, J. N. S. Matthews. 
+ // Tsq *= Left->SampleCount * Right->SampleCount / TotalDims; + double F = Tsq * (TotalDims - EssentialN - 1) / ((TotalDims - 2)*EssentialN); + int Fx = EssentialN; + if (Fx > FTABLE_X) + Fx = FTABLE_X; + --Fx; + int Fy = TotalDims - EssentialN - 1; + if (Fy > FTABLE_Y) + Fy = FTABLE_Y; + --Fy; + double FTarget = FTable[Fy][Fx]; + if (Config->MagicSamples > 0 && + TotalDims >= Config->MagicSamples * (1.0 - kMagicSampleMargin) && + TotalDims <= Config->MagicSamples * (1.0 + kMagicSampleMargin)) { + // Give magic-sized clusters a magic FTable boost. + FTarget += kFTableBoostMargin; + } + if (F < FTarget) { + return NewEllipticalProto (Clusterer->SampleSize, Cluster, Statistics); + } + return nullptr; +} + +/** + * This routine tests the specified cluster to see if it can + * be approximated by a spherical normal distribution. If it + * can be, then a new prototype is formed and returned to the + * caller. If it can't be, then nullptr is returned to the caller. + * @param Clusterer data struct containing samples being clustered + * @param Cluster cluster to be made into a spherical prototype + * @param Statistics statistical info about cluster + * @param Buckets histogram struct used to analyze distribution + * @return Pointer to new spherical prototype or nullptr. 
+ */ +static PROTOTYPE* MakeSphericalProto(CLUSTERER* Clusterer, + CLUSTER* Cluster, STATISTICS* Statistics, + BUCKETS* Buckets) { + PROTOTYPE *Proto = nullptr; + int i; + + // check that each dimension is a normal distribution + for (i = 0; i < Clusterer->SampleSize; i++) { + if (Clusterer->ParamDesc[i].NonEssential) + continue; + + FillBuckets (Buckets, Cluster, i, &(Clusterer->ParamDesc[i]), + Cluster->Mean[i], + sqrt (static_cast<double>(Statistics->AvgVariance))); + if (!DistributionOK (Buckets)) + break; + } + // if all dimensions matched a normal distribution, make a proto + if (i >= Clusterer->SampleSize) + Proto = NewSphericalProto (Clusterer->SampleSize, Cluster, Statistics); + return (Proto); +} // MakeSphericalProto + +/** + * This routine tests the specified cluster to see if it can + * be approximated by an elliptical normal distribution. If it + * can be, then a new prototype is formed and returned to the + * caller. If it can't be, then nullptr is returned to the caller. + * @param Clusterer data struct containing samples being clustered + * @param Cluster cluster to be made into an elliptical prototype + * @param Statistics statistical info about cluster + * @param Buckets histogram struct used to analyze distribution + * @return Pointer to new elliptical prototype or nullptr. 
+ */ +static PROTOTYPE* MakeEllipticalProto(CLUSTERER* Clusterer, + CLUSTER* Cluster, STATISTICS* Statistics, + BUCKETS* Buckets) { + PROTOTYPE *Proto = nullptr; + int i; + + // check that each dimension is a normal distribution + for (i = 0; i < Clusterer->SampleSize; i++) { + if (Clusterer->ParamDesc[i].NonEssential) + continue; + + FillBuckets (Buckets, Cluster, i, &(Clusterer->ParamDesc[i]), + Cluster->Mean[i], + sqrt (static_cast<double>(Statistics-> + CoVariance[i * (Clusterer->SampleSize + 1)]))); + if (!DistributionOK (Buckets)) + break; + } + // if all dimensions matched a normal distribution, make a proto + if (i >= Clusterer->SampleSize) + Proto = NewEllipticalProto (Clusterer->SampleSize, Cluster, Statistics); + return (Proto); +} // MakeEllipticalProto + +/** + * This routine tests each dimension of the specified cluster to + * see what distribution would best approximate that dimension. + * Each dimension is compared to the following distributions + * in order: normal, random, uniform. If each dimension can + * be represented by one of these distributions, + * then a new prototype is formed and returned to the + * caller. If it can't be, then nullptr is returned to the caller. + * @param Clusterer data struct containing samples being clustered + * @param Cluster cluster to be made into a prototype + * @param Statistics statistical info about cluster + * @param NormalBuckets histogram struct used to analyze distribution + * @param Confidence confidence level for alternate distributions + * @return Pointer to new mixed prototype or nullptr. 
+ */ +static PROTOTYPE* MakeMixedProto(CLUSTERER* Clusterer, + CLUSTER* Cluster, STATISTICS* Statistics, + BUCKETS* NormalBuckets, double Confidence) { + PROTOTYPE *Proto; + int i; + BUCKETS *UniformBuckets = nullptr; + BUCKETS *RandomBuckets = nullptr; + + // create a mixed proto to work on - initially assume all dimensions normal*/ + Proto = NewMixedProto (Clusterer->SampleSize, Cluster, Statistics); + + // find the proper distribution for each dimension + for (i = 0; i < Clusterer->SampleSize; i++) { + if (Clusterer->ParamDesc[i].NonEssential) + continue; + + FillBuckets (NormalBuckets, Cluster, i, &(Clusterer->ParamDesc[i]), + Proto->Mean[i], + sqrt (static_cast<double>(Proto->Variance.Elliptical[i]))); + if (DistributionOK (NormalBuckets)) + continue; + + if (RandomBuckets == nullptr) + RandomBuckets = + GetBuckets(Clusterer, D_random, Cluster->SampleCount, Confidence); + MakeDimRandom (i, Proto, &(Clusterer->ParamDesc[i])); + FillBuckets (RandomBuckets, Cluster, i, &(Clusterer->ParamDesc[i]), + Proto->Mean[i], Proto->Variance.Elliptical[i]); + if (DistributionOK (RandomBuckets)) + continue; + + if (UniformBuckets == nullptr) + UniformBuckets = + GetBuckets(Clusterer, uniform, Cluster->SampleCount, Confidence); + MakeDimUniform(i, Proto, Statistics); + FillBuckets (UniformBuckets, Cluster, i, &(Clusterer->ParamDesc[i]), + Proto->Mean[i], Proto->Variance.Elliptical[i]); + if (DistributionOK (UniformBuckets)) + continue; + break; + } + // if any dimension failed to match a distribution, discard the proto + if (i < Clusterer->SampleSize) { + FreePrototype(Proto); + Proto = nullptr; + } + return (Proto); +} // MakeMixedProto + +/** + * This routine alters the ith dimension of the specified + * mixed prototype to be D_random. 
+ * @param i index of dimension to be changed + * @param Proto prototype whose dimension is to be altered + * @param ParamDesc description of specified dimension + */ +static void MakeDimRandom(uint16_t i, PROTOTYPE* Proto, PARAM_DESC* ParamDesc) { + Proto->Distrib[i] = D_random; + Proto->Mean[i] = ParamDesc->MidRange; + Proto->Variance.Elliptical[i] = ParamDesc->HalfRange; + + // subtract out the previous magnitude of this dimension from the total + Proto->TotalMagnitude /= Proto->Magnitude.Elliptical[i]; + Proto->Magnitude.Elliptical[i] = 1.0 / ParamDesc->Range; + Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i]; + Proto->LogMagnitude = log (static_cast<double>(Proto->TotalMagnitude)); + + // note that the proto Weight is irrelevant for D_random protos +} // MakeDimRandom + +/** + * This routine alters the ith dimension of the specified + * mixed prototype to be uniform. + * @param i index of dimension to be changed + * @param Proto prototype whose dimension is to be altered + * @param Statistics statistical info about prototype + */ +static void MakeDimUniform(uint16_t i, PROTOTYPE* Proto, STATISTICS* Statistics) { + Proto->Distrib[i] = uniform; + Proto->Mean[i] = Proto->Cluster->Mean[i] + + (Statistics->Min[i] + Statistics->Max[i]) / 2; + Proto->Variance.Elliptical[i] = + (Statistics->Max[i] - Statistics->Min[i]) / 2; + if (Proto->Variance.Elliptical[i] < MINVARIANCE) + Proto->Variance.Elliptical[i] = MINVARIANCE; + + // subtract out the previous magnitude of this dimension from the total + Proto->TotalMagnitude /= Proto->Magnitude.Elliptical[i]; + Proto->Magnitude.Elliptical[i] = + 1.0 / (2.0 * Proto->Variance.Elliptical[i]); + Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i]; + Proto->LogMagnitude = log (static_cast<double>(Proto->TotalMagnitude)); + + // note that the proto Weight is irrelevant for uniform protos +} // MakeDimUniform + +/** + * This routine searches the cluster tree for all leaf nodes + * which are samples in the specified 
cluster. It computes
 * a full covariance matrix for these samples as well as
 * keeping track of the ranges (min and max) for each
 * dimension. A special data structure is allocated to
 * return this information to the caller. An incremental
 * algorithm for computing statistics is not used because
 * it will not work with circular dimensions.
 * @param N number of dimensions
 * @param ParamDesc array of dimension descriptions
 * @param Cluster cluster whose stats are to be computed
 * @return Pointer to new data structure containing statistics
 */
static STATISTICS*
ComputeStatistics (int16_t N, PARAM_DESC ParamDesc[], CLUSTER * Cluster) {
  STATISTICS *Statistics;
  int i, j;
  float *CoVariance;
  float *Distance;
  LIST SearchState;
  SAMPLE *Sample;
  uint32_t SampleCountAdjustedForBias;

  // allocate memory to hold the statistics results
  Statistics = static_cast<STATISTICS *>(malloc (sizeof (STATISTICS)));
  Statistics->CoVariance = static_cast<float *>(malloc(sizeof(float) * N * N));
  Statistics->Min = static_cast<float *>(malloc (N * sizeof (float)));
  Statistics->Max = static_cast<float *>(malloc (N * sizeof (float)));

  // allocate temporary memory to hold the sample to mean distances
  Distance = static_cast<float *>(malloc (N * sizeof (float)));

  // initialize the statistics (AvgVariance starts at 1 because it is
  // built up as a product of the diagonal variances below)
  Statistics->AvgVariance = 1.0;
  CoVariance = Statistics->CoVariance;
  for (i = 0; i < N; i++) {
    Statistics->Min[i] = 0.0;
    Statistics->Max[i] = 0.0;
    for (j = 0; j < N; j++, CoVariance++)
      *CoVariance = 0;
  }
  // find each sample in the cluster and merge it into the statistics
  InitSampleSearch(SearchState, Cluster);
  while ((Sample = NextSample (&SearchState)) != nullptr) {
    for (i = 0; i < N; i++) {
      Distance[i] = Sample->Mean[i] - Cluster->Mean[i];
      if (ParamDesc[i].Circular) {
        // wrap circular dimensions so the distance is the shorter arc
        if (Distance[i] > ParamDesc[i].HalfRange)
          Distance[i] -= ParamDesc[i].Range;
        if (Distance[i] < -ParamDesc[i].HalfRange)
          Distance[i] += ParamDesc[i].Range;
      }
      if (Distance[i] < Statistics->Min[i])
        Statistics->Min[i] = Distance[i];
      if (Distance[i] > Statistics->Max[i])
        Statistics->Max[i] = Distance[i];
    }
    // accumulate the outer product Distance * Distance^T into CoVariance
    CoVariance = Statistics->CoVariance;
    for (i = 0; i < N; i++)
      for (j = 0; j < N; j++, CoVariance++)
        *CoVariance += Distance[i] * Distance[j];
  }
  // normalize the variances by the total number of samples
  // use SampleCount-1 instead of SampleCount to get an unbiased estimate
  // also compute the geometric mean of the diagonal variances
  // ensure that clusters with only 1 sample are handled correctly
  if (Cluster->SampleCount > 1)
    SampleCountAdjustedForBias = Cluster->SampleCount - 1;
  else
    SampleCountAdjustedForBias = 1;
  CoVariance = Statistics->CoVariance;
  for (i = 0; i < N; i++)
    for (j = 0; j < N; j++, CoVariance++) {
      *CoVariance /= SampleCountAdjustedForBias;
      if (j == i) {
        // clamp diagonal variances so later divisions/sqrt are safe
        if (*CoVariance < MINVARIANCE)
          *CoVariance = MINVARIANCE;
        Statistics->AvgVariance *= *CoVariance;
      }
    }
  // geometric mean of the N diagonal variances
  Statistics->AvgVariance = static_cast<float>(pow(static_cast<double>(Statistics->AvgVariance),
                                                   1.0 / N));

  // release temporary memory and return
  free(Distance);
  return (Statistics);
}                                // ComputeStatistics

/**
 * This routine creates a spherical prototype data structure to
 * approximate the samples in the specified cluster.
 * Spherical prototypes have a single variance which is
 * common across all dimensions. All dimensions are normally
 * distributed and independent.
+ * @param N number of dimensions + * @param Cluster cluster to be made into a spherical prototype + * @param Statistics statistical info about samples in cluster + * @return Pointer to a new spherical prototype data structure + */ +static PROTOTYPE* NewSphericalProto(uint16_t N, CLUSTER* Cluster, + STATISTICS* Statistics) { + PROTOTYPE *Proto; + + Proto = NewSimpleProto (N, Cluster); + + Proto->Variance.Spherical = Statistics->AvgVariance; + if (Proto->Variance.Spherical < MINVARIANCE) + Proto->Variance.Spherical = MINVARIANCE; + + Proto->Magnitude.Spherical = + 1.0 / sqrt(2.0 * M_PI * Proto->Variance.Spherical); + Proto->TotalMagnitude = static_cast<float>(pow(static_cast<double>(Proto->Magnitude.Spherical), + static_cast<double>(N))); + Proto->Weight.Spherical = 1.0 / Proto->Variance.Spherical; + Proto->LogMagnitude = log (static_cast<double>(Proto->TotalMagnitude)); + + return (Proto); +} // NewSphericalProto + +/** + * This routine creates an elliptical prototype data structure to + * approximate the samples in the specified cluster. + * Elliptical prototypes have a variance for each dimension. + * All dimensions are normally distributed and independent. 
+ * @param N number of dimensions + * @param Cluster cluster to be made into an elliptical prototype + * @param Statistics statistical info about samples in cluster + * @return Pointer to a new elliptical prototype data structure + */ +static PROTOTYPE* NewEllipticalProto(int16_t N, CLUSTER* Cluster, + STATISTICS* Statistics) { + PROTOTYPE *Proto; + float *CoVariance; + int i; + + Proto = NewSimpleProto (N, Cluster); + Proto->Variance.Elliptical = static_cast<float *>(malloc (N * sizeof (float))); + Proto->Magnitude.Elliptical = static_cast<float *>(malloc (N * sizeof (float))); + Proto->Weight.Elliptical = static_cast<float *>(malloc (N * sizeof (float))); + + CoVariance = Statistics->CoVariance; + Proto->TotalMagnitude = 1.0; + for (i = 0; i < N; i++, CoVariance += N + 1) { + Proto->Variance.Elliptical[i] = *CoVariance; + if (Proto->Variance.Elliptical[i] < MINVARIANCE) + Proto->Variance.Elliptical[i] = MINVARIANCE; + + Proto->Magnitude.Elliptical[i] = + 1.0 / sqrt(2.0 * M_PI * Proto->Variance.Elliptical[i]); + Proto->Weight.Elliptical[i] = 1.0 / Proto->Variance.Elliptical[i]; + Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i]; + } + Proto->LogMagnitude = log (static_cast<double>(Proto->TotalMagnitude)); + Proto->Style = elliptical; + return (Proto); +} // NewEllipticalProto + +/** + * This routine creates a mixed prototype data structure to + * approximate the samples in the specified cluster. + * Mixed prototypes can have different distributions for + * each dimension. All dimensions are independent. The + * structure is initially filled in as though it were an + * elliptical prototype. The actual distributions of the + * dimensions can be altered by other routines. 
+ * @param N number of dimensions + * @param Cluster cluster to be made into a mixed prototype + * @param Statistics statistical info about samples in cluster + * @return Pointer to a new mixed prototype data structure + */ +static PROTOTYPE* NewMixedProto(int16_t N, CLUSTER* Cluster, + STATISTICS* Statistics) { + PROTOTYPE *Proto; + int i; + + Proto = NewEllipticalProto (N, Cluster, Statistics); + Proto->Distrib = static_cast<DISTRIBUTION *>(malloc (N * sizeof (DISTRIBUTION))); + + for (i = 0; i < N; i++) { + Proto->Distrib[i] = normal; + } + Proto->Style = mixed; + return (Proto); +} // NewMixedProto + +/** + * This routine allocates memory to hold a simple prototype + * data structure, i.e. one without independent distributions + * and variances for each dimension. + * @param N number of dimensions + * @param Cluster cluster to be made into a prototype + * @return Pointer to new simple prototype + */ +static PROTOTYPE *NewSimpleProto(int16_t N, CLUSTER *Cluster) { + PROTOTYPE *Proto; + int i; + + Proto = static_cast<PROTOTYPE *>(malloc (sizeof (PROTOTYPE))); + Proto->Mean = static_cast<float *>(malloc (N * sizeof (float))); + + for (i = 0; i < N; i++) + Proto->Mean[i] = Cluster->Mean[i]; + Proto->Distrib = nullptr; + + Proto->Significant = true; + Proto->Merged = false; + Proto->Style = spherical; + Proto->NumSamples = Cluster->SampleCount; + Proto->Cluster = Cluster; + Proto->Cluster->Prototype = true; + return (Proto); +} // NewSimpleProto + +/** + * This routine returns true if the specified covariance + * matrix indicates that all N dimensions are independent of + * one another. One dimension is judged to be independent of + * another when the magnitude of the corresponding correlation + * coefficient is + * less than the specified Independence factor. The + * correlation coefficient is calculated as: (see Duda and + * Hart, pg. 
247)
 * coeff[ij] = stddev[ij] / sqrt (stddev[ii] * stddev[jj])
 * The covariance matrix is assumed to be symmetric (which
 * should always be true).
 * @param ParamDesc descriptions of each feature space dimension
 * @param N number of dimensions
 * @param CoVariance ptr to a covariance matrix
 * @param Independence max off-diagonal correlation coefficient
 * @return true if dimensions are independent, false otherwise
 */
static bool
Independent(PARAM_DESC* ParamDesc,
int16_t N, float* CoVariance, float Independence) {
  int i, j;
  float *VARii;                  // points to ith on-diagonal element
  float *VARjj;                  // points to jth on-diagonal element
  float CorrelationCoeff;

  VARii = CoVariance;
  // walk the diagonal; a stride of N + 1 moves to the next on-diagonal
  // element of the row-major N x N matrix
  for (i = 0; i < N; i++, VARii += N + 1) {
    if (ParamDesc[i].NonEssential)
      continue;

    VARjj = VARii + N + 1;
    CoVariance = VARii + 1;      // first off-diagonal element in row i
    for (j = i + 1; j < N; j++, CoVariance++, VARjj += N + 1) {
      if (ParamDesc[j].NonEssential)
        continue;

      // guard against a divide by zero when a variance is zero
      if ((*VARii == 0.0) || (*VARjj == 0.0))
        CorrelationCoeff = 0.0;
      else
        // sqrt(sqrt(c*c / (vii*vjj))) == sqrt(|coeff|): monotonic in the
        // magnitude of the correlation coefficient, without needing fabs
        CorrelationCoeff =
          sqrt (sqrt (*CoVariance * *CoVariance / (*VARii * *VARjj)));
      if (CorrelationCoeff > Independence)
        return false;
    }
  }
  return true;
}                                // Independent

/**
 * This routine returns a histogram data structure which can
 * be used by other routines to place samples into histogram
 * buckets, and then apply a goodness of fit test to the
 * histogram data to determine if the samples belong to the
 * specified probability distribution. The routine keeps
 * a list of bucket data structures which have already been
 * created so that it minimizes the computation time needed
 * to create a new bucket.
 * @param clusterer which keeps a bucket_cache for us.
+ * @param Distribution type of probability distribution to test for + * @param SampleCount number of samples that are available + * @param Confidence probability of a Type I error + * @return Bucket data structure + */ +static BUCKETS *GetBuckets(CLUSTERER* clusterer, + DISTRIBUTION Distribution, + uint32_t SampleCount, + double Confidence) { + // Get an old bucket structure with the same number of buckets. + uint16_t NumberOfBuckets = OptimumNumberOfBuckets(SampleCount); + BUCKETS *Buckets = + clusterer->bucket_cache[Distribution][NumberOfBuckets - MINBUCKETS]; + + // If a matching bucket structure is not found, make one and save it. + if (Buckets == nullptr) { + Buckets = MakeBuckets(Distribution, SampleCount, Confidence); + clusterer->bucket_cache[Distribution][NumberOfBuckets - MINBUCKETS] = + Buckets; + } else { + // Just adjust the existing buckets. + if (SampleCount != Buckets->SampleCount) + AdjustBuckets(Buckets, SampleCount); + if (Confidence != Buckets->Confidence) { + Buckets->Confidence = Confidence; + Buckets->ChiSquared = ComputeChiSquared( + DegreesOfFreedom(Distribution, Buckets->NumberOfBuckets), + Confidence); + } + InitBuckets(Buckets); + } + return Buckets; +} // GetBuckets + +/** + * This routine creates a histogram data structure which can + * be used by other routines to place samples into histogram + * buckets, and then apply a goodness of fit test to the + * histogram data to determine if the samples belong to the + * specified probability distribution. The buckets are + * allocated in such a way that the expected frequency of + * samples in each bucket is approximately the same. In + * order to make this possible, a mapping table is + * computed which maps "normalized" samples into the + * appropriate bucket. 
 * @param Distribution type of probability distribution to test for
 * @param SampleCount number of samples that are available
 * @param Confidence probability of a Type I error
 * @return Pointer to new histogram data structure
 */
static BUCKETS *MakeBuckets(DISTRIBUTION Distribution,
                            uint32_t SampleCount,
                            double Confidence) {
  // density function for each supported distribution type
  const DENSITYFUNC DensityFunction[] =
    { NormalDensity, UniformDensity, UniformDensity };
  int i, j;
  BUCKETS *Buckets;
  double BucketProbability;
  double NextBucketBoundary;
  double Probability;
  double ProbabilityDelta;
  double LastProbDensity;
  double ProbDensity;
  uint16_t CurrentBucket;
  bool Symmetrical;

  // allocate memory needed for data structure
  Buckets = static_cast<BUCKETS *>(malloc(sizeof(BUCKETS)));
  Buckets->NumberOfBuckets = OptimumNumberOfBuckets(SampleCount);
  Buckets->SampleCount = SampleCount;
  Buckets->Confidence = Confidence;
  Buckets->Count =
    static_cast<uint32_t *>(malloc(Buckets->NumberOfBuckets * sizeof(uint32_t)));
  Buckets->ExpectedCount = static_cast<float *>(
    malloc(Buckets->NumberOfBuckets * sizeof(float)));

  // initialize simple fields
  Buckets->Distribution = Distribution;
  for (i = 0; i < Buckets->NumberOfBuckets; i++) {
    Buckets->Count[i] = 0;
    Buckets->ExpectedCount[i] = 0.0;
  }

  // all currently defined distributions are symmetrical
  Symmetrical = true;
  Buckets->ChiSquared = ComputeChiSquared(
      DegreesOfFreedom(Distribution, Buckets->NumberOfBuckets), Confidence);

  if (Symmetrical) {
    // allocate buckets so that all have approx. equal probability
    BucketProbability = 1.0 / static_cast<double>(Buckets->NumberOfBuckets);

    // distribution is symmetric so fill in upper half then copy.
    // For an odd bucket count the middle bucket straddles the mean, so
    // only half of its probability lies in the upper half.
    CurrentBucket = Buckets->NumberOfBuckets / 2;
    if (Odd (Buckets->NumberOfBuckets))
      NextBucketBoundary = BucketProbability / 2;
    else
      NextBucketBoundary = BucketProbability;

    // integrate the density over [BUCKETTABLESIZE/2, BUCKETTABLESIZE)
    // with a trapezoidal rule, advancing to the next bucket whenever the
    // accumulated probability crosses the next boundary
    Probability = 0.0;
    LastProbDensity =
      (*DensityFunction[static_cast<int>(Distribution)]) (BUCKETTABLESIZE / 2);
    for (i = BUCKETTABLESIZE / 2; i < BUCKETTABLESIZE; i++) {
      ProbDensity = (*DensityFunction[static_cast<int>(Distribution)]) (i + 1);
      ProbabilityDelta = Integral (LastProbDensity, ProbDensity, 1.0);
      Probability += ProbabilityDelta;
      if (Probability > NextBucketBoundary) {
        if (CurrentBucket < Buckets->NumberOfBuckets - 1)
          CurrentBucket++;
        NextBucketBoundary += BucketProbability;
      }
      Buckets->Bucket[i] = CurrentBucket;
      Buckets->ExpectedCount[CurrentBucket] +=
        static_cast<float>(ProbabilityDelta * SampleCount);
      LastProbDensity = ProbDensity;
    }
    // place any leftover probability into the last bucket
    Buckets->ExpectedCount[CurrentBucket] +=
      static_cast<float>((0.5 - Probability) * SampleCount);

    // copy upper half of distribution to lower half
    for (i = 0, j = BUCKETTABLESIZE - 1; i < j; i++, j--)
      Buckets->Bucket[i] =
        Mirror(Buckets->Bucket[j], Buckets->NumberOfBuckets);

    // copy upper half of expected counts to lower half
    for (i = 0, j = Buckets->NumberOfBuckets - 1; i <= j; i++, j--)
      Buckets->ExpectedCount[i] += Buckets->ExpectedCount[j];
  }
  return Buckets;
}                                // MakeBuckets

/**
 * This routine computes the optimum number of histogram
 * buckets that should be used in a chi-squared goodness of
 * fit test for the specified number of samples. The optimum
 * number is computed based on Table 4.1 on pg. 147 of
 * "Measurement and Analysis of Random Data" by Bendat & Piersol.
 * Linear interpolation is used to interpolate between table
 * values.
The table is intended for a 0.05 level of + * significance (alpha). This routine assumes that it is + * equally valid for other alpha's, which may not be true. + * @param SampleCount number of samples to be tested + * @return Optimum number of histogram buckets + */ +static uint16_t OptimumNumberOfBuckets(uint32_t SampleCount) { + uint8_t Last, Next; + float Slope; + + if (SampleCount < kCountTable[0]) + return kBucketsTable[0]; + + for (Last = 0, Next = 1; Next < LOOKUPTABLESIZE; Last++, Next++) { + if (SampleCount <= kCountTable[Next]) { + Slope = static_cast<float>(kBucketsTable[Next] - kBucketsTable[Last]) / + static_cast<float>(kCountTable[Next] - kCountTable[Last]); + return (static_cast<uint16_t>(kBucketsTable[Last] + + Slope * (SampleCount - kCountTable[Last]))); + } + } + return kBucketsTable[Last]; +} // OptimumNumberOfBuckets + +/** + * This routine computes the chi-squared value which will + * leave a cumulative probability of Alpha in the right tail + * of a chi-squared distribution with the specified number of + * degrees of freedom. Alpha must be between 0 and 1. + * DegreesOfFreedom must be even. The routine maintains an + * array of lists. Each list corresponds to a different + * number of degrees of freedom. Each entry in the list + * corresponds to a different alpha value and its corresponding + * chi-squared value. Therefore, once a particular chi-squared + * value is computed, it is stored in the list and never + * needs to be computed again. + * @param DegreesOfFreedom determines shape of distribution + * @param Alpha probability of right tail + * @return Desired chi-squared value + */ +static double +ComputeChiSquared (uint16_t DegreesOfFreedom, double Alpha) +#define CHIACCURACY 0.01 +#define MINALPHA (1e-200) +{ + static LIST ChiWith[MAXDEGREESOFFREEDOM + 1]; + + CHISTRUCT *OldChiSquared; + CHISTRUCT SearchKey; + + // limit the minimum alpha that can be used - if alpha is too small + // it may not be possible to compute chi-squared. 
+ Alpha = ClipToRange(Alpha, MINALPHA, 1.0); + if (Odd (DegreesOfFreedom)) + DegreesOfFreedom++; + + /* find the list of chi-squared values which have already been computed + for the specified number of degrees of freedom. Search the list for + the desired chi-squared. */ + SearchKey.Alpha = Alpha; + OldChiSquared = reinterpret_cast<CHISTRUCT *>first_node (search (ChiWith[DegreesOfFreedom], + &SearchKey, AlphaMatch)); + + if (OldChiSquared == nullptr) { + OldChiSquared = NewChiStruct (DegreesOfFreedom, Alpha); + OldChiSquared->ChiSquared = Solve (ChiArea, OldChiSquared, + static_cast<double>(DegreesOfFreedom), + CHIACCURACY); + ChiWith[DegreesOfFreedom] = push (ChiWith[DegreesOfFreedom], + OldChiSquared); + } + else { + // further optimization might move OldChiSquared to front of list + } + + return (OldChiSquared->ChiSquared); + +} // ComputeChiSquared + +/** + * This routine computes the probability density function + * of a discrete normal distribution defined by the global + * variables kNormalMean, kNormalVariance, and kNormalMagnitude. + * Normal magnitude could, of course, be computed in terms of + * the normal variance but it is precomputed for efficiency. + * @param x number to compute the normal probability density for + * @note Globals: + * kNormalMean mean of a discrete normal distribution + * kNormalVariance variance of a discrete normal distribution + * kNormalMagnitude magnitude of a discrete normal distribution + * @return The value of the normal distribution at x. + */ +static double NormalDensity(int32_t x) { + double Distance; + + Distance = x - kNormalMean; + return kNormalMagnitude * exp(-0.5 * Distance * Distance / kNormalVariance); +} // NormalDensity + +/** + * This routine computes the probability density function + * of a uniform distribution at the specified point. The + * range of the distribution is from 0 to BUCKETTABLESIZE. 
 * @param x number to compute the uniform probability density for
 * @return The value of the uniform distribution at x.
 */
static double UniformDensity(int32_t x) {
  // constant density over the closed range [0, BUCKETTABLESIZE]
  constexpr auto UniformDistributionDensity = 1.0 / BUCKETTABLESIZE;

  if ((x >= 0) && (x <= BUCKETTABLESIZE)) {
    return UniformDistributionDensity;
  } else {
    return 0.0;
  }
}                                // UniformDensity

/**
 * This routine computes a trapezoidal approximation to the
 * integral of a function over a small delta in x.
 * @param f1 value of function at x1
 * @param f2 value of function at x2
 * @param Dx x2 - x1 (should always be positive)
 * @return Approximation of the integral of the function from x1 to x2.
 */
static double Integral(double f1, double f2, double Dx) {
  return (f1 + f2) * Dx / 2.0;
}                                // Integral

/**
 * This routine counts the number of cluster samples which
 * fall within the various histogram buckets in Buckets. Only
 * one dimension of each sample is examined. The exact meaning
 * of the Mean and StdDev parameters depends on the
 * distribution which is being analyzed (this info is in the
 * Buckets data structure). For normal distributions, Mean
 * and StdDev have the expected meanings. For uniform and
 * random distributions the Mean is the center point of the
 * range and the StdDev is 1/2 the range. A dimension with
 * zero standard deviation cannot be statistically analyzed.
 * In this case, a pseudo-analysis is used.
 * The Buckets data structure is filled in.
 * @param Buckets histogram buckets to count samples
 * @param Cluster cluster whose samples are being analyzed
 * @param Dim dimension of samples which is being analyzed
 * @param ParamDesc description of the dimension
 * @param Mean "mean" of the distribution
 * @param StdDev "standard deviation" of the distribution
 */
static void FillBuckets(BUCKETS *Buckets,
                        CLUSTER *Cluster,
                        uint16_t Dim,
                        PARAM_DESC *ParamDesc,
                        float Mean,
                        float StdDev) {
  uint16_t BucketID;
  int i;
  LIST SearchState;
  SAMPLE *Sample;

  // initialize the histogram bucket counts to 0
  for (i = 0; i < Buckets->NumberOfBuckets; i++)
    Buckets->Count[i] = 0;

  if (StdDev == 0.0) {
    /* if the standard deviation is zero, then we can't statistically
       analyze the cluster. Use a pseudo-analysis: samples exactly on
       the mean are distributed evenly across all buckets. Samples greater
       than the mean are placed in the last bucket; samples less than the
       mean are placed in the first bucket. */

    InitSampleSearch(SearchState, Cluster);
    i = 0;                       // round-robin index for on-mean samples
    while ((Sample = NextSample (&SearchState)) != nullptr) {
      if (Sample->Mean[Dim] > Mean)
        BucketID = Buckets->NumberOfBuckets - 1;
      else if (Sample->Mean[Dim] < Mean)
        BucketID = 0;
      else
        BucketID = i;
      Buckets->Count[BucketID] += 1;
      i++;
      if (i >= Buckets->NumberOfBuckets)
        i = 0;
    }
  }
  else {
    // search for all samples in the cluster and add to histogram buckets;
    // the bucket routines return an index into the Bucket mapping table,
    // which in turn maps to the actual histogram bucket
    InitSampleSearch(SearchState, Cluster);
    while ((Sample = NextSample (&SearchState)) != nullptr) {
      switch (Buckets->Distribution) {
        case normal:
          BucketID = NormalBucket (ParamDesc, Sample->Mean[Dim],
                                   Mean, StdDev);
          break;
        case D_random:
        case uniform:
          BucketID = UniformBucket (ParamDesc, Sample->Mean[Dim],
                                    Mean, StdDev);
          break;
        default:
          BucketID = 0;
      }
      Buckets->Count[Buckets->Bucket[BucketID]] += 1;
    }
  }
}                                // FillBuckets

/**
 * This routine determines which bucket x falls into in the
 * discrete normal distribution
defined by kNormalMean + * and kNormalStdDev. x values which exceed the range of + * the discrete distribution are clipped. + * @param ParamDesc used to identify circular dimensions + * @param x value to be normalized + * @param Mean mean of normal distribution + * @param StdDev standard deviation of normal distribution + * @return Bucket number into which x falls + */ +static uint16_t NormalBucket(PARAM_DESC *ParamDesc, + float x, + float Mean, + float StdDev) { + float X; + + // wraparound circular parameters if necessary + if (ParamDesc->Circular) { + if (x - Mean > ParamDesc->HalfRange) + x -= ParamDesc->Range; + else if (x - Mean < -ParamDesc->HalfRange) + x += ParamDesc->Range; + } + + X = ((x - Mean) / StdDev) * kNormalStdDev + kNormalMean; + if (X < 0) + return 0; + if (X > BUCKETTABLESIZE - 1) + return (static_cast<uint16_t>(BUCKETTABLESIZE - 1)); + return static_cast<uint16_t>(floor(static_cast<double>(X))); +} // NormalBucket + +/** + * This routine determines which bucket x falls into in the + * discrete uniform distribution defined by + * BUCKETTABLESIZE. x values which exceed the range of + * the discrete distribution are clipped. 
+ * @param ParamDesc used to identify circular dimensions + * @param x value to be normalized + * @param Mean center of range of uniform distribution + * @param StdDev 1/2 the range of the uniform distribution + * @return Bucket number into which x falls + */ +static uint16_t UniformBucket(PARAM_DESC *ParamDesc, + float x, + float Mean, + float StdDev) { + float X; + + // wraparound circular parameters if necessary + if (ParamDesc->Circular) { + if (x - Mean > ParamDesc->HalfRange) + x -= ParamDesc->Range; + else if (x - Mean < -ParamDesc->HalfRange) + x += ParamDesc->Range; + } + + X = ((x - Mean) / (2 * StdDev) * BUCKETTABLESIZE + BUCKETTABLESIZE / 2.0); + if (X < 0) + return 0; + if (X > BUCKETTABLESIZE - 1) + return static_cast<uint16_t>(BUCKETTABLESIZE - 1); + return static_cast<uint16_t>(floor(static_cast<double>(X))); +} // UniformBucket + +/** + * This routine performs a chi-square goodness of fit test + * on the histogram data in the Buckets data structure. + * true is returned if the histogram matches the probability + * distribution which was specified when the Buckets + * structure was originally created. Otherwise false is + * returned. + * @param Buckets histogram data to perform chi-square test on + * @return true if samples match distribution, false otherwise + */ +static bool DistributionOK(BUCKETS* Buckets) { + float FrequencyDifference; + float TotalDifference; + int i; + + // compute how well the histogram matches the expected histogram + TotalDifference = 0.0; + for (i = 0; i < Buckets->NumberOfBuckets; i++) { + FrequencyDifference = Buckets->Count[i] - Buckets->ExpectedCount[i]; + TotalDifference += (FrequencyDifference * FrequencyDifference) / + Buckets->ExpectedCount[i]; + } + + // test to see if the difference is more than expected + if (TotalDifference > Buckets->ChiSquared) + return false; + else + return true; +} // DistributionOK + +/** + * This routine frees the memory used by the statistics + * data structure. 
+ * @param Statistics pointer to data structure to be freed + */ +static void FreeStatistics(STATISTICS *Statistics) { + free(Statistics->CoVariance); + free(Statistics->Min); + free(Statistics->Max); + free(Statistics); +} // FreeStatistics + +/** + * This routine properly frees the memory used by a BUCKETS. + * + * @param buckets pointer to data structure to be freed + */ +static void FreeBuckets(BUCKETS *buckets) { + free(buckets->Count); + free(buckets->ExpectedCount); + free(buckets); +} // FreeBuckets + +/** + * This routine frees the memory consumed by the specified + * cluster and all of its subclusters. This is done by + * recursive calls to FreeCluster(). + * + * @param Cluster pointer to cluster to be freed + */ +static void FreeCluster(CLUSTER *Cluster) { + if (Cluster != nullptr) { + FreeCluster (Cluster->Left); + FreeCluster (Cluster->Right); + free(Cluster); + } +} // FreeCluster + +/** + * This routine computes the degrees of freedom that should + * be used in a chi-squared test with the specified number of + * histogram buckets. The result is always rounded up to + * the next even number so that the value of chi-squared can be + * computed more easily. This will cause the value of + * chi-squared to be higher than the optimum value, resulting + * in the chi-square test being more lenient than optimum. 
+ * @param Distribution distribution being tested for + * @param HistogramBuckets number of buckets in chi-square test + * @return The number of degrees of freedom for a chi-square test + */ +static uint16_t DegreesOfFreedom(DISTRIBUTION Distribution, uint16_t HistogramBuckets) { + static uint8_t DegreeOffsets[] = { 3, 3, 1 }; + + uint16_t AdjustedNumBuckets; + + AdjustedNumBuckets = HistogramBuckets - DegreeOffsets[static_cast<int>(Distribution)]; + if (Odd (AdjustedNumBuckets)) + AdjustedNumBuckets++; + return (AdjustedNumBuckets); + +} // DegreesOfFreedom + +/** + * This routine multiplies each ExpectedCount histogram entry + * by NewSampleCount/OldSampleCount so that the histogram + * is now adjusted to the new sample count. + * @param Buckets histogram data structure to adjust + * @param NewSampleCount new sample count to adjust to + */ +static void AdjustBuckets(BUCKETS *Buckets, uint32_t NewSampleCount) { + int i; + double AdjustFactor; + + AdjustFactor = ((static_cast<double>(NewSampleCount)) / + (static_cast<double>(Buckets->SampleCount))); + + for (i = 0; i < Buckets->NumberOfBuckets; i++) { + Buckets->ExpectedCount[i] *= AdjustFactor; + } + + Buckets->SampleCount = NewSampleCount; + +} // AdjustBuckets + +/** + * This routine sets the bucket counts in the specified histogram + * to zero. + * @param Buckets histogram data structure to init + */ +static void InitBuckets(BUCKETS *Buckets) { + int i; + + for (i = 0; i < Buckets->NumberOfBuckets; i++) { + Buckets->Count[i] = 0; + } + +} // InitBuckets + +/** + * This routine is used to search a list of structures which + * hold pre-computed chi-squared values for a chi-squared + * value whose corresponding alpha field matches the alpha + * field of SearchKey. + * + * It is called by the list search routines. 
+ * + * @param arg1 chi-squared struct being tested for a match + * @param arg2 chi-squared struct that is the search key + * @return true if ChiStruct's Alpha matches SearchKey's Alpha + */ +static int AlphaMatch(void *arg1, //CHISTRUCT *ChiStruct, + void *arg2) { //CHISTRUCT *SearchKey) + auto *ChiStruct = static_cast<CHISTRUCT *>(arg1); + auto *SearchKey = static_cast<CHISTRUCT *>(arg2); + + return (ChiStruct->Alpha == SearchKey->Alpha); + +} // AlphaMatch + +/** + * This routine allocates a new data structure which is used + * to hold a chi-squared value along with its associated + * number of degrees of freedom and alpha value. + * + * @param DegreesOfFreedom degrees of freedom for new chi value + * @param Alpha confidence level for new chi value + * @return newly allocated data structure + */ +static CHISTRUCT *NewChiStruct(uint16_t DegreesOfFreedom, double Alpha) { + CHISTRUCT *NewChiStruct; + + NewChiStruct = static_cast<CHISTRUCT *>(malloc (sizeof (CHISTRUCT))); + NewChiStruct->DegreesOfFreedom = DegreesOfFreedom; + NewChiStruct->Alpha = Alpha; + return (NewChiStruct); + +} // NewChiStruct + +/** + * This routine attempts to find an x value at which Function + * goes to zero (i.e. a root of the function). It will only + * work correctly if a solution actually exists and there + * are no extrema between the solution and the InitialGuess. + * The algorithms used are extremely primitive. + * + * @param Function function whose zero is to be found + * @param FunctionParams arbitrary data to pass to function + * @param InitialGuess point to start solution search at + * @param Accuracy maximum allowed error + * @return Solution of function (x for which f(x) = 0). 
 */
static double
Solve (SOLVEFUNC Function,
void *FunctionParams, double InitialGuess, double Accuracy)
#define INITIALDELTA 0.1
#define DELTARATIO 0.1
{
  double x;                      // current root estimate
  double f;                      // function value at x
  double Slope;                  // finite-difference slope estimate
  double Delta;                  // step used for the slope estimate
  double NewDelta;
  double xDelta;                 // Newton step f/Slope
  double LastPosX, LastNegX;     // bracket around the root so far

  x = InitialGuess;
  Delta = INITIALDELTA;
  LastPosX = FLT_MAX;
  LastNegX = -FLT_MAX;
  f = (*Function) (static_cast<CHISTRUCT *>(FunctionParams), x);
  // iterate until the bracket around the root is tighter than Accuracy
  while (Abs (LastPosX - LastNegX) > Accuracy) {
    // keep track of outer bounds of current estimate
    if (f < 0)
      LastNegX = x;
    else
      LastPosX = x;

    // compute the approx. slope of f(x) at the current point
    Slope =
      ((*Function) (static_cast<CHISTRUCT *>(FunctionParams), x + Delta) - f) / Delta;

    // compute the next solution guess (a Newton-style step)
    xDelta = f / Slope;
    x -= xDelta;

    // reduce the delta used for computing slope to be a fraction of
    // the amount moved to get to the new guess
    NewDelta = Abs (xDelta) * DELTARATIO;
    if (NewDelta < Delta)
      Delta = NewDelta;

    // compute the value of the function at the new guess
    f = (*Function) (static_cast<CHISTRUCT *>(FunctionParams), x);
  }
  return (x);

}                                // Solve

/**
 * This routine computes the area under a chi density curve
 * from 0 to x, minus the desired area under the curve. The
 * number of degrees of freedom of the chi curve is specified
 * in the ChiParams structure. The desired area is also
 * specified in the ChiParams structure as Alpha (or 1 minus
 * the desired area). This routine is intended to be passed
 * to the Solve() function to find the value of chi-squared
 * which will yield a desired area under the right tail of
 * the chi density curve. The function will only work for
 * even degrees of freedom. The equations are based on
 * integrating the chi density curve in parts to obtain
 * a series that can be used to compute the area under the
 * curve.
+ * @param ChiParams contains degrees of freedom and alpha + * @param x value of chi-squared to evaluate + * @return Error between actual and desired area under the chi curve. + */ +static double ChiArea(CHISTRUCT *ChiParams, double x) { + int i, N; + double SeriesTotal; + double Denominator; + double PowerOfx; + + N = ChiParams->DegreesOfFreedom / 2 - 1; + SeriesTotal = 1; + Denominator = 1; + PowerOfx = 1; + for (i = 1; i <= N; i++) { + Denominator *= 2 * i; + PowerOfx *= x; + SeriesTotal += PowerOfx / Denominator; + } + return ((SeriesTotal * exp (-0.5 * x)) - ChiParams->Alpha); + +} // ChiArea + +/** + * This routine looks at all samples in the specified cluster. + * It computes a running estimate of the percentage of the + * characters which have more than 1 sample in the cluster. + * When this percentage exceeds MaxIllegal, true is returned. + * Otherwise false is returned. The CharID + * fields must contain integers which identify the training + * characters which were used to generate the sample. One + * integer is used for each sample. The NumChar field in + * the Clusterer must contain the number of characters in the + * training set. All CharID fields must be between 0 and + * NumChar-1. The main function of this routine is to help + * identify clusters which need to be split further, i.e. if + * numerous training characters have 2 or more features which are + * contained in the same cluster, then the cluster should be + * split. + * + * @param Clusterer data structure holding cluster tree + * @param Cluster cluster containing samples to be tested + * @param MaxIllegal max percentage of samples allowed to have + * more than 1 feature in the cluster + * @return true if the cluster should be split, false otherwise. 
 */
static bool
MultipleCharSamples(CLUSTERER* Clusterer,
                    CLUSTER* Cluster, float MaxIllegal)
#define ILLEGAL_CHAR 2
{
  // Scratch buffer indexed by CharID.  Values: 0 = not yet seen in this
  // cluster, 1 = seen once, ILLEGAL_CHAR = already counted as illegal.
  // NOTE(review): function-local static mutable state makes this routine
  // non-reentrant and not thread-safe.
  static std::vector<uint8_t> CharFlags;
  LIST SearchState;
  SAMPLE *Sample;
  int32_t CharID;
  int32_t NumCharInCluster;
  int32_t NumIllegalInCluster;
  float PercentIllegal;

  // initial estimate assumes that no illegal chars exist in the cluster
  NumCharInCluster = Cluster->SampleCount;
  NumIllegalInCluster = 0;

  // Grow (never shrink) the reusable flag buffer to cover all CharIDs.
  if (Clusterer->NumChar > CharFlags.size()) {
    CharFlags.resize(Clusterer->NumChar);
  }

  for (auto& CharFlag : CharFlags)
    CharFlag = false;

  // find each sample in the cluster and check if we have seen it before
  InitSampleSearch(SearchState, Cluster);
  while ((Sample = NextSample (&SearchState)) != nullptr) {
    CharID = Sample->CharID;
    if (CharFlags[CharID] == false) {
      CharFlags[CharID] = true;
    }
    else {
      if (CharFlags[CharID] == true) {
        // Second occurrence: this character becomes illegal exactly once.
        NumIllegalInCluster++;
        CharFlags[CharID] = ILLEGAL_CHAR;
      }
      // Each duplicate sample reduces the count of distinct characters,
      // so PercentIllegal is illegal-chars / distinct-chars seen so far.
      NumCharInCluster--;
      PercentIllegal = static_cast<float>(NumIllegalInCluster) / NumCharInCluster;
      if (PercentIllegal > MaxIllegal) {
        destroy(SearchState);
        return true;
      }
    }
  }
  return false;

} // MultipleCharSamples

/**
 * Compute the inverse of a matrix using LU decomposition with partial pivoting.
 * The return value is the sum of norms of the off-diagonal terms of the
 * product of a and inv. (A measure of the error.)
 */
static double InvertMatrix(const float* input, int size, float* inv) {
  // Allocate memory for the 2D arrays.
  GENERIC_2D_ARRAY<double> U(size, size, 0.0);
  GENERIC_2D_ARRAY<double> U_inv(size, size, 0.0);
  GENERIC_2D_ARRAY<double> L(size, size, 0.0);

  // Initialize the working matrices. U starts as input, L as I and U_inv as O.
  int row;
  int col;
  for (row = 0; row < size; row++) {
    for (col = 0; col < size; col++) {
      U[row][col] = input[row*size + col];
      L[row][col] = row == col ? 1.0 : 0.0;
      U_inv[row][col] = 0.0;
    }
  }

  // Compute forward matrix by inversion by LU decomposition of input.
  for (col = 0; col < size; ++col) {
    // Find best pivot (largest magnitude in this column, at or below the
    // diagonal).
    int best_row = 0;
    double best_pivot = -1.0;
    for (row = col; row < size; ++row) {
      if (Abs(U[row][col]) > best_pivot) {
        best_pivot = Abs(U[row][col]);
        best_row = row;
      }
    }
    // Exchange pivot rows.
    if (best_row != col) {
      for (int k = 0; k < size; ++k) {
        double tmp = U[best_row][k];
        U[best_row][k] = U[col][k];
        U[col][k] = tmp;
        tmp = L[best_row][k];
        L[best_row][k] = L[col][k];
        L[col][k] = tmp;
      }
    }
    // Now do the pivot itself: eliminate the column below the diagonal,
    // applying the same row operations to L.
    // NOTE(review): a singular input (U[col][col] == 0) divides by zero
    // here -- presumably callers supply well-conditioned matrices; verify.
    for (row = col + 1; row < size; ++row) {
      double ratio = -U[row][col] / U[col][col];
      for (int j = col; j < size; ++j) {
        U[row][j] += U[col][j] * ratio;
      }
      for (int k = 0; k < size; ++k) {
        L[row][k] += L[col][k] * ratio;
      }
    }
  }
  // Next invert U (upper triangular) by back substitution.
  for (col = 0; col < size; ++col) {
    U_inv[col][col] = 1.0 / U[col][col];
    for (row = col - 1; row >= 0; --row) {
      double total = 0.0;
      for (int k = col; k > row; --k) {
        total += U[row][k] * U_inv[k][col];
      }
      U_inv[row][col] = -total / U[row][row];
    }
  }
  // Now the answer is U_inv.L.
  for (row = 0; row < size; row++) {
    for (col = 0; col < size; col++) {
      double sum = 0.0;
      for (int k = row; k < size; ++k) {
        sum += U_inv[row][k] * L[k][col];
      }
      inv[row*size + col] = sum;
    }
  }
  // Check matrix product: sum the absolute off-diagonal elements of
  // input * inv, which would all be zero for a perfect inverse.
  double error_sum = 0.0;
  for (row = 0; row < size; row++) {
    for (col = 0; col < size; col++) {
      double sum = 0.0;
      for (int k = 0; k < size; ++k) {
        sum += static_cast<double>(input[row * size + k]) * inv[k * size + col];
      }
      if (row != col) {
        error_sum += Abs(sum);
      }
    }
  }
  return error_sum;
}

} // namespace tesseract
diff --git a/tesseract/src/classify/cluster.h b/tesseract/src/classify/cluster.h
new file mode 100644
index 00000000..8a6a270a
--- /dev/null
+++ b/tesseract/src/classify/cluster.h
@@ -0,0 +1,138 @@
/******************************************************************************
 ** Filename: cluster.h
 ** Purpose: Definition of feature space clustering routines
 ** Author: Dan Johnson
 **
 ** (c) Copyright Hewlett-Packard Company, 1988.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *****************************************************************************/

#ifndef CLUSTER_H
#define CLUSTER_H

#include "kdtree.h"
#include "oldlist.h"

namespace tesseract {

struct BUCKETS;

#define MINBUCKETS 5
#define MAXBUCKETS 39

/*----------------------------------------------------------------------
  Types
----------------------------------------------------------------------*/
// A node of the binary cluster tree; leaves are individual samples.
typedef struct sample {
  bool Clustered : 1;         // true if included in a higher cluster
  bool Prototype : 1;         // true if cluster represented by a proto
  unsigned SampleCount : 30;  // number of samples in this cluster
  struct sample* Left;        // ptr to left sub-cluster
  struct sample* Right;       // ptr to right sub-cluster
  int32_t CharID;             // identifier of char sample came from
  float Mean[1];              // mean of cluster - SampleSize floats
                              // (trailing flexible-array idiom; presumably
                              // over-allocated at creation -- verify in
                              // MakeSample)
} CLUSTER;

using SAMPLE = CLUSTER;  // can refer to as either sample or cluster

typedef enum { spherical, elliptical, mixed, automatic } PROTOSTYLE;

typedef struct {          // parameters to control clustering
  PROTOSTYLE ProtoStyle;  // specifies types of protos to be made
  float MinSamples;       // min # of samples per proto - % of total
  float MaxIllegal;       // max percentage of samples in a cluster which
                          // have more than 1 feature in that cluster
  float Independence;     // desired independence between dimensions
  double Confidence;      // desired confidence in prototypes created
  int MagicSamples;       // Ideal number of samples in a cluster.
} CLUSTERCONFIG;

typedef enum { normal, uniform, D_random, DISTRIBUTION_COUNT } DISTRIBUTION;

// Scalar for spherical protos, per-dimension array for elliptical ones.
typedef union {
  float Spherical;
  float* Elliptical;
} FLOATUNION;

typedef struct {
  bool Significant : 1;     // true if prototype is significant
  bool Merged : 1;          // Merged after clustering so do not output
                            // but kept for display purposes. If it has no
                            // samples then it was actually merged.
                            // Otherwise it matched an already significant
                            // cluster.
  unsigned Style : 2;       // spherical, elliptical, or mixed
  unsigned NumSamples : 28; // number of samples in the cluster
  CLUSTER* Cluster;         // ptr to cluster which made prototype
  DISTRIBUTION* Distrib;    // different distribution for each dimension
  float* Mean;              // prototype mean
  float TotalMagnitude;     // total magnitude over all dimensions
  float LogMagnitude;       // log base e of TotalMagnitude
  FLOATUNION Variance;      // prototype variance
  FLOATUNION Magnitude;     // magnitude of density function
  FLOATUNION Weight;        // weight of density function
} PROTOTYPE;

typedef struct {
  int16_t SampleSize;       // number of parameters per sample
  PARAM_DESC* ParamDesc;    // description of each parameter
  int32_t NumberOfSamples;  // total number of samples being clustered
  KDTREE* KDTree;           // for optimal nearest neighbor searching
  CLUSTER* Root;            // ptr to root cluster of cluster tree
  LIST ProtoList;           // list of prototypes
  int32_t NumChar;          // # of characters represented by samples
  // cache of reusable histograms by distribution type and number of buckets.
  BUCKETS* bucket_cache[DISTRIBUTION_COUNT][MAXBUCKETS + 1 - MINBUCKETS];
} CLUSTERER;

typedef struct {
  int32_t NumSamples;     // number of samples in list
  int32_t MaxNumSamples;  // maximum size of list
  SAMPLE* Sample[1];      // array of ptrs to sample data structures
} SAMPLELIST;

// low level cluster tree analysis routines.
// Seeds the search state S with cluster C (or an empty list when C is null)
// for use with NextSample().
#define InitSampleSearch(S, C) \
  (((C) == nullptr) ? (S = NIL_LIST) : (S = push(NIL_LIST, (C))))

/*--------------------------------------------------------------------------
  Public Function Prototypes
--------------------------------------------------------------------------*/
TESS_API
CLUSTERER* MakeClusterer(int16_t SampleSize, const PARAM_DESC ParamDesc[]);

TESS_API
SAMPLE* MakeSample(CLUSTERER* Clusterer, const float* Feature, int32_t CharID);

TESS_API
LIST ClusterSamples(CLUSTERER* Clusterer, CLUSTERCONFIG* Config);

TESS_API
void FreeClusterer(CLUSTERER* Clusterer);

TESS_API
void FreeProtoList(LIST* ProtoList);

void FreePrototype(void* arg);  // PROTOTYPE *Prototype);

CLUSTER* NextSample(LIST* SearchState);

float Mean(PROTOTYPE* Proto, uint16_t Dimension);

float StandardDeviation(PROTOTYPE* Proto, uint16_t Dimension);

TESS_API
int32_t MergeClusters(int16_t N, PARAM_DESC ParamDesc[], int32_t n1, int32_t n2,
                      float m[], float m1[], float m2[]);

} // namespace tesseract

#endif
diff --git a/tesseract/src/classify/clusttool.cpp b/tesseract/src/classify/clusttool.cpp
new file mode 100644
index 00000000..4227a1f0
--- /dev/null
+++ b/tesseract/src/classify/clusttool.cpp
@@ -0,0 +1,319 @@
/******************************************************************************
 ** Filename: clusttool.cpp
 ** Purpose: Misc. tools for use with the clustering routines
 ** Author: Dan Johnson
 **
 ** (c) Copyright Hewlett-Packard Company, 1988.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and + ** limitations under the License. + *****************************************************************************/ + +#define _USE_MATH_DEFINES // for M_PI + +#include "clusttool.h" + +#include <cmath> // for M_PI, std::isnan +#include <locale> // for std::locale::classic +#include <sstream> // for std::stringstream + +namespace tesseract { + +//---------------Global Data Definitions and Declarations-------------------- +#define TOKENSIZE 80 ///< max size of tokens read from an input file +#define QUOTED_TOKENSIZE "79" +#define MAXSAMPLESIZE 65535 ///< max num of dimensions in feature space + +/** + * This routine reads N floats from the specified text file + * and places them into Buffer. If Buffer is nullptr, a buffer + * is created and passed back to the caller. If EOF is + * encountered before any floats can be read, nullptr is + * returned. + * @param fp open text file to read floats from + * @param N number of floats to read + * @param Buffer pointer to buffer to place floats into + * @return Pointer to buffer holding floats or nullptr if EOF + * @note Globals: None + */ +static float *ReadNFloats(TFile *fp, uint16_t N, float Buffer[]) { + const int kMaxLineSize = 1024; + char line[kMaxLineSize]; + if (fp->FGets(line, kMaxLineSize) == nullptr) { + tprintf("Hit EOF in ReadNFloats!\n"); + return nullptr; + } + bool needs_free = false; + + if (Buffer == nullptr) { + Buffer = static_cast<float *>(malloc(N * sizeof(float))); + needs_free = true; + } + + std::stringstream stream(line); + // Use "C" locale (needed for float values Buffer[i]). 
+ stream.imbue(std::locale::classic()); + for (uint16_t i = 0; i < N; i++) { + float f = NAN; + stream >> f; + if (std::isnan(f)) { + tprintf("Read of %u floats failed!\n", N); + if (needs_free) free(Buffer); + return nullptr; + } + Buffer[i] = f; + } + return Buffer; +} + +/** + * This routine writes a text representation of N floats from + * an array to a file. All of the floats are placed on one line. + * @param File open text file to write N floats to + * @param N number of floats to write + * @param Array array of floats to write + */ +static void WriteNFloats(FILE * File, uint16_t N, float Array[]) { + for (int i = 0; i < N; i++) + fprintf(File, " %9.6f", Array[i]); + fprintf(File, "\n"); +} + +/** + * This routine writes to the specified text file a word + * which represents the ProtoStyle. It does not append + * a carriage return to the end. + * @param File open text file to write prototype style to + * @param ProtoStyle prototype style to write + */ +static void WriteProtoStyle(FILE *File, PROTOSTYLE ProtoStyle) { + switch (ProtoStyle) { + case spherical: + fprintf (File, "spherical"); + break; + case elliptical: + fprintf (File, "elliptical"); + break; + case mixed: + fprintf (File, "mixed"); + break; + case automatic: + fprintf (File, "automatic"); + break; + } +} + +/** + * This routine reads a single integer from the specified + * file and checks to ensure that it is between 0 and + * MAXSAMPLESIZE. + * @param fp open text file to read sample size from + * @return Sample size + * @note Globals: None + */ +uint16_t ReadSampleSize(TFile *fp) { + int SampleSize = 0; + + const int kMaxLineSize = 100; + char line[kMaxLineSize]; + ASSERT_HOST(fp->FGets(line, kMaxLineSize) != nullptr); + ASSERT_HOST(sscanf(line, "%d", &SampleSize) == 1); + ASSERT_HOST(SampleSize >= 0 && SampleSize <= MAXSAMPLESIZE); + return SampleSize; +} + +/** + * This routine reads textual descriptions of sets of parameters + * which describe the characteristics of feature dimensions. 
+ * + * @param fp open text file to read N parameter descriptions from + * @param N number of parameter descriptions to read + * @return Pointer to an array of parameter descriptors. + * @note Globals: None + */ +PARAM_DESC *ReadParamDesc(TFile *fp, uint16_t N) { + PARAM_DESC *ParamDesc; + + ParamDesc = static_cast<PARAM_DESC *>(malloc (N * sizeof (PARAM_DESC))); + for (int i = 0; i < N; i++) { + const int kMaxLineSize = TOKENSIZE * 4; + char line[kMaxLineSize]; + ASSERT_HOST(fp->FGets(line, kMaxLineSize) != nullptr); + std::istringstream stream(line); + // Use "C" locale (needed for float values Min, Max). + stream.imbue(std::locale::classic()); + std::string linear_token; + stream >> linear_token; + std::string essential_token; + stream >> essential_token; + stream >> ParamDesc[i].Min; + stream >> ParamDesc[i].Max; + ASSERT_HOST(!stream.fail()); + ParamDesc[i].Circular = (linear_token[0] == 'c'); + ParamDesc[i].NonEssential = (essential_token[0] != 'e'); + ParamDesc[i].Range = ParamDesc[i].Max - ParamDesc[i].Min; + ParamDesc[i].HalfRange = ParamDesc[i].Range / 2; + ParamDesc[i].MidRange = (ParamDesc[i].Max + ParamDesc[i].Min) / 2; + } + return (ParamDesc); +} + +/** + * This routine reads a textual description of a prototype from + * the specified file. 
+ * + * @param fp open text file to read prototype from + * @param N number of dimensions used in prototype + * @return List of prototypes + * @note Globals: None + */ +PROTOTYPE *ReadPrototype(TFile *fp, uint16_t N) { + char sig_token[TOKENSIZE], shape_token[TOKENSIZE]; + PROTOTYPE *Proto; + int SampleCount; + int i; + + const int kMaxLineSize = TOKENSIZE * 4; + char line[kMaxLineSize]; + if (fp->FGets(line, kMaxLineSize) == nullptr || + sscanf(line, "%" QUOTED_TOKENSIZE "s %" QUOTED_TOKENSIZE "s %d", + sig_token, shape_token, &SampleCount) != 3) { + tprintf("Invalid prototype: %s\n", line); + return nullptr; + } + Proto = static_cast<PROTOTYPE *>(malloc(sizeof(PROTOTYPE))); + Proto->Cluster = nullptr; + Proto->Significant = (sig_token[0] == 's'); + + switch (shape_token[0]) { + case 's': + Proto->Style = spherical; + break; + case 'e': + Proto->Style = elliptical; + break; + case 'a': + Proto->Style = automatic; + break; + default: + tprintf("Invalid prototype style specification:%s\n", shape_token); + Proto->Style = elliptical; + } + + ASSERT_HOST(SampleCount >= 0); + Proto->NumSamples = SampleCount; + + Proto->Mean = ReadNFloats(fp, N, nullptr); + ASSERT_HOST(Proto->Mean != nullptr); + + switch (Proto->Style) { + case spherical: + ASSERT_HOST(ReadNFloats(fp, 1, &(Proto->Variance.Spherical)) != nullptr); + Proto->Magnitude.Spherical = + 1.0 / sqrt(2.0 * M_PI * Proto->Variance.Spherical); + Proto->TotalMagnitude = pow(Proto->Magnitude.Spherical, static_cast<float>(N)); + Proto->LogMagnitude = log(static_cast<double>(Proto->TotalMagnitude)); + Proto->Weight.Spherical = 1.0 / Proto->Variance.Spherical; + Proto->Distrib = nullptr; + break; + case elliptical: + Proto->Variance.Elliptical = ReadNFloats(fp, N, nullptr); + ASSERT_HOST(Proto->Variance.Elliptical != nullptr); + Proto->Magnitude.Elliptical = static_cast<float *>(malloc(N * sizeof(float))); + Proto->Weight.Elliptical = static_cast<float *>(malloc(N * sizeof(float))); + Proto->TotalMagnitude = 1.0; + for (i 
= 0; i < N; i++) { + Proto->Magnitude.Elliptical[i] = + 1.0 / sqrt(2.0 * M_PI * Proto->Variance.Elliptical[i]); + Proto->Weight.Elliptical[i] = 1.0 / Proto->Variance.Elliptical[i]; + Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i]; + } + Proto->LogMagnitude = log(static_cast<double>(Proto->TotalMagnitude)); + Proto->Distrib = nullptr; + break; + default: + free(Proto); + tprintf("Invalid prototype style\n"); + return nullptr; + } + return Proto; +} + +/** + * This routine writes an array of dimension descriptors to + * the specified text file. + * @param File open text file to write param descriptors to + * @param N number of param descriptors to write + * @param ParamDesc array of param descriptors to write + */ +void WriteParamDesc(FILE *File, uint16_t N, const PARAM_DESC ParamDesc[]) { + int i; + + for (i = 0; i < N; i++) { + if (ParamDesc[i].Circular) + fprintf (File, "circular "); + else + fprintf (File, "linear "); + + if (ParamDesc[i].NonEssential) + fprintf (File, "non-essential "); + else + fprintf (File, "essential "); + + fprintf (File, "%10.6f %10.6f\n", ParamDesc[i].Min, ParamDesc[i].Max); + } +} + +/** + * This routine writes a textual description of a prototype + * to the specified text file. 
 * @param File open text file to write prototype to
 * @param N number of dimensions in feature space
 * @param Proto prototype to write out
 */
void WritePrototype(FILE *File, uint16_t N, PROTOTYPE *Proto) {
  int i;

  if (Proto->Significant)
    fprintf (File, "significant   ");
  else
    fprintf (File, "insignificant ");
  WriteProtoStyle (File, static_cast<PROTOSTYLE>(Proto->Style));
  fprintf (File, "%6d\n\t", Proto->NumSamples);
  WriteNFloats (File, N, Proto->Mean);
  fprintf (File, "\t");

  // NOTE: the switch has no case for automatic, so such a proto writes no
  // variance section at all.
  switch (Proto->Style) {
    case spherical:
      WriteNFloats (File, 1, &(Proto->Variance.Spherical));
      break;
    case elliptical:
      WriteNFloats (File, N, Proto->Variance.Elliptical);
      break;
    case mixed:
      // Per-dimension distribution tags, then the elliptical variances.
      for (i = 0; i < N; i++)
        switch (Proto->Distrib[i]) {
          case normal:
            fprintf (File, " %9s", "normal");
            break;
          case uniform:
            fprintf (File, " %9s", "uniform");
            break;
          case D_random:
            fprintf (File, " %9s", "random");
            break;
          case DISTRIBUTION_COUNT:
            ASSERT_HOST(!"Distribution count not allowed!");
        }
      fprintf (File, "\n\t");
      WriteNFloats (File, N, Proto->Variance.Elliptical);
  }
}

} // namespace tesseract
diff --git a/tesseract/src/classify/clusttool.h b/tesseract/src/classify/clusttool.h
new file mode 100644
index 00000000..ead65618
--- /dev/null
+++ b/tesseract/src/classify/clusttool.h
@@ -0,0 +1,43 @@
/******************************************************************************
 ** Filename: clusttool.h
 ** Purpose: Definition of clustering utility tools
 ** Author: Dan Johnson
 **
 ** (c) Copyright Hewlett-Packard Company, 1988.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + ******************************************************************************/ + +#ifndef TESSERACT_CLASSIFY_CLUSTTOOL_H_ +#define TESSERACT_CLASSIFY_CLUSTTOOL_H_ + +#include "cluster.h" + +#include "serialis.h" + +#include <cstdio> + +namespace tesseract { + +uint16_t ReadSampleSize(tesseract::TFile *fp); + +PARAM_DESC *ReadParamDesc(tesseract::TFile *fp, uint16_t N); + +PROTOTYPE *ReadPrototype(tesseract::TFile *fp, uint16_t N); + +TESS_API +void WriteParamDesc(FILE *File, uint16_t N, const PARAM_DESC ParamDesc[]); + +TESS_API +void WritePrototype(FILE *File, uint16_t N, PROTOTYPE *Proto); + +} // namespace tesseract + +#endif // TESSERACT_CLASSIFY_CLUSTTOOL_H_ diff --git a/tesseract/src/classify/cutoffs.cpp b/tesseract/src/classify/cutoffs.cpp new file mode 100644 index 00000000..f75788d8 --- /dev/null +++ b/tesseract/src/classify/cutoffs.cpp @@ -0,0 +1,73 @@ +/****************************************************************************** + ** Filename: cutoffs.c + ** Purpose: Routines to manipulate an array of class cutoffs. + ** Author: Dan Johnson + ** + ** (c) Copyright Hewlett-Packard Company, 1988. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. 
+ ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + ******************************************************************************/ +/*---------------------------------------------------------------------------- + Include Files and Type Defines +----------------------------------------------------------------------------*/ + +#include <cstdio> +#include <sstream> // for std::istringstream +#include <string> // for std::string + +#include "classify.h" +#include "helpers.h" +#include "serialis.h" +#include <tesseract/unichar.h> + +#define MAX_CUTOFF 1000 + +namespace tesseract { +/** + * Open file, read in all of the class-id/cutoff pairs + * and insert them into the Cutoffs array. Cutoffs are + * indexed in the array by class id. Unused entries in the + * array are set to an arbitrarily high cutoff value. 
 * @param fp file containing cutoff definitions
 * @param Cutoffs array to put cutoffs into
 */
void Classify::ReadNewCutoffs(TFile* fp, uint16_t* Cutoffs) {
  int Cutoff;

  // When a shape table is present the cutoffs file starts with serialized
  // shapetable cutoffs; a failed read is reported but not fatal.
  if (shape_table_ != nullptr) {
    if (!shapetable_cutoffs_.DeSerialize(fp)) {
      tprintf("Error during read of shapetable pffmtable!\n");
    }
  }
  // Default every class to the arbitrarily-high MAX_CUTOFF; entries read
  // below overwrite only the classes that appear in the file.
  for (int i = 0; i < MAX_NUM_CLASSES; i++)
    Cutoffs[i] = MAX_CUTOFF;

  const int kMaxLineSize = 100;
  char line[kMaxLineSize];
  while (fp->FGets(line, kMaxLineSize) != nullptr) {
    std::string Class;
    CLASS_ID ClassId;
    std::istringstream stream(line);
    // "C" locale keeps number parsing independent of the user's locale.
    stream.imbue(std::locale::classic());
    stream >> Class >> Cutoff;
    // A malformed line terminates the loop (not just that entry).
    if (stream.fail()) {
      break;
    }
    // "NULL" is the legacy token used for the space character's class.
    if (Class.compare("NULL") == 0) {
      ClassId = unicharset.unichar_to_id(" ");
    } else {
      ClassId = unicharset.unichar_to_id(Class.c_str());
    }
    ASSERT_HOST(ClassId >= 0 && ClassId < MAX_NUM_CLASSES);
    Cutoffs[ClassId] = Cutoff;
  }
}

} // namespace tesseract
diff --git a/tesseract/src/classify/featdefs.cpp b/tesseract/src/classify/featdefs.cpp
new file mode 100644
index 00000000..54647431
--- /dev/null
+++ b/tesseract/src/classify/featdefs.cpp
@@ -0,0 +1,280 @@
/******************************************************************************
 ** Filename: featdefs.cpp
 ** Purpose: Definitions of currently defined feature types.
 ** Author: Dan Johnson
 **
 ** (c) Copyright Hewlett-Packard Company, 1988.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
+ ******************************************************************************/ + +#include "featdefs.h" + +#include "picofeat.h" // for PicoFeatureLength +#include "scanutils.h" + +#include <cstring> +#include <cstdio> + +namespace tesseract { + +#define PICO_FEATURE_LENGTH 0.05 + +/*----------------------------------------------------------------------------- + Global Data Definitions and Declarations +-----------------------------------------------------------------------------*/ +const char* const kMicroFeatureType = "mf"; +const char* const kCNFeatureType = "cn"; +const char* const kIntFeatureType = "if"; +const char* const kGeoFeatureType = "tb"; + +// Define all of the parameters for the MicroFeature type. +StartParamDesc(MicroFeatureParams) +DefineParam(0, 0, -0.5, 0.5) +DefineParam(0, 0, -0.25, 0.75) +DefineParam(0, 1, 0.0, 1.0) +DefineParam(1, 0, 0.0, 1.0) +DefineParam (0, 1, -0.5, 0.5) +DefineParam (0, 1, -0.5, 0.5) +EndParamDesc +// Now define the feature type itself (see features.h for parameters). +DefineFeature(MicroFeatureDesc, 5, 1, kMicroFeatureType, MicroFeatureParams) + +// Define all of the parameters for the NormFeat type. +StartParamDesc (CharNormParams) +DefineParam(0, 0, -0.25, 0.75) +DefineParam(0, 1, 0.0, 1.0) +DefineParam(0, 0, 0.0, 1.0) +DefineParam(0, 0, 0.0, 1.0) +EndParamDesc +// Now define the feature type itself (see features.h for parameters). +DefineFeature(CharNormDesc, 4, 0, kCNFeatureType, CharNormParams) + +// Define all of the parameters for the IntFeature type +StartParamDesc(IntFeatParams) +DefineParam(0, 0, 0.0, 255.0) +DefineParam(0, 0, 0.0, 255.0) +DefineParam(1, 0, 0.0, 255.0) +EndParamDesc +// Now define the feature type itself (see features.h for parameters). 
DefineFeature(IntFeatDesc, 2, 1, kIntFeatureType, IntFeatParams)

// Define all of the parameters for the GeoFeature type
StartParamDesc(GeoFeatParams)
DefineParam(0, 0, 0.0, 255.0)
DefineParam(0, 0, 0.0, 255.0)
DefineParam(0, 0, 0.0, 255.0)
EndParamDesc
// Now define the feature type itself (see features.h for parameters).
DefineFeature(GeoFeatDesc, 3, 0, kGeoFeatureType, GeoFeatParams)

// Other features used for training the adaptive classifier, but not used
// during normal training, therefore not in the DescDefs array.

// Define all of the parameters for the PicoFeature type
// define knob that can be used to adjust pico-feature length.
float PicoFeatureLength = PICO_FEATURE_LENGTH;
StartParamDesc(PicoFeatParams)
DefineParam(0, 0, -0.25, 0.75)
DefineParam(1, 0, 0.0, 1.0)
DefineParam(0, 0, -0.5, 0.5)
EndParamDesc
// Now define the feature type itself (see features.h for parameters).
DefineFeature(PicoFeatDesc, 2, 1, "pf", PicoFeatParams)

// Define all of the parameters for the OutlineFeature type.
StartParamDesc(OutlineFeatParams)
DefineParam(0, 0, -0.5, 0.5)
DefineParam(0, 0, -0.25, 0.75)
DefineParam(0, 0, 0.0, 1.0)
DefineParam(1, 0, 0.0, 1.0)
EndParamDesc
// Now define the feature type itself (see features.h for parameters).
// NOTE(review): DefineParam arguments appear to map onto PARAM_DESC's
// (Circular, NonEssential, Min, Max) fields -- confirm in ocrfeatures.h.
DefineFeature(OutlineFeatDesc, 3, 1, "of", OutlineFeatParams)

// MUST be kept in-sync with ExtractorDefs in fxdefs.cpp.
// Table of the feature types used in normal training, in the fixed order
// assumed by serialized data.
static const FEATURE_DESC_STRUCT *DescDefs[NUM_FEATURE_TYPES] = {
  &MicroFeatureDesc,
  &CharNormDesc,
  &IntFeatDesc,
  &GeoFeatDesc
};

/*-----------------------------------------------------------------------------
              Public Code
-----------------------------------------------------------------------------*/
// Copies the static DescDefs table into the caller-supplied definitions.
void InitFeatureDefs(FEATURE_DEFS_STRUCT *featuredefs) {
  featuredefs->NumFeatureTypes = NUM_FEATURE_TYPES;
  for (int i = 0; i < NUM_FEATURE_TYPES; ++i) {
    featuredefs->FeatureDesc[i] = DescDefs[i];
  }
}

/*---------------------------------------------------------------------------*/
/**
 * Release the memory consumed by the specified character
 * description and all of the features in that description.
 *
 * @param CharDesc character description to be deallocated
 *
 * Globals:
 * - none
 */
void FreeCharDescription(CHAR_DESC CharDesc) {
  if (CharDesc) {
    for (size_t i = 0; i < CharDesc->NumFeatureSets; i++)
      FreeFeatureSet (CharDesc->FeatureSets[i]);
    free(CharDesc);
  }
} /* FreeCharDescription */


/*---------------------------------------------------------------------------*/
/**
 * Allocate a new character description, initialize its
 * feature sets to be empty, and return it.
 *
 * Globals:
 * - none
 *
 * @return New character description structure.
 */
CHAR_DESC NewCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs) {
  CHAR_DESC CharDesc;
  // NOTE(review): malloc result is not checked; a failure would be
  // dereferenced on the next line.
  CharDesc = static_cast<CHAR_DESC>(malloc (sizeof (CHAR_DESC_STRUCT)));
  CharDesc->NumFeatureSets = FeatureDefs.NumFeatureTypes;

  for (size_t i = 0; i < CharDesc->NumFeatureSets; i++)
    CharDesc->FeatureSets[i] = nullptr;

  return (CharDesc);
} /* NewCharDescription */

/*---------------------------------------------------------------------------*/
/**
 * Appends a textual representation of CharDesc to str.
 * The format used is to write out the number of feature
 * sets which will be written followed by a representation of
 * each feature set.
 *
 * Each set starts with the short name for that feature followed
 * by a description of the feature set.  Feature sets which are
 * not present are not written.
 *
 * @param FeatureDefs definitions of feature types/extractors
 * @param str string to append CharDesc to
 * @param CharDesc character description to write to File
 */
void WriteCharDescription(const FEATURE_DEFS_STRUCT& FeatureDefs,
                          CHAR_DESC CharDesc, STRING* str) {
  int NumSetsToWrite = 0;

  // First pass: count the non-empty sets so the header is correct.
  for (size_t Type = 0; Type < CharDesc->NumFeatureSets; Type++)
    if (CharDesc->FeatureSets[Type])
      NumSetsToWrite++;

  str->add_str_int(" ", NumSetsToWrite);
  *str += "\n";
  for (size_t Type = 0; Type < CharDesc->NumFeatureSets; Type++) {
    if (CharDesc->FeatureSets[Type]) {
      *str += FeatureDefs.FeatureDesc[Type]->ShortName;
      *str += " ";
      WriteFeatureSet(CharDesc->FeatureSets[Type], str);
    }
  }
} /* WriteCharDescription */

// Return whether all of the fields of the given feature set
// are well defined (not inf or nan).
bool ValidCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs,
                          CHAR_DESC CharDesc) {
  bool anything_written = false;
  bool well_formed = true;
  for (size_t Type = 0; Type < CharDesc->NumFeatureSets; Type++) {
    if (CharDesc->FeatureSets[Type]) {
      for (int i = 0; i < CharDesc->FeatureSets[Type]->NumFeatures; i++) {
        FEATURE feat = CharDesc->FeatureSets[Type]->Features[i];
        for (int p = 0; p < feat->Type->NumParams; p++) {
          if (std::isnan(feat->Params[p]) || std::isinf(feat->Params[p]))
            well_formed = false;
          else
            anything_written = true;
        }
      }
    } else {
      // A missing feature set makes the whole description invalid.
      return false;
    }
  }
  // Valid only if at least one finite parameter exists and none were bad.
  return anything_written && well_formed;
} /* ValidCharDescription */

/*---------------------------------------------------------------------------*/
/**
 * Read a character description from File, and return
 * a data structure containing this information.  The data
 * is formatted as follows:
 * @verbatim
     NumberOfSets
     ShortNameForSet1 Set1
     ShortNameForSet2 Set2
     ...
   @endverbatim
 *
 * Globals:
 * - none
 *
 * @param FeatureDefs definitions of feature types/extractors
 * @param File open text file to read character description from
 * @return Character description read from File.
 */
CHAR_DESC ReadCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs,
                              FILE *File) {
  int NumSetsToRead;
  char ShortName[FEAT_NAME_SIZE];
  CHAR_DESC CharDesc;
  int Type;

  ASSERT_HOST(tfscanf(File, "%d", &NumSetsToRead) == 1);
  ASSERT_HOST(NumSetsToRead >= 0);
  ASSERT_HOST(NumSetsToRead <= FeatureDefs.NumFeatureTypes);

  CharDesc = NewCharDescription(FeatureDefs);
  for (; NumSetsToRead > 0; NumSetsToRead--) {
    // NOTE(review): "%s" has no field width, so a malformed name longer
    // than FEAT_NAME_SIZE-1 could overflow ShortName (assuming tfscanf
    // mirrors fscanf semantics) -- consider adding a width limit.
    tfscanf(File, "%s", ShortName);
    Type = ShortNameToFeatureType(FeatureDefs, ShortName);
    CharDesc->FeatureSets[Type] =
      ReadFeatureSet (File, FeatureDefs.FeatureDesc[Type]);
  }
  return CharDesc;
}

/*---------------------------------------------------------------------------*/
/**
 * Search through all features currently defined and return
 * the feature type for the feature with the specified short
 * name.  Trap an error if the specified name is not found.
 *
 * Globals:
 * - none
 *
 * @param FeatureDefs definitions of feature types/extractors
 * @param ShortName short name of a feature type
 * @return Feature type which corresponds to ShortName.
 */
uint32_t ShortNameToFeatureType(const FEATURE_DEFS_STRUCT &FeatureDefs,
                                const char *ShortName) {
  for (int i = 0; i < FeatureDefs.NumFeatureTypes; i++)
    if (!strcmp ((FeatureDefs.FeatureDesc[i]->ShortName), ShortName))
      return static_cast<uint32_t>(i);
  // Unknown short names are a fatal data error.
  ASSERT_HOST(!"Illegal short name for a feature");
  return 0;
}

} // namespace tesseract
diff --git a/tesseract/src/classify/featdefs.h b/tesseract/src/classify/featdefs.h
new file mode 100644
index 00000000..eb8c66fe
--- /dev/null
+++ b/tesseract/src/classify/featdefs.h
@@ -0,0 +1,87 @@
/******************************************************************************
 ** Filename: featdefs.h
 ** Purpose: Definitions of currently defined feature types.
 ** Author: Dan Johnson
 **
 ** (c) Copyright Hewlett-Packard Company, 1988.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 ******************************************************************************/

#ifndef FEATDEFS_H
#define FEATDEFS_H

#include "ocrfeatures.h"

namespace tesseract {

/* Enumerate the different types of features currently defined. */
#define NUM_FEATURE_TYPES 4
extern TESS_API const char* const kMicroFeatureType;
extern TESS_API const char* const kCNFeatureType;
extern TESS_API const char* const kIntFeatureType;
extern TESS_API const char* const kGeoFeatureType;

/* A character is described by multiple sets of extracted features.
Each + set contains a number of features of a particular type, for example, a + set of bays, or a set of closures, or a set of microfeatures. Each + feature consists of a number of parameters. All features within a + feature set contain the same number of parameters.*/ + +struct CHAR_DESC_STRUCT { + uint32_t NumFeatureSets; + FEATURE_SET FeatureSets[NUM_FEATURE_TYPES]; +}; +using CHAR_DESC = CHAR_DESC_STRUCT *; + +struct FEATURE_DEFS_STRUCT { + int32_t NumFeatureTypes; + const FEATURE_DESC_STRUCT* FeatureDesc[NUM_FEATURE_TYPES]; +}; +using FEATURE_DEFS = FEATURE_DEFS_STRUCT *; + +/*---------------------------------------------------------------------- + Generic functions for manipulating character descriptions +----------------------------------------------------------------------*/ +TESS_API +void InitFeatureDefs(FEATURE_DEFS_STRUCT *featuredefs); + +TESS_API +void FreeCharDescription(CHAR_DESC CharDesc); + +CHAR_DESC NewCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs); + +bool ValidCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, + CHAR_DESC CharDesc); + +void WriteCharDescription(const FEATURE_DEFS_STRUCT& FeatureDefs, + CHAR_DESC CharDesc, STRING* str); + +TESS_API +CHAR_DESC ReadCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, + FILE *File); + +TESS_API +uint32_t ShortNameToFeatureType(const FEATURE_DEFS_STRUCT &FeatureDefs, + const char *ShortName); + +/**---------------------------------------------------------------------------- + Global Data Definitions and Declarations +----------------------------------------------------------------------------**/ +extern const FEATURE_DESC_STRUCT MicroFeatureDesc; +extern TESS_API const FEATURE_DESC_STRUCT PicoFeatDesc; +extern const FEATURE_DESC_STRUCT CharNormDesc; +extern const FEATURE_DESC_STRUCT OutlineFeatDesc; +extern const FEATURE_DESC_STRUCT IntFeatDesc; +extern const FEATURE_DESC_STRUCT GeoFeatDesc; + +} // namespace tesseract + +#endif diff --git a/tesseract/src/classify/float2int.cpp 
b/tesseract/src/classify/float2int.cpp new file mode 100644 index 00000000..1b48779b --- /dev/null +++ b/tesseract/src/classify/float2int.cpp @@ -0,0 +1,109 @@ +/****************************************************************************** + ** Filename: float2int.cpp + ** Purpose: Routines for converting float features to int features + ** Author: Dan Johnson + ** + ** (c) Copyright Hewlett-Packard Company, 1988. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + ******************************************************************************/ + +#include "float2int.h" + +#include "normmatch.h" +#include "mfoutline.h" +#include "classify.h" +#include "picofeat.h" + +#include "helpers.h" + +#define MAX_INT_CHAR_NORM (INT_CHAR_NORM_RANGE - 1) + +/*---------------------------------------------------------------------------*/ +namespace tesseract { + +/** + * For each class in the unicharset, clears the corresponding + * entry in char_norm_array. char_norm_array is indexed by unichar_id. + * + * Globals: + * - none + * + * @param char_norm_array array to be cleared + */ +void Classify::ClearCharNormArray(uint8_t* char_norm_array) { + memset(char_norm_array, 0, sizeof(*char_norm_array) * unicharset.size()); +} /* ClearCharNormArray */ + + +/*---------------------------------------------------------------------------*/ +/** + * For each class in unicharset, computes the match between + * norm_feature and the normalization protos for that class. 
+ * Converts this number to the range from 0 - 255 and stores it + * into char_norm_array. CharNormArray is indexed by unichar_id. + * + * Globals: + * - PreTrainedTemplates current set of built-in templates + * + * @param norm_feature character normalization feature + * @param[out] char_norm_array place to put results of size unicharset.size() + */ +void Classify::ComputeIntCharNormArray(const FEATURE_STRUCT& norm_feature, + uint8_t* char_norm_array) { + for (int i = 0; i < unicharset.size(); i++) { + if (i < PreTrainedTemplates->NumClasses) { + int norm_adjust = static_cast<int>(INT_CHAR_NORM_RANGE * + ComputeNormMatch(i, norm_feature, false)); + char_norm_array[i] = ClipToRange(norm_adjust, 0, MAX_INT_CHAR_NORM); + } else { + // Classes with no templates (eg. ambigs & ligatures) default + // to worst match. + char_norm_array[i] = MAX_INT_CHAR_NORM; + } + } +} /* ComputeIntCharNormArray */ + + +/*---------------------------------------------------------------------------*/ +/** + * This routine converts each floating point pico-feature + * in Features into integer format and saves it into + * IntFeatures. 
+ * + * Globals: + * - none + * + * @param Features floating point pico-features to be converted + * @param[out] IntFeatures array to put converted features into + */ +void Classify::ComputeIntFeatures(FEATURE_SET Features, + INT_FEATURE_ARRAY IntFeatures) { + float YShift; + + if (classify_norm_method == baseline) + YShift = BASELINE_Y_SHIFT; + else + YShift = Y_SHIFT; + + for (int Fid = 0; Fid < Features->NumFeatures; Fid++) { + FEATURE Feature = Features->Features[Fid]; + + IntFeatures[Fid].X = + Bucket8For(Feature->Params[PicoFeatX], X_SHIFT, INT_FEAT_RANGE); + IntFeatures[Fid].Y = + Bucket8For(Feature->Params[PicoFeatY], YShift, INT_FEAT_RANGE); + IntFeatures[Fid].Theta = CircBucketFor(Feature->Params[PicoFeatDir], + ANGLE_SHIFT, INT_FEAT_RANGE); + IntFeatures[Fid].CP_misses = 0; + } +} /* ComputeIntFeatures */ + +} // namespace tesseract diff --git a/tesseract/src/classify/float2int.h b/tesseract/src/classify/float2int.h new file mode 100644 index 00000000..70a05ab6 --- /dev/null +++ b/tesseract/src/classify/float2int.h @@ -0,0 +1,30 @@ +/****************************************************************************** + ** Filename: float2int.h + ** Purpose: Routines for converting float features to int features + ** Author: Dan Johnson + ** + ** (c) Copyright Hewlett-Packard Company, 1988. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
 ******************************************************************************/

#ifndef FLOAT2INT_H
#define FLOAT2INT_H

/*-----------------------------------------------------------------------------
          Include Files and Type Defines
-----------------------------------------------------------------------------*/
#include "intmatcher.h"
#include "ocrfeatures.h"

// Number of quanta used for each integer feature dimension (X, Y, Theta);
// bucketed values lie in [0, INT_FEAT_RANGE - 1].
#define INT_FEAT_RANGE 256
// Vertical shift applied when converting baseline-normalized features
// (used by Classify::ComputeIntFeatures in float2int.cpp).
#define BASELINE_Y_SHIFT (0.25)

#endif

diff --git a/tesseract/src/classify/fpoint.cpp b/tesseract/src/classify/fpoint.cpp
new file mode 100644
index 00000000..333b1fc7
--- /dev/null
+++ b/tesseract/src/classify/fpoint.cpp
/******************************************************************************
 ** Filename: fpoint.cpp
 ** Purpose: Abstract data type for a 2D point (floating point coords)
 ** Author: Dan Johnson
 **
 ** (c) Copyright Hewlett-Packard Company, 1988.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
+ ******************************************************************************/ +/*---------------------------------------------------------------------------- + Include Files and Type Defines +----------------------------------------------------------------------------*/ +#define _USE_MATH_DEFINES // for M_PI +#include "fpoint.h" +#include <cstdio> +#include <cmath> // for M_PI + +/*---------------------------------------------------------------------------- + Public Code +----------------------------------------------------------------------------*/ + +float DistanceBetween(FPOINT A, FPOINT B) { + const double xd = XDelta(A, B); + const double yd = YDelta(A, B); + return sqrt(static_cast<double>(xd * xd + yd * yd)); +} + +/** + * Return the angle from Point1 to Point2 normalized to + * lie in the range 0 to FullScale (where FullScale corresponds + * to 2*pi or 360 degrees). + * @param Point1 points to compute angle between + * @param Point2 points to compute angle between + * @param FullScale value to associate with 2*pi + * @return angle + */ +float NormalizedAngleFrom(FPOINT *Point1, FPOINT *Point2, float FullScale) { + float NumRadsInCircle = 2.0 * M_PI; + + float Angle = AngleFrom (*Point1, *Point2); + if (Angle < 0.0) + Angle += NumRadsInCircle; + Angle *= FullScale / NumRadsInCircle; + if (Angle < 0.0 || Angle >= FullScale) + Angle = 0.0; + return (Angle); +} diff --git a/tesseract/src/classify/fpoint.h b/tesseract/src/classify/fpoint.h new file mode 100644 index 00000000..93f5a20f --- /dev/null +++ b/tesseract/src/classify/fpoint.h @@ -0,0 +1,53 @@ +/****************************************************************************** + ** Filename: fpoint.h + ** Purpose: Abstract data type for 2D points (floating point coords) + ** Author: Dan Johnson + ** + ** (c) Copyright Hewlett-Packard Company, 1988. + ** (c) Copyright Hewlett-Packard Company, 1988. 
/*
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 ******************************************************************************/

#ifndef FPOINT_H
#define FPOINT_H

/**----------------------------------------------------------------------------
          Include Files and Type Defines
----------------------------------------------------------------------------**/
#include <cmath>
#include <cstdio>

/* Data structure to hold 2D points or vectors using floating point. */
typedef struct {
  float x, y;
} FPOINT;
using FVECTOR = FPOINT;

/**----------------------------------------------------------------------------
            Macros
----------------------------------------------------------------------------**/
/* Macros for computing miscellaneous functions of 2 points. */
// Signed component deltas from A to B.
#define XDelta(A, B) ((B).x - (A).x)
#define YDelta(A, B) ((B).y - (A).y)
// Slope of the line A->B; divides by zero when A.x == B.x.
#define SlopeFrom(A, B) (YDelta(A, B) / XDelta(A, B))
// Angle of the vector A->B in radians, in (-pi, pi] as given by atan2.
#define AngleFrom(A, B) (atan2((double)YDelta(A, B), (double)XDelta(A, B)))

// Y value of the line through A and B at the given X.
#define XIntersectionOf(A, B, X) (SlopeFrom(A, B) * ((X)-A.x) + A.y)

/*-------------------------------------------------------------------------
        Public Function Prototypes
---------------------------------------------------------------------------*/

// Euclidean distance between A and B (defined in fpoint.cpp).
float DistanceBetween(FPOINT A, FPOINT B);

// Angle of Point1->Point2 scaled so that 2*pi maps to FullScale.
float NormalizedAngleFrom(FPOINT* Point1, FPOINT* Point2, float FullScale);

#endif
// diff --git a/tesseract/src/classify/intfeaturespace.cpp
b/tesseract/src/classify/intfeaturespace.cpp
new file mode 100644
index 00000000..9ddd9777
--- /dev/null
+++ b/tesseract/src/classify/intfeaturespace.cpp
// Copyright 2010 Google Inc. All Rights Reserved.
// Author: rays@google.com (Ray Smith)
///////////////////////////////////////////////////////////////////////
// File: intfeaturespace.cpp
// Description: Indexed feature space based on INT_FEATURE_STRUCT.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////

#define _USE_MATH_DEFINES // for M_PI
#include "intfeaturespace.h"
#include <cmath> // for M_PI
#include "intfx.h"

namespace tesseract {

// Constructs an empty feature space; Init() must be called before use.
IntFeatureSpace::IntFeatureSpace()
  : x_buckets_(0), y_buckets_(0), theta_buckets_(0) {
}

// Sets the quantization of each dimension. No validation is performed;
// callers are expected to supply non-zero bucket counts.
void IntFeatureSpace::Init(uint8_t xbuckets, uint8_t ybuckets, uint8_t thetabuckets) {
  x_buckets_ = xbuckets;
  y_buckets_ = ybuckets;
  theta_buckets_ = thetabuckets;
}

// Serializes the feature space definition to the given file.
// Returns false on error.
// Writes the three bucket counts as single bytes, in x, y, theta order.
bool IntFeatureSpace::Serialize(FILE* fp) const {
  if (fwrite(&x_buckets_, sizeof(x_buckets_), 1, fp) != 1)
    return false;
  if (fwrite(&y_buckets_, sizeof(y_buckets_), 1, fp) != 1)
    return false;
  if (fwrite(&theta_buckets_, sizeof(theta_buckets_), 1, fp) != 1)
    return false;
  return true;
}

// Returns an INT_FEATURE_STRUCT corresponding to the given index.
// This is the inverse of the Index member.
INT_FEATURE_STRUCT IntFeatureSpace::PositionFromIndex(int index) const {
  // Index was packed as (x * y_buckets_ + y) * theta_buckets_ + theta,
  // so decode the bucket coordinates in the reverse order.
  return PositionFromBuckets(index / (y_buckets_ * theta_buckets_),
                             index / theta_buckets_ % y_buckets_,
                             index % theta_buckets_);
}

// Bulk calls to Index. Maps the given array of features to a vector of
// int32_t indices in the same order as the input.
void IntFeatureSpace::IndexFeatures(const INT_FEATURE_STRUCT* features,
                                    int num_features,
                                    GenericVector<int>* mapped_features) const {
  mapped_features->truncate(0);
  for (int f = 0; f < num_features; ++f)
    mapped_features->push_back(Index(features[f]));
}

// Bulk calls to Index. Maps the given array of features to a vector of
// sorted int32_t indices.
void IntFeatureSpace::IndexAndSortFeatures(
    const INT_FEATURE_STRUCT* features, int num_features,
    GenericVector<int>* sorted_features) const {
  sorted_features->truncate(0);
  for (int f = 0; f < num_features; ++f)
    sorted_features->push_back(Index(features[f]));
  sorted_features->sort();
}

// Returns a feature space index for the given x,y position in a display
// window, or -1 if the feature is a miss.
// NOTE(review): debugging helper - prints diagnostics via tprintf.
int IntFeatureSpace::XYToFeatureIndex(int x, int y) const {
  // Round the x,y position to a feature. Search for a valid theta.
  INT_FEATURE_STRUCT feature(x, y, 0);
  int index = -1;
  for (int theta = 0; theta <= UINT8_MAX && index < 0; ++theta) {
    feature.Theta = theta;
    index = Index(feature);
  }
  if (index < 0) {
    tprintf("(%d,%d) does not exist in feature space!\n", x, y);
    return -1;
  }
  feature = PositionFromIndex(index);
  tprintf("Click at (%d, %d) ->(%d, %d), ->(%d, %d)\n",
          x, y, feature.X, feature.Y, x - feature.X, y - feature.Y);
  // Get the relative position of x,y from the rounded feature.
  x -= feature.X;
  y -= feature.Y;
  if (x != 0 || y != 0) {
    // Derive a theta from the offset of the click from the rounded
    // position, then re-index with that direction.
    double angle = atan2(static_cast<double>(y), static_cast<double>(x)) + M_PI;
    angle *= kIntFeatureExtent / (2.0 * M_PI);
    feature.Theta = static_cast<uint8_t>(angle + 0.5);
    index = Index(feature);
    if (index < 0) {
      tprintf("Feature failed to map to a valid index:");
      feature.print();
      return -1;
    }
    feature = PositionFromIndex(index);
  }
  feature.print();
  return index;
}

// Returns an INT_FEATURE_STRUCT corresponding to the given bucket coords.
INT_FEATURE_STRUCT IntFeatureSpace::PositionFromBuckets(int x,
                                                        int y,
                                                        int theta) const {
  // Map each bucket back to the centre of its cell in [0, kIntFeatureExtent).
  INT_FEATURE_STRUCT pos(
      (x * kIntFeatureExtent + kIntFeatureExtent / 2) / x_buckets_,
      (y * kIntFeatureExtent + kIntFeatureExtent / 2) / y_buckets_,
      DivRounded(theta * kIntFeatureExtent, theta_buckets_));
  return pos;
}

} // namespace tesseract.

diff --git a/tesseract/src/classify/intfeaturespace.h b/tesseract/src/classify/intfeaturespace.h
new file mode 100644
index 00000000..3f21e4d3
--- /dev/null
+++ b/tesseract/src/classify/intfeaturespace.h
// Copyright 2010 Google Inc. All Rights Reserved.
// Author: rays@google.com (Ray Smith)
///////////////////////////////////////////////////////////////////////
// File: intfeaturespace.h
// Description: Indexed feature space based on INT_FEATURE_STRUCT.
// Created: Wed Mar 24 10:55:30 PDT 2010
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////

#ifndef TESSERACT_CLASSIFY_INTFEATURESPACE_H_
#define TESSERACT_CLASSIFY_INTFEATURESPACE_H_

#include "genericvector.h"
#include "intproto.h"

// Extent of x,y,theta in the input feature space. [0,255].
const int kIntFeatureExtent = 256;
// Extent of x,y,theta dimensions in the quantized feature space.
const int kBoostXYBuckets = 16;
const int kBoostDirBuckets = 16;

namespace tesseract {

class IndexMap;

// Down-sampling quantization of the INT_FEATURE_STRUCT feature space and
// conversion to a single scalar index value, used as a binary feature space.
class TESS_API IntFeatureSpace {
 public:
  IntFeatureSpace();
  // Default copy constructors and assignment OK!

  // Setup the feature space with the given dimensions.
  void Init(uint8_t xbuckets, uint8_t ybuckets, uint8_t thetabuckets);

  // Serializes the feature space definition to the given file.
  // Returns false on error.
  bool Serialize(FILE* fp) const;

  // Returns the total size of the feature space.
  int Size() const {
    return static_cast<int>(x_buckets_) * y_buckets_ * theta_buckets_;
  }
  // Returns an INT_FEATURE_STRUCT corresponding to the given index.
  // This is the inverse of the Index member.
  INT_FEATURE_STRUCT PositionFromIndex(int index) const;

  // Returns a 1-dimensional index corresponding to the given feature value.
  // Range is [0, Size()-1]. Inverse of PositionFromIndex member.
  // Packing is x-major: (x_bucket * y_buckets_ + y_bucket) * theta_buckets_
  // + theta_bucket.
  int Index(const INT_FEATURE_STRUCT& f) const {
    return (XBucket(f.X) * y_buckets_ + YBucket(f.Y)) * theta_buckets_ +
        ThetaBucket(f.Theta);
  }
  // Bulk calls to Index. Maps the given array of features to a vector of
  // int32_t indices in the same order as the input.
  void IndexFeatures(const INT_FEATURE_STRUCT* features, int num_features,
                     GenericVector<int>* mapped_features) const;
  // Bulk calls to Index. Maps the given array of features to a vector of
  // sorted int32_t indices.
  void IndexAndSortFeatures(const INT_FEATURE_STRUCT* features,
                            int num_features,
                            GenericVector<int>* sorted_features) const;
  // Returns a feature space index for the given x,y position in a display
  // window, or -1 if the feature is a miss.
  int XYToFeatureIndex(int x, int y) const;

 protected:
  // Converters to generate indices for individual feature dimensions.
  // X and Y are clipped into their valid bucket ranges.
  int XBucket(int x) const {
    int bucket = x * x_buckets_ / kIntFeatureExtent;
    return ClipToRange(bucket, 0, static_cast<int>(x_buckets_) - 1);
  }
  int YBucket(int y) const {
    int bucket = y * y_buckets_ / kIntFeatureExtent;
    return ClipToRange(bucket, 0, static_cast<int>(y_buckets_) - 1);
  }
  // Use DivRounded for theta so that exactly vertical and horizontal are in
  // the middle of a bucket. The Modulo takes care of the wrap-around.
  int ThetaBucket(int theta) const {
    int bucket = DivRounded(theta * theta_buckets_, kIntFeatureExtent);
    return Modulo(bucket, theta_buckets_);
  }
  // Returns an INT_FEATURE_STRUCT corresponding to the given buckets.
  INT_FEATURE_STRUCT PositionFromBuckets(int x, int y, int theta) const;

  // Feature space definition - serialized.
  uint8_t x_buckets_;
  uint8_t y_buckets_;
  uint8_t theta_buckets_;
};

} // namespace tesseract.

#endif // TESSERACT_CLASSIFY_INTFEATURESPACE_H_

diff --git a/tesseract/src/classify/intfx.cpp b/tesseract/src/classify/intfx.cpp
new file mode 100644
index 00000000..062b0f1e
--- /dev/null
+++ b/tesseract/src/classify/intfx.cpp
/******************************************************************************
 ** Filename: intfx.c
 ** Purpose: Integer character normalization & feature extraction
 ** Author: Robert Moss, rays@google.com (Ray Smith)
 **
 ** (c) Copyright Hewlett-Packard Company, 1988.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + *****************************************************************************/ +/**---------------------------------------------------------------------------- + Include Files and Type Defines +----------------------------------------------------------------------------**/ + +#define _USE_MATH_DEFINES // for M_PI + +#include "intfx.h" + +#include "classify.h" +#include "intmatcher.h" +#include "linlsq.h" +#include "normalis.h" +#include "statistc.h" +#include "trainingsample.h" + +#include "helpers.h" + +#include "allheaders.h" + +#include <cmath> // for M_PI +#include <mutex> // for std::mutex + +namespace tesseract { + +/**---------------------------------------------------------------------------- + Global Data Definitions and Declarations +----------------------------------------------------------------------------**/ +// Look up table for cos and sin to turn the intfx feature angle to a vector. +// Protected by atan_table_mutex. +// The entries are in binary degrees where a full circle is 256 binary degrees. +static float cos_table[INT_CHAR_NORM_RANGE]; +static float sin_table[INT_CHAR_NORM_RANGE]; + +/**---------------------------------------------------------------------------- + Public Code +----------------------------------------------------------------------------**/ + +void InitIntegerFX() { + // Guards write access to AtanTable so we don't create it more than once. 
+ static std::mutex atan_table_mutex; + static bool atan_table_init = false; + std::lock_guard<std::mutex> guard(atan_table_mutex); + if (!atan_table_init) { + for (int i = 0; i < INT_CHAR_NORM_RANGE; ++i) { + cos_table[i] = cos(i * 2 * M_PI / INT_CHAR_NORM_RANGE + M_PI); + sin_table[i] = sin(i * 2 * M_PI / INT_CHAR_NORM_RANGE + M_PI); + } + atan_table_init = true; + } +} + +// Returns a vector representing the direction of a feature with the given +// theta direction in an INT_FEATURE_STRUCT. +FCOORD FeatureDirection(uint8_t theta) { + return FCOORD(cos_table[theta], sin_table[theta]); +} + +// Generates a TrainingSample from a TBLOB. Extracts features and sets +// the bounding box, so classifiers that operate on the image can work. +// TODO(rays) Make BlobToTrainingSample a member of Classify now that +// the FlexFx and FeatureDescription code have been removed and LearnBlob +// is now a member of Classify. +TrainingSample* BlobToTrainingSample( + const TBLOB& blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT* fx_info, + std::vector<INT_FEATURE_STRUCT>* bl_features) { + std::vector<INT_FEATURE_STRUCT> cn_features; + Classify::ExtractFeatures(blob, nonlinear_norm, bl_features, + &cn_features, fx_info, nullptr); + // TODO(rays) Use blob->PreciseBoundingBox() instead. + TBOX box = blob.bounding_box(); + TrainingSample* sample = nullptr; + int num_features = fx_info->NumCN; + if (num_features > 0) { + sample = TrainingSample::CopyFromFeatures(*fx_info, box, &cn_features[0], + num_features); + } + if (sample != nullptr) { + // Set the bounding box (in original image coordinates) in the sample. 
+ TPOINT topleft, botright; + topleft.x = box.left(); + topleft.y = box.top(); + botright.x = box.right(); + botright.y = box.bottom(); + TPOINT original_topleft, original_botright; + blob.denorm().DenormTransform(nullptr, topleft, &original_topleft); + blob.denorm().DenormTransform(nullptr, botright, &original_botright); + sample->set_bounding_box(TBOX(original_topleft.x, original_botright.y, + original_botright.x, original_topleft.y)); + } + return sample; +} + +// Computes the DENORMS for bl(baseline) and cn(character) normalization +// during feature extraction. The input denorm describes the current state +// of the blob, which is usually a baseline-normalized word. +// The Transforms setup are as follows: +// Baseline Normalized (bl) Output: +// We center the grapheme by aligning the x-coordinate of its centroid with +// x=128 and leaving the already-baseline-normalized y as-is. +// +// Character Normalized (cn) Output: +// We align the grapheme's centroid at the origin and scale it +// asymmetrically in x and y so that the 2nd moments are a standard value +// (51.2) ie the result is vaguely square. +// If classify_nonlinear_norm is true: +// A non-linear normalization is setup that attempts to evenly distribute +// edges across x and y. +// +// Some of the fields of fx_info are also setup: +// Length: Total length of outline. +// Rx: Rounded y second moment. (Reversed by convention.) +// Ry: rounded x second moment. +// Xmean: Rounded x center of mass of the blob. +// Ymean: Rounded y center of mass of the blob. +void Classify::SetupBLCNDenorms(const TBLOB& blob, bool nonlinear_norm, + DENORM* bl_denorm, DENORM* cn_denorm, + INT_FX_RESULT_STRUCT* fx_info) { + // Compute 1st and 2nd moments of the original outline. 
+ FCOORD center, second_moments; + int length = blob.ComputeMoments(¢er, &second_moments); + if (fx_info != nullptr) { + fx_info->Length = length; + fx_info->Rx = IntCastRounded(second_moments.y()); + fx_info->Ry = IntCastRounded(second_moments.x()); + + fx_info->Xmean = IntCastRounded(center.x()); + fx_info->Ymean = IntCastRounded(center.y()); + } + // Setup the denorm for Baseline normalization. + bl_denorm->SetupNormalization(nullptr, nullptr, &blob.denorm(), center.x(), 128.0f, + 1.0f, 1.0f, 128.0f, 128.0f); + // Setup the denorm for character normalization. + if (nonlinear_norm) { + GenericVector<GenericVector<int> > x_coords; + GenericVector<GenericVector<int> > y_coords; + TBOX box; + blob.GetPreciseBoundingBox(&box); + box.pad(1, 1); + blob.GetEdgeCoords(box, &x_coords, &y_coords); + cn_denorm->SetupNonLinear(&blob.denorm(), box, UINT8_MAX, UINT8_MAX, + 0.0f, 0.0f, x_coords, y_coords); + } else { + cn_denorm->SetupNormalization(nullptr, nullptr, &blob.denorm(), + center.x(), center.y(), + 51.2f / second_moments.x(), + 51.2f / second_moments.y(), + 128.0f, 128.0f); + } +} + +// Helper normalizes the direction, assuming that it is at the given +// unnormed_pos, using the given denorm, starting at the root_denorm. +static uint8_t NormalizeDirection(uint8_t dir, const FCOORD& unnormed_pos, + const DENORM& denorm, + const DENORM* root_denorm) { + // Convert direction to a vector. + FCOORD unnormed_end; + unnormed_end.from_direction(dir); + unnormed_end += unnormed_pos; + FCOORD normed_pos, normed_end; + denorm.NormTransform(root_denorm, unnormed_pos, &normed_pos); + denorm.NormTransform(root_denorm, unnormed_end, &normed_end); + normed_end -= normed_pos; + return normed_end.to_direction(); +} + +// Helper returns the mean direction vector from the given stats. Use the +// mean direction from dirs if there is information available, otherwise, use +// the fit_vector from point_diffs. 
static FCOORD MeanDirectionVector(const LLSQ& point_diffs, const LLSQ& dirs,
                                  const FCOORD& start_pt,
                                  const FCOORD& end_pt) {
  FCOORD fit_vector;
  if (dirs.count() > 0) {
    // There were directions, so use them. To avoid wrap-around problems, we
    // have 2 accumulators in dirs: x for normal directions and y for
    // directions offset by 128. We will use the one with the least variance.
    FCOORD mean_pt = dirs.mean_point();
    double mean_dir = 0.0;
    if (dirs.x_variance() <= dirs.y_variance()) {
      mean_dir = mean_pt.x();
    } else {
      // Undo the 128 offset applied to the y accumulator.
      mean_dir = mean_pt.y() + 128;
    }
    fit_vector.from_direction(Modulo(IntCastRounded(mean_dir), 256));
  } else {
    // There were no directions, so we rely on the vector_fit to the points.
    // Since the vector_fit is 180 degrees ambiguous, we align with the
    // supplied feature_dir by making the scalar product non-negative.
    FCOORD feature_dir(end_pt - start_pt);
    fit_vector = point_diffs.vector_fit();
    if (fit_vector.x() == 0.0f && fit_vector.y() == 0.0f) {
      // There was only a single point. Use feature_dir directly.
      fit_vector = feature_dir;
    } else {
      // Sometimes the least mean squares fit is wrong, due to the small sample
      // of points and scaling. Use a 90 degree rotated vector if that matches
      // feature_dir better.
      FCOORD fit_vector2 = !fit_vector;
      // The fit_vector is 180 degrees ambiguous, so resolve the ambiguity by
      // insisting that the scalar product with the feature_dir should be +ve.
      if (fit_vector % feature_dir < 0.0)
        fit_vector = -fit_vector;
      if (fit_vector2 % feature_dir < 0.0)
        fit_vector2 = -fit_vector2;
      // Even though fit_vector2 has a higher mean squared error, it might be
      // a better fit, so use it if the dot product with feature_dir is bigger.
      if (fit_vector2 % feature_dir > fit_vector % feature_dir)
        fit_vector = fit_vector2;
    }
  }
  return fit_vector;
}

// Helper computes one or more features corresponding to the given points.
// Emitted features are on the line defined by:
// start_pt + lambda * (end_pt - start_pt) for scalar lambda.
// Features are spaced at feature_length intervals.
// Returns the number of features appended to *features (0 if the segment
// is degenerate or shorter than half a feature_length).
static int ComputeFeatures(const FCOORD& start_pt, const FCOORD& end_pt,
                           double feature_length,
                           std::vector<INT_FEATURE_STRUCT>* features) {
  FCOORD feature_vector(end_pt - start_pt);
  if (feature_vector.x() == 0.0f && feature_vector.y() == 0.0f) return 0;
  // Compute theta for the feature based on its direction.
  uint8_t theta = feature_vector.to_direction();
  // Compute the number of features and lambda_step.
  double target_length = feature_vector.length();
  int num_features = IntCastRounded(target_length / feature_length);
  if (num_features == 0) return 0;
  // Divide the length evenly into num_features pieces.
  // Starting lambda at half a step centres each feature in its interval.
  double lambda_step = 1.0 / num_features;
  double lambda = lambda_step / 2.0;
  for (int f = 0; f < num_features; ++f, lambda += lambda_step) {
    FCOORD feature_pt(start_pt);
    feature_pt += feature_vector * lambda;
    INT_FEATURE_STRUCT feature(feature_pt, theta);
    features->push_back(feature);
  }
  return num_features;
}

// Gathers outline points and their directions from start_index into dirs by
// stepping along the outline and normalizing the coordinates until the
// required feature_length has been collected or end_index is reached.
// On input pos must point to the position corresponding to start_index and on
// return pos is updated to the current raw position, and pos_normed is set to
// the normed version of pos.
// Since directions wrap-around, they need special treatment to get the mean.
// Provided the cluster of directions doesn't straddle the wrap-around point,
// the simple mean works. If they do, then, unless the directions are wildly
// varying, the cluster rotated by 180 degrees will not straddle the wrap-
// around point, so mean(dir + 180 degrees) - 180 degrees will work.
// Since
// LLSQ conveniently stores the mean of 2 variables, we use it to store
// dir and dir+128 (128 is 180 degrees) and then use the resulting mean
// with the least variance.
// Returns the index at which collection stopped (one past the last point
// consumed, or the first point left for the next segment).
static int GatherPoints(const C_OUTLINE* outline, double feature_length,
                        const DENORM& denorm, const DENORM* root_denorm,
                        int start_index, int end_index,
                        ICOORD* pos, FCOORD* pos_normed,
                        LLSQ* points, LLSQ* dirs) {
  int step_length = outline->pathlength();
  ICOORD step = outline->step(start_index % step_length);
  // Prev_normed is the start point of this collection and will be set on the
  // first iteration, and on later iterations used to determine the length
  // that has been collected.
  FCOORD prev_normed;
  points->clear();
  dirs->clear();
  int num_points = 0;
  int index;
  // index may exceed step_length (wrap-around), so every outline accessor
  // below is indexed modulo step_length.
  for (index = start_index; index <= end_index; ++index, *pos += step) {
    step = outline->step(index % step_length);
    int edge_weight = outline->edge_strength_at_index(index % step_length);
    if (edge_weight == 0) {
      // This point has conflicting gradient and step direction, so ignore it.
      continue;
    }
    // Get the sub-pixel precise location and normalize.
    FCOORD f_pos = outline->sub_pixel_pos_at_index(*pos, index % step_length);
    denorm.NormTransform(root_denorm, f_pos, pos_normed);
    if (num_points == 0) {
      // The start of this segment.
      prev_normed = *pos_normed;
    } else {
      FCOORD offset = *pos_normed - prev_normed;
      float length = offset.length();
      if (length > feature_length) {
        // We have gone far enough from the start. We will use this point in
        // the next set so return what we have so far.
        return index;
      }
    }
    points->add(pos_normed->x(), pos_normed->y(), edge_weight);
    int direction = outline->direction_at_index(index % step_length);
    if (direction >= 0) {
      direction = NormalizeDirection(direction, f_pos, denorm, root_denorm);
      // Use both the direction and direction +128 so we are not trying to
      // take the mean of something straddling the wrap-around point.
      dirs->add(direction, Modulo(direction + 128, 256));
    }
    ++num_points;
  }
  return index;
}

// Extracts Tesseract features and appends them to the features vector.
// Startpt to lastpt, inclusive, MUST have the same src_outline member,
// which may be nullptr. The vector from lastpt to its next is included in
// the feature extraction. Hidden edges should be excluded by the caller.
// If force_poly is true, the features will be extracted from the polygonal
// approximation even if more accurate data is available.
static void ExtractFeaturesFromRun(
    const EDGEPT* startpt, const EDGEPT* lastpt,
    const DENORM& denorm, double feature_length, bool force_poly,
    std::vector<INT_FEATURE_STRUCT>* features) {
  const EDGEPT* endpt = lastpt->next;
  const C_OUTLINE* outline = startpt->src_outline;
  if (outline != nullptr && !force_poly) {
    // Detailed information is available. We have to normalize only from
    // the root_denorm to denorm.
    const DENORM* root_denorm = denorm.RootDenorm();
    int total_features = 0;
    // Get the features from the outline.
    int step_length = outline->pathlength();
    int start_index = startpt->start_step;
    // pos is the integer coordinates of the binary image steps.
    ICOORD pos = outline->position_at_index(start_index);
    // We use an end_index that allows us to use a positive increment, but that
    // may be beyond the bounds of the outline steps due to wrap-around, so
    // we use % step_length everywhere, except for start_index.
    int end_index = lastpt->start_step + lastpt->step_count;
    if (end_index <= start_index)
      end_index += step_length;
    LLSQ prev_points;
    LLSQ prev_dirs;
    FCOORD prev_normed_pos = outline->sub_pixel_pos_at_index(pos, start_index);
    denorm.NormTransform(root_denorm, prev_normed_pos, &prev_normed_pos);
    LLSQ points;
    LLSQ dirs;
    FCOORD normed_pos(0.0f, 0.0f);
    int index = GatherPoints(outline, feature_length, denorm, root_denorm,
                             start_index, end_index, &pos, &normed_pos,
                             &points, &dirs);
    while (index <= end_index) {
      // At each iteration we nominally have 3 accumulated sets of points and
      // dirs: prev_points/dirs, points/dirs, next_points/dirs and sum them
      // into sum_points/dirs, but we don't necessarily get any features out,
      // so if that is the case, we keep accumulating instead of rotating the
      // accumulators.
      LLSQ next_points;
      LLSQ next_dirs;
      FCOORD next_normed_pos(0.0f, 0.0f);
      index = GatherPoints(outline, feature_length, denorm, root_denorm,
                           index, end_index, &pos, &next_normed_pos,
                           &next_points, &next_dirs);
      LLSQ sum_points(prev_points);
      // TODO(rays) find out why it is better to use just dirs and next_dirs
      // in sum_dirs, instead of using prev_dirs as well.
      LLSQ sum_dirs(dirs);
      sum_points.add(points);
      sum_points.add(next_points);
      sum_dirs.add(next_dirs);
      bool made_features = false;
      // If we have some points, we can try making some features.
      if (sum_points.count() > 0) {
        // We have gone far enough from the start. Make a feature and restart.
        FCOORD fit_pt = sum_points.mean_point();
        FCOORD fit_vector = MeanDirectionVector(sum_points, sum_dirs,
                                                prev_normed_pos, normed_pos);
        // The segment to which we fit features is the line passing through
        // fit_pt in direction of fit_vector that starts nearest to
        // prev_normed_pos and ends nearest to normed_pos.
        FCOORD start_pos = prev_normed_pos.nearest_pt_on_line(fit_pt,
                                                              fit_vector);
        FCOORD end_pos = normed_pos.nearest_pt_on_line(fit_pt, fit_vector);
        // Possible correction to match the adjacent polygon segment.
        if (total_features == 0 && startpt != endpt) {
          FCOORD poly_pos(startpt->pos.x, startpt->pos.y);
          denorm.LocalNormTransform(poly_pos, &start_pos);
        }
        if (index > end_index && startpt != endpt) {
          FCOORD poly_pos(endpt->pos.x, endpt->pos.y);
          denorm.LocalNormTransform(poly_pos, &end_pos);
        }
        int num_features = ComputeFeatures(start_pos, end_pos, feature_length,
                                           features);
        if (num_features > 0) {
          // We made some features so shuffle the accumulators.
          prev_points = points;
          prev_dirs = dirs;
          prev_normed_pos = normed_pos;
          points = next_points;
          dirs = next_dirs;
          made_features = true;
          total_features += num_features;
        }
        // The end of the next set becomes the end next time around.
        normed_pos = next_normed_pos;
      }
      if (!made_features) {
        // We didn't make any features, so keep the prev accumulators and
        // add the next ones into the current.
        points.add(next_points);
        dirs.add(next_dirs);
      }
    }
  } else {
    // There is no outline, so we are forced to use the polygonal approximation.
    const EDGEPT* pt = startpt;
    do {
      FCOORD start_pos(pt->pos.x, pt->pos.y);
      FCOORD end_pos(pt->next->pos.x, pt->next->pos.y);
      denorm.LocalNormTransform(start_pos, &start_pos);
      denorm.LocalNormTransform(end_pos, &end_pos);
      ComputeFeatures(start_pos, end_pos, feature_length, features);
    } while ((pt = pt->next) != endpt);
  }
}

// Extracts sets of 3-D features of length kStandardFeatureLength (=12.8), as
// (x,y) position and angle as measured counterclockwise from the vector
// <-1, 0>, from blob using two normalizations defined by bl_denorm and
// cn_denorm. See SetupBLCNDenorms for definitions.
// If outline_cn_counts is not nullptr, on return it contains the cumulative
// number of cn features generated for each outline in the blob (in order).
// Thus after the first outline, there were (*outline_cn_counts)[0] features,
// after the second outline, there were (*outline_cn_counts)[1] features etc.
void Classify::ExtractFeatures(const TBLOB& blob,
                               bool nonlinear_norm,
                               std::vector<INT_FEATURE_STRUCT>* bl_features,
                               std::vector<INT_FEATURE_STRUCT>* cn_features,
                               INT_FX_RESULT_STRUCT* results,
                               GenericVector<int>* outline_cn_counts) {
  DENORM bl_denorm, cn_denorm;
  tesseract::Classify::SetupBLCNDenorms(blob, nonlinear_norm,
                                        &bl_denorm, &cn_denorm, results);
  if (outline_cn_counts != nullptr)
    outline_cn_counts->truncate(0);
  // Iterate the outlines.
  for (TESSLINE* ol = blob.outlines; ol != nullptr; ol = ol->next) {
    // Iterate the polygon.
    EDGEPT* loop_pt = ol->FindBestStartPt();
    EDGEPT* pt = loop_pt;
    if (pt == nullptr) continue;
    do {
      // continue here advances to the loop condition, i.e. skips to pt->next.
      if (pt->IsHidden()) continue;
      // Find a run of equal src_outline.
      EDGEPT* last_pt = pt;
      do {
        last_pt = last_pt->next;
      } while (last_pt != loop_pt && !last_pt->IsHidden() &&
               last_pt->src_outline == pt->src_outline);
      last_pt = last_pt->prev;
      // Until the adaptive classifier can be weaned off polygon segments,
      // we have to force extraction from the polygon for the bl_features.
      ExtractFeaturesFromRun(pt, last_pt, bl_denorm, kStandardFeatureLength,
                             true, bl_features);
      ExtractFeaturesFromRun(pt, last_pt, cn_denorm, kStandardFeatureLength,
                             false, cn_features);
      pt = last_pt;
    } while ((pt = pt->next) != loop_pt);
    if (outline_cn_counts != nullptr)
      outline_cn_counts->push_back(cn_features->size());
  }
  // Fill in the summary statistics for the caller.
  results->NumBL = bl_features->size();
  results->NumCN = cn_features->size();
  results->YBottom = blob.bounding_box().bottom();
  results->YTop = blob.bounding_box().top();
  results->Width = blob.bounding_box().width();
}

}  // namespace tesseract
diff --git a/tesseract/src/classify/intfx.h b/tesseract/src/classify/intfx.h
new file mode 100644
index 00000000..f4f8fd1a
--- /dev/null
+++ b/tesseract/src/classify/intfx.h
/******************************************************************************
 ** Filename:    intfx.h
 ** Purpose:     Interface to high level integer feature extractor.
 ** Author:      Robert Moss
 ** History:     Tue May 21 15:51:57 MDT 1991, RWM, Created.
 **
 ** (c) Copyright Hewlett-Packard Company, 1988.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 ******************************************************************************/
#ifndef INTFX_H
#define INTFX_H

#include "blobs.h"
#include "intproto.h"
#include "normalis.h"

#include <cmath>

namespace tesseract {

class DENORM;

class TrainingSample;

// Summary statistics produced by integer feature extraction of one blob.
struct INT_FX_RESULT_STRUCT {
  int32_t Length;          // total length of all outlines
  int16_t Xmean, Ymean;    // center of mass of all outlines
  int16_t Rx, Ry;          // radius of gyration
  int16_t NumBL, NumCN;    // number of features extracted
  int16_t Width;           // Width of blob in BLN coords.
  uint8_t YBottom;         // Bottom of blob in BLN coords.
  uint8_t YTop;            // Top of blob in BLN coords.
};

// The standard feature length (= 12.8).
const double kStandardFeatureLength = 64.0 / 5;

/**----------------------------------------------------------------------------
          Public Function Prototypes
----------------------------------------------------------------------------**/
TESS_API
void InitIntegerFX();

// Returns a vector representing the direction of a feature with the given
// theta direction in an INT_FEATURE_STRUCT.
TESS_API
FCOORD FeatureDirection(uint8_t theta);

// Generates a TrainingSample from a TBLOB. Extracts features and sets
// the bounding box, so classifiers that operate on the image can work.
// TODO(rays) BlobToTrainingSample must remain a global function until
// the FlexFx and FeatureDescription code can be removed and LearnBlob
// made a member of Classify.
TrainingSample* BlobToTrainingSample(
    const TBLOB& blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT* fx_info,
    std::vector<INT_FEATURE_STRUCT>* bl_features);

}  // namespace tesseract

#endif
diff --git a/tesseract/src/classify/intmatcher.cpp b/tesseract/src/classify/intmatcher.cpp
new file mode 100644
index 00000000..b78c700f
--- /dev/null
+++ b/tesseract/src/classify/intmatcher.cpp
/******************************************************************************
 ** Filename:    intmatcher.cpp
 ** Purpose:     Generic high level classification routines.
 ** Author:      Robert Moss
 ** (c) Copyright Hewlett-Packard Company, 1988.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 ******************************************************************************/

// Include automatically generated configuration file if running autoconf.
#ifdef HAVE_CONFIG_H
#include "config_auto.h"
#endif

#include "intmatcher.h"

#include "fontinfo.h"
#include "intproto.h"
#include "scrollview.h"
#include "float2int.h"
#include "classify.h"
#include "shapetable.h"

#include "helpers.h"

#include <cassert>
#include <cmath>

namespace tesseract {

/*----------------------------------------------------------------------------
                    Global Data Definitions and Declarations
----------------------------------------------------------------------------*/
// Parameters of the sigmoid used to convert similarity to evidence in the
// similarity_evidence_table_ that is used to convert distance metric to an
// 8 bit evidence value in the secondary matcher. (See IntMatcher::Init).
const float IntegerMatcher::kSEExponentialMultiplier = 0.0f;
const float IntegerMatcher::kSimilarityCenter = 0.0075f;

// offset_table[n] is the bit index of the least significant set bit of n,
// or 255 when n == 0 (no bit set).
static const uint8_t offset_table[] = {
  255, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3,
  0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4,
  0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3,
  0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5,
  0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3,
  0, 1, 0, 2, 0, 1, 0, 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4,
  0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3,
  0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6,
  0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3,
  0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4,
  0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
};

// next_table[n] is n with its least significant set bit cleared, so that
// repeated offset_table/next_table lookups enumerate the set bits of a byte.
static const uint8_t next_table[] = {
  0, 0, 0, 0x2, 0, 0x4, 0x4, 0x6, 0, 0x8, 0x8, 0x0a, 0x08, 0x0c, 0x0c, 0x0e,
  0, 0x10, 0x10, 0x12, 0x10, 0x14, 0x14, 0x16, 0x10, 0x18, 0x18, 0x1a,
  0x18, 0x1c, 0x1c, 0x1e, 0, 0x20, 0x20, 0x22, 0x20, 0x24, 0x24, 0x26,
  0x20, 0x28, 0x28, 0x2a, 0x28, 0x2c, 0x2c, 0x2e, 0x20, 0x30, 0x30, 0x32,
  0x30, 0x34, 0x34, 0x36, 0x30, 0x38, 0x38, 0x3a, 0x38, 0x3c, 0x3c, 0x3e,
  0, 0x40, 0x40, 0x42, 0x40, 0x44, 0x44, 0x46, 0x40, 0x48, 0x48, 0x4a,
  0x48, 0x4c, 0x4c, 0x4e, 0x40, 0x50, 0x50, 0x52, 0x50, 0x54, 0x54, 0x56,
  0x50, 0x58, 0x58, 0x5a, 0x58, 0x5c, 0x5c, 0x5e, 0x40, 0x60, 0x60, 0x62,
  0x60, 0x64, 0x64, 0x66, 0x60, 0x68, 0x68, 0x6a, 0x68, 0x6c, 0x6c, 0x6e,
  0x60, 0x70, 0x70, 0x72, 0x70, 0x74, 0x74, 0x76, 0x70, 0x78, 0x78, 0x7a,
  0x78, 0x7c, 0x7c, 0x7e, 0, 0x80, 0x80, 0x82, 0x80, 0x84, 0x84, 0x86,
  0x80, 0x88, 0x88, 0x8a, 0x88, 0x8c, 0x8c, 0x8e, 0x80, 0x90, 0x90, 0x92,
  0x90, 0x94, 0x94, 0x96, 0x90, 0x98, 0x98, 0x9a, 0x98, 0x9c, 0x9c, 0x9e,
  0x80, 0xa0, 0xa0, 0xa2, 0xa0, 0xa4, 0xa4, 0xa6, 0xa0, 0xa8, 0xa8, 0xaa,
  0xa8, 0xac, 0xac, 0xae, 0xa0, 0xb0, 0xb0, 0xb2, 0xb0, 0xb4, 0xb4, 0xb6,
  0xb0, 0xb8, 0xb8, 0xba, 0xb8, 0xbc, 0xbc, 0xbe, 0x80, 0xc0, 0xc0, 0xc2,
  0xc0, 0xc4, 0xc4, 0xc6, 0xc0, 0xc8, 0xc8, 0xca, 0xc8, 0xcc, 0xcc, 0xce,
  0xc0, 0xd0, 0xd0, 0xd2, 0xd0, 0xd4, 0xd4, 0xd6, 0xd0, 0xd8, 0xd8, 0xda,
  0xd8, 0xdc, 0xdc, 0xde, 0xc0, 0xe0, 0xe0, 0xe2, 0xe0, 0xe4, 0xe4, 0xe6,
  0xe0, 0xe8, 0xe8, 0xea, 0xe8, 0xec, 0xec, 0xee, 0xe0, 0xf0, 0xf0, 0xf2,
  0xf0, 0xf4, 0xf4, 0xf6, 0xf0, 0xf8, 0xf8, 0xfa, 0xf8, 0xfc, 0xfc, 0xfe
};

// See http://b/19318793 (#6) for a complete discussion.

/**
 * Sort Key array in ascending order using heap sort
 * algorithm. Also sort Index array that is tied to
 * the key array.
 * @param n  Number of elements to sort
 * @param ra Key array [1..n]   (1-indexed: element 0 is unused)
 * @param rb Index array [1..n] (permuted in lockstep with ra)
 */
static void
HeapSort (int n, int ra[], int rb[]) {
  int i, rra, rrb;
  int l, j, ir;

  l = (n >> 1) + 1;
  ir = n;
  // Classic two-phase heapsort: while l > 1 we are still building the heap;
  // afterwards we repeatedly extract the max into the shrinking tail.
  for (;;) {
    if (l > 1) {
      rra = ra[--l];
      rrb = rb[l];
    }
    else {
      rra = ra[ir];
      rrb = rb[ir];
      ra[ir] = ra[1];
      rb[ir] = rb[1];
      if (--ir == 1) {
        ra[1] = rra;
        rb[1] = rrb;
        return;
      }
    }
    // Sift rra/rrb down from position l to its proper place in the heap.
    i = l;
    j = l << 1;
    while (j <= ir) {
      if (j < ir && ra[j] < ra[j + 1])
        ++j;
      if (rra < ra[j]) {
        ra[i] = ra[j];
        rb[i] = rb[j];
        j += (i = j);
      }
      else
        j = ir + 1;
    }
    ra[i] = rra;
    rb[i] = rrb;
  }
}

// Encapsulation of the intermediate data and computations made by the class
// pruner. The class pruner implements a simple linear classifier on binary
// features by heavily quantizing the feature space, and applying
// NUM_BITS_PER_CLASS (2)-bit weights to the features. Lack of resolution in
// weights is compensated by a non-constant bias that is dependent on the
// number of features present.
class ClassPruner {
 public:
  ClassPruner(int max_classes) {
    // The unrolled loop in ComputeScores means that the array sizes need to
    // be rounded up so that the array is big enough to accommodate the extra
    // entries accessed by the unrolling. Each pruner word is of size
    // BITS_PER_WERD and each entry is NUM_BITS_PER_CLASS, so there are
    // BITS_PER_WERD / NUM_BITS_PER_CLASS entries.
    // See ComputeScores.
    max_classes_ = max_classes;
    rounded_classes_ = RoundUp(
        max_classes, WERDS_PER_CP_VECTOR * BITS_PER_WERD / NUM_BITS_PER_CLASS);
    // NOTE(review): raw owning arrays with no deleted copy ctor/assignment;
    // copying a ClassPruner would double-delete — confirm it is never copied.
    class_count_ = new int[rounded_classes_];
    norm_count_ = new int[rounded_classes_];
    sort_key_ = new int[rounded_classes_ + 1];
    sort_index_ = new int[rounded_classes_ + 1];
    for (int i = 0; i < rounded_classes_; i++) {
      class_count_[i] = 0;
    }
    pruning_threshold_ = 0;
    num_features_ = 0;
    num_classes_ = 0;
  }

  ~ClassPruner() {
    delete []class_count_;
    delete []norm_count_;
    delete []sort_key_;
    delete []sort_index_;
  }

  /// Computes the scores for every class in the character set, by summing the
  /// weights for each feature and stores the sums internally in class_count_.
  void ComputeScores(const INT_TEMPLATES_STRUCT* int_templates,
                     int num_features, const INT_FEATURE_STRUCT* features) {
    num_features_ = num_features;
    int num_pruners = int_templates->NumClassPruners;
    for (int f = 0; f < num_features; ++f) {
      const INT_FEATURE_STRUCT* feature = &features[f];
      // Quantize the feature to NUM_CP_BUCKETS*NUM_CP_BUCKETS*NUM_CP_BUCKETS.
      int x = feature->X * NUM_CP_BUCKETS >> 8;
      int y = feature->Y * NUM_CP_BUCKETS >> 8;
      int theta = feature->Theta * NUM_CP_BUCKETS >> 8;
      int class_id = 0;
      // Each CLASS_PRUNER_STRUCT only covers CLASSES_PER_CP(32) classes, so
      // we need a collection of them, indexed by pruner_set.
      for (int pruner_set = 0; pruner_set < num_pruners; ++pruner_set) {
        // Look up quantized feature in a 3-D array, an array of weights for
        // each class.
        const uint32_t* pruner_word_ptr =
            int_templates->ClassPruners[pruner_set]->p[x][y][theta];
        for (int word = 0; word < WERDS_PER_CP_VECTOR; ++word) {
          uint32_t pruner_word = *pruner_word_ptr++;
          // This inner loop is unrolled to speed up the ClassPruner.
          // Currently gcc would not unroll it unless it is set to O3
          // level of optimization or -funroll-loops is specified.
          /*
          uint32_t class_mask = (1 << NUM_BITS_PER_CLASS) - 1;
          for (int bit = 0; bit < BITS_PER_WERD/NUM_BITS_PER_CLASS; bit++) {
            class_count_[class_id++] += pruner_word & class_mask;
            pruner_word >>= NUM_BITS_PER_CLASS;
          }
          */
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
        }
      }
    }
  }

  /// Adjusts the scores according to the number of expected features.
  /// Used
  /// in lieu of a constant bias, this penalizes classes that expect more
  /// features than there are present. Thus an actual c will score higher for c
  /// than e, even though almost all the features match e as well as c, because
  /// e expects more features to be present.
  void AdjustForExpectedNumFeatures(const uint16_t* expected_num_features,
                                    int cutoff_strength) {
    for (int class_id = 0; class_id < max_classes_; ++class_id) {
      if (num_features_ < expected_num_features[class_id]) {
        // Scale the penalty by the deficit relative to the cutoff strength.
        int deficit = expected_num_features[class_id] - num_features_;
        class_count_[class_id] -= class_count_[class_id] * deficit /
          (num_features_ * cutoff_strength + deficit);
      }
    }
  }

  /// Zeros the scores for classes disabled in the unicharset.
  /// Implements the black-list to recognize a subset of the character set.
  void DisableDisabledClasses(const UNICHARSET& unicharset) {
    for (int class_id = 0; class_id < max_classes_; ++class_id) {
      if (!unicharset.get_enabled(class_id))
        class_count_[class_id] = 0;  // This char is disabled!
    }
  }

  /** Zeros the scores of fragments. */
  void DisableFragments(const UNICHARSET& unicharset) {
    for (int class_id = 0; class_id < max_classes_; ++class_id) {
      // Do not include character fragments in the class pruner
      // results if disable_character_fragments is true.
      if (unicharset.get_fragment(class_id)) {
        class_count_[class_id] = 0;
      }
    }
  }

  /// Normalizes the counts for xheight, putting the normalized result in
  /// norm_count_. Applies a simple subtractive penalty for incorrect vertical
  /// position provided by the normalization_factors array, indexed by
  /// character class, and scaled by the norm_multiplier.
  void NormalizeForXheight(int norm_multiplier,
                           const uint8_t* normalization_factors) {
    for (int class_id = 0; class_id < max_classes_; class_id++) {
      norm_count_[class_id] = class_count_[class_id] -
          ((norm_multiplier * normalization_factors[class_id]) >> 8);
    }
  }

  /** The nop normalization copies the class_count_ array to norm_count_. */
  void NoNormalization() {
    for (int class_id = 0; class_id < max_classes_; class_id++) {
      norm_count_[class_id] = class_count_[class_id];
    }
  }

  /// Prunes the classes using <the maximum count> * pruning_factor/256 as a
  /// threshold for keeping classes. If max_of_non_fragments, then ignore
  /// fragments in computing the maximum count.
  void PruneAndSort(int pruning_factor, int keep_this,
                    bool max_of_non_fragments, const UNICHARSET& unicharset) {
    int max_count = 0;
    for (int c = 0; c < max_classes_; ++c) {
      if (norm_count_[c] > max_count &&
          // This additional check is added in order to ensure that
          // the classifier will return at least one non-fragmented
          // character match.
          // TODO(daria): verify that this helps accuracy and does not
          // hurt performance.
          (!max_of_non_fragments || !unicharset.get_fragment(c))) {
        max_count = norm_count_[c];
      }
    }
    // Prune Classes.
    pruning_threshold_ = (max_count * pruning_factor) >> 8;
    // Select Classes: floor the threshold at 1 so all-zero scores drop out.
    if (pruning_threshold_ < 1)
      pruning_threshold_ = 1;
    num_classes_ = 0;
    // Note: sort_key_/sort_index_ are filled 1-indexed, as HeapSort expects.
    for (int class_id = 0; class_id < max_classes_; class_id++) {
      if (norm_count_[class_id] >= pruning_threshold_ ||
          class_id == keep_this) {
        ++num_classes_;
        sort_index_[num_classes_] = class_id;
        sort_key_[num_classes_] = norm_count_[class_id];
      }
    }

    // Sort Classes using Heapsort Algorithm.
    if (num_classes_ > 1)
      HeapSort(num_classes_, sort_key_, sort_index_);
  }

  /** Prints debug info on the class pruner matches for the pruned classes only.
   */
  void DebugMatch(const Classify& classify,
                  const INT_TEMPLATES_STRUCT* int_templates,
                  const INT_FEATURE_STRUCT* features) const {
    int num_pruners = int_templates->NumClassPruners;
    int max_num_classes = int_templates->NumClasses;
    for (int f = 0; f < num_features_; ++f) {
      const INT_FEATURE_STRUCT* feature = &features[f];
      tprintf("F=%3d(%d,%d,%d),", f, feature->X, feature->Y, feature->Theta);
      // Quantize the feature to NUM_CP_BUCKETS*NUM_CP_BUCKETS*NUM_CP_BUCKETS,
      // mirroring the quantization used in ComputeScores.
      int x = feature->X * NUM_CP_BUCKETS >> 8;
      int y = feature->Y * NUM_CP_BUCKETS >> 8;
      int theta = feature->Theta * NUM_CP_BUCKETS >> 8;
      int class_id = 0;
      for (int pruner_set = 0; pruner_set < num_pruners; ++pruner_set) {
        // Look up quantized feature in a 3-D array, an array of weights for
        // each class.
        const uint32_t* pruner_word_ptr =
            int_templates->ClassPruners[pruner_set]->p[x][y][theta];
        for (int word = 0; word < WERDS_PER_CP_VECTOR; ++word) {
          uint32_t pruner_word = *pruner_word_ptr++;
          // Print only classes that survived pruning.
          for (int word_class = 0; word_class < 16 &&
               class_id < max_num_classes; ++word_class, ++class_id) {
            if (norm_count_[class_id] >= pruning_threshold_) {
              tprintf(" %s=%d,",
                      classify.ClassIDToDebugStr(int_templates,
                                                 class_id, 0).c_str(),
                      pruner_word & CLASS_PRUNER_CLASS_MASK);
            }
            pruner_word >>= NUM_BITS_PER_CLASS;
          }
        }
        tprintf("\n");
      }
    }
  }

  /** Prints a summary of the pruner result.
   */
  void SummarizeResult(const Classify& classify,
                       const INT_TEMPLATES_STRUCT* int_templates,
                       const uint16_t* expected_num_features,
                       int norm_multiplier,
                       const uint8_t* normalization_factors) const {
    tprintf("CP:%d classes, %d features:\n", num_classes_, num_features_);
    // sort_index_/sort_key_ are 1-indexed and ascending, so walk backwards
    // to print best-first.
    for (int i = 0; i < num_classes_; ++i) {
      int class_id = sort_index_[num_classes_ - i];
      STRING class_string = classify.ClassIDToDebugStr(int_templates,
                                                       class_id, 0);
      tprintf("%s:Initial=%d, E=%d, Xht-adj=%d, N=%d, Rat=%.2f\n",
              class_string.c_str(),
              class_count_[class_id],
              expected_num_features[class_id],
              (norm_multiplier * normalization_factors[class_id]) >> 8,
              sort_key_[num_classes_ - i],
              100.0 - 100.0 * sort_key_[num_classes_ - i] /
                (CLASS_PRUNER_CLASS_MASK * num_features_));
    }
  }

  /// Copies the pruned, sorted classes into the output results and returns
  /// the number of classes.
  int SetupResults(std::vector<CP_RESULT_STRUCT>* results) const {
    results->resize(num_classes_);
    for (int c = 0; c < num_classes_; ++c) {
      (*results)[c].Class = sort_index_[num_classes_ - c];
      // Rating maps the score into [0,1]: 0 = perfect, 1 = no evidence.
      (*results)[c].Rating = 1.0f - sort_key_[num_classes_ - c] /
        (static_cast<float>(CLASS_PRUNER_CLASS_MASK) * num_features_);
    }
    return num_classes_;
  }

 private:
  /** Array[rounded_classes_] of initial counts for each class. */
  int *class_count_;
  /// Array[rounded_classes_] of modified counts for each class after
  /// normalizing for expected number of features, disabled classes, fragments,
  /// and xheights.
  int *norm_count_;
  /** Array[rounded_classes_ +1] of pruned counts that gets sorted */
  int *sort_key_;
  /** Array[rounded_classes_ +1] of classes corresponding to sort_key_. */
  int *sort_index_;
  /** Number of classes in this class pruner. */
  int max_classes_;
  /** Rounded up number of classes used for array sizes. */
  int rounded_classes_;
  /** Threshold count applied to prune classes. */
  int pruning_threshold_;
  /** The number of features used to compute the scores. */
  int num_features_;
  /** Final number of pruned classes. */
  int num_classes_;
};

/*----------------------------------------------------------------------------
              Public Code
----------------------------------------------------------------------------*/
/**
 * Runs the class pruner from int_templates on the given features, returning
 * the number of classes output in results.
 * @param int_templates          Class pruner tables
 * @param num_features           Number of features in blob
 * @param features               Array of features
 * @param normalization_factors  Array of fudge factors from blob
 *                               normalization process (by CLASS_INDEX)
 * @param expected_num_features  Array of expected number of features
 *                               for each class (by CLASS_INDEX)
 * @param results                Sorted Array of pruned classes. Must be an
 *                               array of size at least
 *                               int_templates->NumClasses.
 * @param keep_this              Class id to keep even if it would be pruned.
 */
int Classify::PruneClasses(const INT_TEMPLATES_STRUCT* int_templates,
                           int num_features, int keep_this,
                           const INT_FEATURE_STRUCT* features,
                           const uint8_t* normalization_factors,
                           const uint16_t* expected_num_features,
                           std::vector<CP_RESULT_STRUCT>* results) {
  ClassPruner pruner(int_templates->NumClasses);
  // Compute initial match scores for all classes.
  pruner.ComputeScores(int_templates, num_features, features);
  // Adjust match scores for number of expected features.
  pruner.AdjustForExpectedNumFeatures(expected_num_features,
                                      classify_cp_cutoff_strength);
  // Apply disabled classes in unicharset - only works without a shape_table.
  if (shape_table_ == nullptr)
    pruner.DisableDisabledClasses(unicharset);
  // If fragments are disabled, remove them, also only without a shape table.
  if (disable_character_fragments && shape_table_ == nullptr)
    pruner.DisableFragments(unicharset);

  // If we have good x-heights, apply the given normalization factors.
+ if (normalization_factors != nullptr) { + pruner.NormalizeForXheight(classify_class_pruner_multiplier, + normalization_factors); + } else { + pruner.NoNormalization(); + } + // Do the actual pruning and sort the short-list. + pruner.PruneAndSort(classify_class_pruner_threshold, keep_this, + shape_table_ == nullptr, unicharset); + + if (classify_debug_level > 2) { + pruner.DebugMatch(*this, int_templates, features); + } + if (classify_debug_level > 1) { + pruner.SummarizeResult(*this, int_templates, expected_num_features, + classify_class_pruner_multiplier, + normalization_factors); + } + // Convert to the expected output format. + return pruner.SetupResults(results); +} + +/** + * IntegerMatcher returns the best configuration and rating + * for a single class. The class matched against is determined + * by the uniqueness of the ClassTemplate parameter. The + * best rating and its associated configuration are returned. + * + * Globals: + * - local_matcher_multiplier_ Normalization factor multiplier + * param ClassTemplate Prototypes & tables for a class + * param NumFeatures Number of features in blob + * param Features Array of features + * param NormalizationFactor Fudge factor from blob normalization process + * param Result Class rating & configuration: (0.0 -> 1.0), 0=bad, 1=good + * param Debug Debugger flag: 1=debugger on + */ +void IntegerMatcher::Match(INT_CLASS ClassTemplate, + BIT_VECTOR ProtoMask, + BIT_VECTOR ConfigMask, + int16_t NumFeatures, + const INT_FEATURE_STRUCT* Features, + UnicharRating* Result, + int AdaptFeatureThreshold, + int Debug, + bool SeparateDebugWindows) { + auto *tables = new ScratchEvidence(); + int Feature; + + if (MatchDebuggingOn (Debug)) + tprintf ("Integer Matcher -------------------------------------------\n"); + + tables->Clear(ClassTemplate); + Result->feature_misses = 0; + + for (Feature = 0; Feature < NumFeatures; Feature++) { + int csum = UpdateTablesForFeature(ClassTemplate, ProtoMask, ConfigMask, + Feature, 
&Features[Feature], + tables, Debug); + // Count features that were missed over all configs. + if (csum == 0) + ++Result->feature_misses; + } + +#ifndef GRAPHICS_DISABLED + if (PrintProtoMatchesOn(Debug) || PrintMatchSummaryOn(Debug)) { + DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables, + NumFeatures, Debug); + } + + if (DisplayProtoMatchesOn(Debug)) { + DisplayProtoDebugInfo(ClassTemplate, ConfigMask, + *tables, SeparateDebugWindows); + } + + if (DisplayFeatureMatchesOn(Debug)) { + DisplayFeatureDebugInfo(ClassTemplate, ProtoMask, ConfigMask, NumFeatures, + Features, AdaptFeatureThreshold, Debug, + SeparateDebugWindows); + } +#endif + + tables->UpdateSumOfProtoEvidences(ClassTemplate, ConfigMask); + tables->NormalizeSums(ClassTemplate, NumFeatures); + + FindBestMatch(ClassTemplate, *tables, Result); + +#ifndef GRAPHICS_DISABLED + if (PrintMatchSummaryOn(Debug)) + Result->Print(); + + if (MatchDebuggingOn(Debug)) + tprintf("Match Complete --------------------------------------------\n"); +#endif + + delete tables; +} + +/** + * FindGoodProtos finds all protos whose normalized proto-evidence + * exceed AdaptProtoThreshold. The list is ordered by increasing + * proto id number. + * + * Globals: + * - local_matcher_multiplier_ Normalization factor multiplier + * param ClassTemplate Prototypes & tables for a class + * param ProtoMask AND Mask for proto word + * param ConfigMask AND Mask for config word + * param NumFeatures Number of features in blob + * param Features Array of features + * param ProtoArray Array of good protos + * param AdaptProtoThreshold Threshold for good protos + * param Debug Debugger flag: 1=debugger on + * @return Number of good protos in ProtoArray. 
 */
int IntegerMatcher::FindGoodProtos(
    INT_CLASS ClassTemplate,
    BIT_VECTOR ProtoMask,
    BIT_VECTOR ConfigMask,
    int16_t NumFeatures,
    INT_FEATURE_ARRAY Features,
    PROTO_ID *ProtoArray,
    int AdaptProtoThreshold,
    int Debug) {
  auto *tables = new ScratchEvidence();
  int NumGoodProtos = 0;

  /* DEBUG opening heading */
  if (MatchDebuggingOn (Debug))
    tprintf
      ("Find Good Protos -------------------------------------------\n");

  tables->Clear(ClassTemplate);

  // Accumulate evidence for every feature into the scratch tables.
  for (int Feature = 0; Feature < NumFeatures; Feature++)
    UpdateTablesForFeature(
        ClassTemplate, ProtoMask, ConfigMask, Feature, &(Features[Feature]),
        tables, Debug);

#ifndef GRAPHICS_DISABLED
  if (PrintProtoMatchesOn (Debug) || PrintMatchSummaryOn (Debug))
    DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables,
                           NumFeatures, Debug);
#endif

  /* Average Proto Evidences & Find Good Protos */
  for (int proto = 0; proto < ClassTemplate->NumProtos; proto++) {
    /* Compute Average for Actual Proto */
    int Temp = 0;
    // Sum is capped at MAX_PROTO_INDEX entries, yet the divisor below is the
    // full ProtoLengths[proto].
    // NOTE(review): also assumes ProtoLengths[proto] > 0 — a zero length
    // would divide by zero; confirm templates guarantee non-zero lengths.
    for (uint8_t i = 0;
         i < MAX_PROTO_INDEX && i < ClassTemplate->ProtoLengths[proto]; i++)
      Temp += tables->proto_evidence_[proto][i];

    Temp /= ClassTemplate->ProtoLengths[proto];

    /* Find Good Protos */
    if (Temp >= AdaptProtoThreshold) {
      // Append proto id; output stays sorted because proto ids ascend.
      *ProtoArray = proto;
      ProtoArray++;
      NumGoodProtos++;
    }
  }

  if (MatchDebuggingOn (Debug))
    tprintf ("Match Complete --------------------------------------------\n");
  delete tables;

  return NumGoodProtos;
}

/**
 * FindBadFeatures finds all features with maximum feature-evidence <
 * AdaptFeatureThresh. The list is ordered by increasing feature number.
 * @param ClassTemplate Prototypes & tables for a class
 * @param ProtoMask AND Mask for proto word
 * @param ConfigMask AND Mask for config word
 * @param NumFeatures Number of features in blob
 * @param Features Array of features
 * @param FeatureArray Array of bad features
 * @param AdaptFeatureThreshold Threshold for bad features
 * @param Debug Debugger flag: 1=debugger on
 * @return Number of bad features in FeatureArray.
 */
int IntegerMatcher::FindBadFeatures(
    INT_CLASS ClassTemplate,
    BIT_VECTOR ProtoMask,
    BIT_VECTOR ConfigMask,
    int16_t NumFeatures,
    INT_FEATURE_ARRAY Features,
    FEATURE_ID *FeatureArray,
    int AdaptFeatureThreshold,
    int Debug) {
  auto *tables = new ScratchEvidence();
  int NumBadFeatures = 0;

  /* DEBUG opening heading */
  if (MatchDebuggingOn(Debug))
    tprintf("Find Bad Features -------------------------------------------\n");

  tables->Clear(ClassTemplate);

  for (int Feature = 0; Feature < NumFeatures; Feature++) {
    // feature_evidence_ is cleared inside UpdateTablesForFeature, so it
    // holds this feature's per-config evidence only.
    UpdateTablesForFeature(
        ClassTemplate, ProtoMask, ConfigMask, Feature, &Features[Feature],
        tables, Debug);

    /* Find Best Evidence for Current Feature */
    int best = 0;
    assert(ClassTemplate->NumConfigs < MAX_NUM_CONFIGS);
    for (int i = 0; i < MAX_NUM_CONFIGS && i < ClassTemplate->NumConfigs; i++)
      if (tables->feature_evidence_[i] > best)
        best = tables->feature_evidence_[i];

    /* Find Bad Features */
    if (best < AdaptFeatureThreshold) {
      // Append feature number; output is sorted by construction.
      *FeatureArray = Feature;
      FeatureArray++;
      NumBadFeatures++;
    }
  }

#ifndef GRAPHICS_DISABLED
  if (PrintProtoMatchesOn(Debug) || PrintMatchSummaryOn(Debug))
    DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables,
                           NumFeatures, Debug);
#endif

  if (MatchDebuggingOn(Debug))
    tprintf("Match Complete --------------------------------------------\n");

  delete tables;
  return NumBadFeatures;
}


/// Constructor: precomputes the similarity-to-evidence lookup table and the
/// shift/mask constants used by UpdateTablesForFeature.
IntegerMatcher::IntegerMatcher(tesseract::IntParam *classify_debug_level)
  : classify_debug_level_(classify_debug_level)
{
  /* Initialize table for evidence to similarity lookup */
  for (int i = 0; i < SE_TABLE_SIZE; i++) {
    // Expand the table index back to a 27-bit similarity value, then map it
    // through a bell-shaped curve onto a 0..255 evidence byte.
    uint32_t IntSimilarity = i << (27 - SE_TABLE_BITS);
    double Similarity = (static_cast<double>(IntSimilarity)) / 65536.0 / 65536.0;
    double evidence = Similarity / kSimilarityCenter;
    evidence = 255.0 / (evidence * evidence + 1.0);

    if (kSEExponentialMultiplier > 0.0) {
      // Optional exponential damping of the tail of the curve.
      double scale = 1.0 - exp(-kSEExponentialMultiplier) *
        exp(kSEExponentialMultiplier * (static_cast<double>(i) / SE_TABLE_SIZE));
      evidence *= ClipToRange(scale, 0.0, 1.0);
    }

    similarity_evidence_table_[i] = static_cast<uint8_t>(evidence + 0.5);
  }

  /* Initialize evidence computation variables */
  evidence_table_mask_ =
    ((1 << kEvidenceTableBits) - 1) << (9 - kEvidenceTableBits);
  mult_trunc_shift_bits_ = (14 - kIntEvidenceTruncBits);
  table_trunc_shift_bits_ = (27 - SE_TABLE_BITS - (mult_trunc_shift_bits_ << 1));
  evidence_mult_mask_ = ((1 << kIntEvidenceTruncBits) - 1);
}

/*----------------------------------------------------------------------------
              Private Code
----------------------------------------------------------------------------*/
/// Zeros the per-config feature-evidence sums and the per-proto evidence
/// arrays for the number of configs/protos actually used by the class.
void ScratchEvidence::Clear(const INT_CLASS class_template) {
  memset(sum_feature_evidence_, 0,
         class_template->NumConfigs * sizeof(sum_feature_evidence_[0]));
  memset(proto_evidence_, 0,
         class_template->NumProtos * sizeof(proto_evidence_[0]));
}

/// Zeros only the per-config evidence for the current feature.
void ScratchEvidence::ClearFeatureEvidence(const INT_CLASS class_template) {
  memset(feature_evidence_, 0,
         class_template->NumConfigs * sizeof(feature_evidence_[0]));
}

/**
 * Print debugging information for Configurations
 */
static void IMDebugConfiguration(int FeatureNum, uint16_t ActualProtoNum,
                                 uint8_t Evidence, uint32_t ConfigWord) {
  tprintf ("F = %3d, P = %3d, E = %3d, Configs = ",
           FeatureNum, static_cast<int>(ActualProtoNum), static_cast<int>(Evidence));
  // Print the config membership bits, LSB first.
  while (ConfigWord) {
    if (ConfigWord & 1)
      tprintf ("1");
    else
      tprintf ("0");
    ConfigWord >>= 1;
  }
  tprintf ("\n");
}

/**
 * Print debugging information for Configurations
 */
static void IMDebugConfigurationSum(int FeatureNum, uint8_t *FeatureEvidence,
                                    int32_t ConfigCount) {
  tprintf("F=%3d, C=", FeatureNum);
  for (int ConfigNum = 0; ConfigNum < ConfigCount; ConfigNum++) {
    tprintf("%4d", FeatureEvidence[ConfigNum]);
  }
  tprintf("\n");
}

/**
 * For the given feature: prune protos, compute evidence,
 * update Feature Evidence, Proto Evidence, and Sum of Feature
 * Evidence tables.
 * @param ClassTemplate Prototypes & tables for a class
 * @param FeatureNum Current feature number (for DEBUG only)
 * @param Feature Pointer to a feature struct
 * @param tables Evidence tables
 * @param Debug Debugger flag: 1=debugger on
 * @return sum of feature evidence tables
 */
int IntegerMatcher::UpdateTablesForFeature(
    INT_CLASS ClassTemplate,
    BIT_VECTOR ProtoMask,
    BIT_VECTOR ConfigMask,
    int FeatureNum,
    const INT_FEATURE_STRUCT* Feature,
    ScratchEvidence *tables,
    int Debug) {
  uint32_t ConfigWord;
  uint32_t ProtoWord;
  uint32_t ProtoNum;
  uint32_t ActualProtoNum;
  uint8_t proto_byte;
  int32_t proto_word_offset;
  int32_t proto_offset;
  PROTO_SET ProtoSet;
  uint32_t *ProtoPrunerPtr;
  INT_PROTO Proto;
  int ProtoSetIndex;
  uint8_t Evidence;
  uint32_t XFeatureAddress;
  uint32_t YFeatureAddress;
  uint32_t ThetaFeatureAddress;

  tables->ClearFeatureEvidence(ClassTemplate);

  /* Precompute Feature Address offset for Proto Pruning */
  // The proto pruner is indexed by quantized X, Y and Theta buckets laid out
  // consecutively; each bucket entry is two 32-bit words wide.
  XFeatureAddress = ((Feature->X >> 2) << 1);
  YFeatureAddress = (NUM_PP_BUCKETS << 1) + ((Feature->Y >> 2) << 1);
  ThetaFeatureAddress = (NUM_PP_BUCKETS << 2) + ((Feature->Theta >> 2) << 1);

  for (ProtoSetIndex = 0, ActualProtoNum = 0;
       ProtoSetIndex < ClassTemplate->NumProtoSets; ProtoSetIndex++) {
    ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex];
    ProtoPrunerPtr = reinterpret_cast<uint32_t *>((*ProtoSet).ProtoPruner);
    // Each iteration covers one 32-proto word (half a proto set).
    for (ProtoNum = 0; ProtoNum < PROTOS_PER_PROTO_SET;
         ProtoNum += (PROTOS_PER_PROTO_SET >> 1), ActualProtoNum +=
         (PROTOS_PER_PROTO_SET >> 1), ProtoMask++, ProtoPrunerPtr++) {
      /* Prune Protos of current Proto Set */
      // A proto survives only if its bit is set in the X, Y and Theta pruner
      // words AND in the caller-supplied proto mask.
      ProtoWord = *(ProtoPrunerPtr + XFeatureAddress);
      ProtoWord &= *(ProtoPrunerPtr + YFeatureAddress);
      ProtoWord &= *(ProtoPrunerPtr + ThetaFeatureAddress);
      ProtoWord &= *ProtoMask;

      if (ProtoWord != 0) {
        // Byte-at-a-time bit scan using the offset_table/next_table lookup
        // tables to enumerate set bits.
        proto_byte = ProtoWord & 0xff;
        ProtoWord >>= 8;
        proto_word_offset = 0;
        while (ProtoWord != 0 || proto_byte != 0) {
          while (proto_byte == 0) {
            proto_byte = ProtoWord & 0xff;
            ProtoWord >>= 8;
            proto_word_offset += 8;
          }
          proto_offset = offset_table[proto_byte] + proto_word_offset;
          proto_byte = next_table[proto_byte];
          Proto = &(ProtoSet->Protos[ProtoNum + proto_offset]);
          ConfigWord = Proto->Configs[0];
          // Fixed-point distance of the feature from the proto line (A3) and
          // angular difference (M3).
          int32_t A3 = (((Proto->A * (Feature->X - 128)) * 2)
            - (Proto->B * (Feature->Y - 128)) + (Proto->C * 512));
          int32_t M3 = ((static_cast<int8_t>(Feature->Theta - Proto->Angle)) *
            kIntThetaFudge) * 2;

          // Cheap absolute value (off by one, tolerated), then truncate and
          // clamp both terms to the evidence multiplier range.
          if (A3 < 0)
            A3 = ~A3;
          if (M3 < 0)
            M3 = ~M3;
          A3 >>= mult_trunc_shift_bits_;
          M3 >>= mult_trunc_shift_bits_;
          if (static_cast<uint32_t>(A3) > evidence_mult_mask_)
            A3 = evidence_mult_mask_;
          if (static_cast<uint32_t>(M3) > evidence_mult_mask_)
            M3 = evidence_mult_mask_;

          // Squared distance indexes the precomputed similarity table.
          uint32_t A4 = (A3 * A3) + (M3 * M3);
          A4 >>= table_trunc_shift_bits_;
          if (A4 > evidence_table_mask_)
            Evidence = 0;
          else
            Evidence = similarity_evidence_table_[A4];

          if (PrintFeatureMatchesOn (Debug))
            IMDebugConfiguration (FeatureNum,
                                  ActualProtoNum + proto_offset,
                                  Evidence, ConfigWord);

          ConfigWord &= *ConfigMask;

          // For each config this proto belongs to, keep the max evidence
          // seen for the current feature (same bit-scan technique).
          uint8_t feature_evidence_index = 0;
          uint8_t config_byte = 0;
          while (ConfigWord != 0 || config_byte != 0) {
            while (config_byte == 0) {
              config_byte = ConfigWord & 0xff;
              ConfigWord >>= 8;
              feature_evidence_index += 8;
            }
            const uint8_t config_offset =
              offset_table[config_byte] + feature_evidence_index - 8;
            config_byte =
                next_table[config_byte];
            if (Evidence > tables->feature_evidence_[config_offset])
              tables->feature_evidence_[config_offset] = Evidence;
          }

          uint8_t ProtoIndex =
            ClassTemplate->ProtoLengths[ActualProtoNum + proto_offset];
          if (ProtoIndex > MAX_PROTO_INDEX) {
            // Avoid buffer overflow.
            // TODO: A better fix is still open.
            ProtoIndex = MAX_PROTO_INDEX;
          }
          uint8_t* UINT8Pointer =
            &(tables->proto_evidence_[ActualProtoNum + proto_offset][0]);
          // Insertion pass: keep the proto's top ProtoIndex evidence values
          // in descending order, bubbling the new value into place.
          for (; Evidence > 0 && ProtoIndex > 0; ProtoIndex--, UINT8Pointer++) {
            if (Evidence > *UINT8Pointer) {
              uint8_t Temp = *UINT8Pointer;
              *UINT8Pointer = Evidence;
              Evidence = Temp;
            }
          }
        }
      }
    }
  }

  if (PrintFeatureMatchesOn(Debug)) {
    IMDebugConfigurationSum(FeatureNum, tables->feature_evidence_,
                            ClassTemplate->NumConfigs);
  }

  // Fold this feature's per-config evidence into the running sums and
  // return the total over all configs (0 means the feature missed entirely).
  int* IntPointer = tables->sum_feature_evidence_;
  uint8_t* UINT8Pointer = tables->feature_evidence_;
  int SumOverConfigs = 0;
  for (int ConfigNum = ClassTemplate->NumConfigs; ConfigNum > 0; ConfigNum--) {
    int evidence = *UINT8Pointer++;
    SumOverConfigs += evidence;
    *IntPointer++ += evidence;
  }
  return SumOverConfigs;
}

/**
 * Print debugging information for Configurations
 */
#ifndef GRAPHICS_DISABLED
void IntegerMatcher::DebugFeatureProtoError(
    INT_CLASS ClassTemplate,
    BIT_VECTOR ProtoMask,
    BIT_VECTOR ConfigMask,
    const ScratchEvidence& tables,
    int16_t NumFeatures,
    int Debug) {
  float ProtoConfigs[MAX_NUM_CONFIGS];
  int ConfigNum;
  uint32_t ConfigWord;
  int ProtoSetIndex;
  uint16_t ProtoNum;
  uint8_t ProtoWordNum;
  PROTO_SET ProtoSet;
  uint16_t ActualProtoNum;

  if (PrintMatchSummaryOn(Debug)) {
    tprintf("Configuration Mask:\n");
    for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++)
      tprintf("%1d", (((*ConfigMask) >> ConfigNum) & 1));
    tprintf("\n");

    // Feature error = 100 * (1 - mean normalized evidence per feature).
    tprintf("Feature Error for Configurations:\n");
    for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++) {
      tprintf(
          " %5.1f",
          100.0 * (1.0 - static_cast<float>(tables.sum_feature_evidence_[ConfigNum])
                   / NumFeatures / 256.0));
    }
    tprintf("\n\n\n");
  }

  if (PrintMatchSummaryOn (Debug)) {
    tprintf ("Proto Mask:\n");
    for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets;
         ProtoSetIndex++) {
      ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
      // Two mask words per proto set, printed bit by bit.
      for (ProtoWordNum = 0; ProtoWordNum < 2;
           ProtoWordNum++, ProtoMask++) {
        ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
        for (ProtoNum = 0;
             ((ProtoNum < (PROTOS_PER_PROTO_SET >> 1))
              && (ActualProtoNum < ClassTemplate->NumProtos));
             ProtoNum++, ActualProtoNum++)
          tprintf ("%1d", (((*ProtoMask) >> ProtoNum) & 1));
        tprintf ("\n");
      }
    }
    tprintf ("\n");
  }

  for (int i = 0; i < ClassTemplate->NumConfigs; i++)
    ProtoConfigs[i] = 0;

  if (PrintProtoMatchesOn (Debug)) {
    tprintf ("Proto Evidence:\n");
    for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets;
         ProtoSetIndex++) {
      ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex];
      ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
      for (ProtoNum = 0;
           ((ProtoNum < PROTOS_PER_PROTO_SET) &&
            (ActualProtoNum < ClassTemplate->NumProtos));
           ProtoNum++, ActualProtoNum++) {
        tprintf ("P %3d =", ActualProtoNum);
        int temp = 0;
        for (uint8_t j = 0; j < ClassTemplate->ProtoLengths[ActualProtoNum]; j++) {
          uint8_t data = tables.proto_evidence_[ActualProtoNum][j];
          tprintf(" %d", data);
          temp += data;
        }

        tprintf(" = %6.4f%%\n",
                temp / 256.0 / ClassTemplate->ProtoLengths[ActualProtoNum]);

        // Accumulate this proto's total evidence into every config that
        // contains it.
        ConfigWord = ProtoSet->Protos[ProtoNum].Configs[0];
        ConfigNum = 0;
        while (ConfigWord) {
          tprintf ("%5d", ConfigWord & 1 ?
                            temp : 0);
          if (ConfigWord & 1)
            ProtoConfigs[ConfigNum] += temp;
          ConfigNum++;
          ConfigWord >>= 1;
        }
        tprintf("\n");
      }
    }
  }

  if (PrintMatchSummaryOn (Debug)) {
    tprintf ("Proto Error for Configurations:\n");
    for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++)
      tprintf (" %5.1f",
               100.0 * (1.0 -
                        ProtoConfigs[ConfigNum] /
                        ClassTemplate->ConfigLengths[ConfigNum] / 256.0));
    tprintf ("\n\n");
  }

  if (PrintProtoMatchesOn (Debug)) {
    tprintf ("Proto Sum for Configurations:\n");
    for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++)
      tprintf (" %4.1f", ProtoConfigs[ConfigNum] / 256.0);
    tprintf ("\n\n");

    tprintf ("Proto Length for Configurations:\n");
    for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++)
      tprintf (" %4.1f",
               static_cast<float>(ClassTemplate->ConfigLengths[ConfigNum]));
    tprintf ("\n\n");
  }

}

/// Renders every proto that belongs to a config in ConfigMask, shaded by
/// its average accumulated evidence (debug windows only).
void IntegerMatcher::DisplayProtoDebugInfo(
    INT_CLASS ClassTemplate,
    BIT_VECTOR ConfigMask,
    const ScratchEvidence& tables,
    bool SeparateDebugWindows) {
  uint16_t ProtoNum;
  uint16_t ActualProtoNum;
  PROTO_SET ProtoSet;
  int ProtoSetIndex;

  InitIntMatchWindowIfReqd();
  if (SeparateDebugWindows) {
    InitFeatureDisplayWindowIfReqd();
    InitProtoDisplayWindowIfReqd();
  }

  for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets;
       ProtoSetIndex++) {
    ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex];
    ActualProtoNum = ProtoSetIndex * PROTOS_PER_PROTO_SET;
    for (ProtoNum = 0;
         ((ProtoNum < PROTOS_PER_PROTO_SET) &&
          (ActualProtoNum < ClassTemplate->NumProtos));
         ProtoNum++, ActualProtoNum++) {
      /* Compute Average for Actual Proto */
      int temp = 0;
      for (uint8_t i = 0; i < ClassTemplate->ProtoLengths[ActualProtoNum]; i++)
        temp += tables.proto_evidence_[ActualProtoNum][i];

      temp /= ClassTemplate->ProtoLengths[ActualProtoNum];

      // Only display protos that are in at least one unmasked config.
      if ((ProtoSet->Protos[ProtoNum]).Configs[0] & (*ConfigMask)) {
        DisplayIntProto(ClassTemplate, ActualProtoNum, temp / 255.0);
      }
    }
  }
}


/// Renders each feature shaded by its best per-config evidence, or as a
/// binary good/bad color when evidence clipping is enabled (debug only).
void IntegerMatcher::DisplayFeatureDebugInfo(
    INT_CLASS ClassTemplate,
    BIT_VECTOR ProtoMask,
    BIT_VECTOR ConfigMask,
    int16_t NumFeatures,
    const INT_FEATURE_STRUCT* Features,
    int AdaptFeatureThreshold,
    int Debug,
    bool SeparateDebugWindows) {
  auto *tables = new ScratchEvidence();

  tables->Clear(ClassTemplate);

  InitIntMatchWindowIfReqd();
  if (SeparateDebugWindows) {
    InitFeatureDisplayWindowIfReqd();
    InitProtoDisplayWindowIfReqd();
  }

  for (int Feature = 0; Feature < NumFeatures; Feature++) {
    UpdateTablesForFeature(
        ClassTemplate, ProtoMask, ConfigMask, Feature, &Features[Feature],
        tables, 0);

    /* Find Best Evidence for Current Feature */
    int best = 0;
    assert(ClassTemplate->NumConfigs < MAX_NUM_CONFIGS);
    for (int i = 0; i < MAX_NUM_CONFIGS && i < ClassTemplate->NumConfigs; i++)
      if (tables->feature_evidence_[i] > best)
        best = tables->feature_evidence_[i];

    /* Update display for current feature */
    if (ClipMatchEvidenceOn(Debug)) {
      if (best < AdaptFeatureThreshold)
        DisplayIntFeature(&Features[Feature], 0.0);
      else
        DisplayIntFeature(&Features[Feature], 1.0);
    } else {
      DisplayIntFeature(&Features[Feature], best / 255.0);
    }
  }

  delete tables;
}
#endif

/**
 * Add sum of Proto Evidences into Sum Of Feature Evidence Array
 */
void ScratchEvidence::UpdateSumOfProtoEvidences(
    INT_CLASS ClassTemplate, BIT_VECTOR ConfigMask) {

  int *IntPointer;
  uint32_t ConfigWord;
  int ProtoSetIndex;
  uint16_t ProtoNum;
  PROTO_SET ProtoSet;
  int NumProtos;
  uint16_t ActualProtoNum;

  NumProtos = ClassTemplate->NumProtos;

  for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets;
       ProtoSetIndex++) {
    ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex];
    ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
    for (ProtoNum = 0;
         ((ProtoNum < PROTOS_PER_PROTO_SET) && (ActualProtoNum < NumProtos));
         ProtoNum++, ActualProtoNum++) {
      // Total evidence for this proto, capped at MAX_PROTO_INDEX entries.
      int temp = 0;
      for (uint8_t i = 0; i < MAX_PROTO_INDEX &&
           i < ClassTemplate->ProtoLengths[ActualProtoNum]; i++)
        temp += proto_evidence_[ActualProtoNum] [i];

      // Add it into every unmasked config that contains the proto.
      ConfigWord = ProtoSet->Protos[ProtoNum].Configs[0];
      ConfigWord &= *ConfigMask;
      IntPointer = sum_feature_evidence_;
      while (ConfigWord) {
        if (ConfigWord & 1)
          *IntPointer += temp;
        IntPointer++;
        ConfigWord >>= 1;
      }
    }
  }
}

/**
 * Normalize Sum of Proto and Feature Evidence by dividing by the sum of
 * the Feature Lengths and the Proto Lengths for each configuration.
 */
void ScratchEvidence::NormalizeSums(
    INT_CLASS ClassTemplate, int16_t NumFeatures) {

  assert(ClassTemplate->NumConfigs < MAX_NUM_CONFIGS);
  for (int i = 0; i < MAX_NUM_CONFIGS && i < ClassTemplate->NumConfigs; i++) {
    // <<8 keeps precision: sums are in units of 256ths of full evidence.
    sum_feature_evidence_[i] = (sum_feature_evidence_[i] << 8) /
      (NumFeatures + ClassTemplate->ConfigLengths[i]);
  }
}

/**
 * Find the best match for the current class and update the Result
 * with the configuration and match rating.
 * @return The best normalized sum of evidences
 */
int IntegerMatcher::FindBestMatch(
    INT_CLASS class_template,
    const ScratchEvidence &tables,
    UnicharRating* result) {
  int best_match = 0;
  result->config = 0;
  result->fonts.clear();
  result->fonts.reserve(class_template->NumConfigs);

  /* Find best match */
  assert(class_template->NumConfigs < MAX_NUM_CONFIGS);
  for (int c = 0; c < MAX_NUM_CONFIGS && c < class_template->NumConfigs; ++c) {
    int rating = tables.sum_feature_evidence_[c];
    if (*classify_debug_level_ > 2)
      tprintf("Config %d, rating=%d\n", c, rating);
    if (rating > best_match) {
      result->config = c;
      best_match = rating;
    }
    // Every config is recorded as a font candidate with its raw rating.
    result->fonts.push_back(ScoredFont(c, rating));
  }

  // Compute confidence on a Probability scale.
  result->rating = best_match / 65536.0f;

  return best_match;
}

/**
 * Applies the CN normalization factor to the given rating and returns
 * the modified rating.
 */
float IntegerMatcher::ApplyCNCorrection(float rating, int blob_length,
                                        int normalization_factor,
                                        int matcher_multiplier) {
  // Weighted average of the matcher rating (weight blob_length) and the
  // normalization penalty (weight matcher_multiplier); 1.0f if both
  // weights are zero.
  int divisor = blob_length + matcher_multiplier;
  return divisor == 0 ? 1.0f : (rating * blob_length +
      matcher_multiplier * normalization_factor / 256.0f) / divisor;
}

}  // namespace tesseract

// ===== Next file in this diff listing: src/classify/intmatcher.h =====
/******************************************************************************
 ** Filename:    intmatcher.h
 ** Purpose:     Interface to high level generic classifier routines.
 ** Author:      Robert Moss
 **
 ** (c) Copyright Hewlett-Packard Company, 1988.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 ******************************************************************************/
#ifndef INTMATCHER_H
#define INTMATCHER_H

#include "params.h"
#include "intproto.h"

namespace tesseract {

// Character fragments could be present in the trained templaes
// but turned on/off on the language-by-language basis or depending
// on particular properties of the corpus (e.g. when we expect the
// images to have low exposure).
extern BOOL_VAR_H(disable_character_fragments, false,
                  "Do not include character fragments in the"
                  " results of the classifier");

extern INT_VAR_H(classify_integer_matcher_multiplier, 10,
                 "Integer Matcher Multiplier  0-255:   ");

struct UnicharRating;

/// One class pruner output entry: a candidate class and its rating
/// (0.0 = perfect match, 1.0 = worst).
struct CP_RESULT_STRUCT {
  CP_RESULT_STRUCT() : Rating(0.0f), Class(0) {}

  float Rating;
  CLASS_ID Class;
};


/**----------------------------------------------------------------------------
          Public Function Prototypes
----------------------------------------------------------------------------**/

#define  SE_TABLE_BITS    9
#define  SE_TABLE_SIZE  512

/// Per-match scratch space: evidence accumulated for one class while
/// matching one blob's features.
struct ScratchEvidence {
  // Best evidence per config for the feature currently being processed.
  uint8_t feature_evidence_[MAX_NUM_CONFIGS];
  // Running evidence totals per config over all features.
  int sum_feature_evidence_[MAX_NUM_CONFIGS];
  // Top evidence values per proto, kept in descending order.
  uint8_t proto_evidence_[MAX_NUM_PROTOS][MAX_PROTO_INDEX];

  void Clear(const INT_CLASS class_template);
  void ClearFeatureEvidence(const INT_CLASS class_template);
  void NormalizeSums(INT_CLASS ClassTemplate, int16_t NumFeatures);
  void UpdateSumOfProtoEvidences(
    INT_CLASS ClassTemplate, BIT_VECTOR ConfigMask);
};


/// Integer feature matcher: scores one class template against a blob's
/// integer features using precomputed similarity/evidence tables.
class IntegerMatcher {
 public:
  // Integer Matcher Theta Fudge (0-255).
  static const int kIntThetaFudge = 128;
  // Bits in Similarity to Evidence Lookup (8-9).
  static const int kEvidenceTableBits = 9;
  // Integer Evidence Truncation Bits (8-14).
  static const int kIntEvidenceTruncBits = 14;
  // Similarity to Evidence Table Exponential Multiplier.
  static const float kSEExponentialMultiplier;
  // Center of Similarity Curve.
  static const float kSimilarityCenter;

  IntegerMatcher(tesseract::IntParam *classify_debug_level);

  void Match(INT_CLASS ClassTemplate,
             BIT_VECTOR ProtoMask,
             BIT_VECTOR ConfigMask,
             int16_t NumFeatures,
             const INT_FEATURE_STRUCT* Features,
             tesseract::UnicharRating* Result,
             int AdaptFeatureThreshold,
             int Debug,
             bool SeparateDebugWindows);

  // Applies the CN normalization factor to the given rating and returns
  // the modified rating.
  float ApplyCNCorrection(float rating, int blob_length,
                          int normalization_factor, int matcher_multiplier);

  int FindGoodProtos(INT_CLASS ClassTemplate,
                     BIT_VECTOR ProtoMask,
                     BIT_VECTOR ConfigMask,
                     int16_t NumFeatures,
                     INT_FEATURE_ARRAY Features,
                     PROTO_ID *ProtoArray,
                     int AdaptProtoThreshold,
                     int Debug);

  int FindBadFeatures(INT_CLASS ClassTemplate,
                      BIT_VECTOR ProtoMask,
                      BIT_VECTOR ConfigMask,
                      int16_t NumFeatures,
                      INT_FEATURE_ARRAY Features,
                      FEATURE_ID *FeatureArray,
                      int AdaptFeatureThreshold,
                      int Debug);

 private:
  int UpdateTablesForFeature(
      INT_CLASS ClassTemplate,
      BIT_VECTOR ProtoMask,
      BIT_VECTOR ConfigMask,
      int FeatureNum,
      const INT_FEATURE_STRUCT* Feature,
      ScratchEvidence *evidence,
      int Debug);

  int FindBestMatch(INT_CLASS ClassTemplate,
                    const ScratchEvidence &tables,
                    tesseract::UnicharRating* Result);

#ifndef GRAPHICS_DISABLED
  void DebugFeatureProtoError(
      INT_CLASS ClassTemplate,
      BIT_VECTOR ProtoMask,
      BIT_VECTOR ConfigMask,
      const ScratchEvidence &tables,
      int16_t NumFeatures,
      int Debug);

  void DisplayProtoDebugInfo(
      INT_CLASS ClassTemplate,
      BIT_VECTOR ConfigMask,
      const ScratchEvidence &tables,
      bool SeparateDebugWindows);

  void DisplayFeatureDebugInfo(
      INT_CLASS ClassTemplate,
      BIT_VECTOR ProtoMask,
      BIT_VECTOR ConfigMask,
      int16_t NumFeatures,
      const INT_FEATURE_STRUCT* Features,
      int AdaptFeatureThreshold,
      int Debug,
      bool SeparateDebugWindows);
#endif

 private:
  tesseract::IntParam *classify_debug_level_;
  // Precomputed lookup from quantized squared distance to evidence byte.
  uint8_t similarity_evidence_table_[SE_TABLE_SIZE];
  // Masks/shifts derived in the constructor from the k* constants above.
  uint32_t evidence_table_mask_;
  uint32_t mult_trunc_shift_bits_;
  uint32_t table_trunc_shift_bits_;
  uint32_t evidence_mult_mask_;
};

}  // namespace tesseract

#endif

// ===== Next file in this diff listing: src/classify/intproto.cpp =====
+/****************************************************************************** + ** Filename: intproto.c + ** Purpose: Definition of data structures for integer protos. + ** Author: Dan Johnson + ** + ** (c) Copyright Hewlett-Packard Company, 1988. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + ******************************************************************************/ +/*----------------------------------------------------------------------------- + Include Files and Type Defines +-----------------------------------------------------------------------------*/ + +#define _USE_MATH_DEFINES // for M_PI + +// Include automatically generated configuration file if running autoconf. 
+#ifdef HAVE_CONFIG_H +#include "config_auto.h" +#endif + +#include "intproto.h" + +#include "classify.h" +#include "fontinfo.h" +#include "mfoutline.h" +#include "picofeat.h" +#include "points.h" +#include "shapetable.h" +#include "svmnode.h" + +#include "helpers.h" + +#include <algorithm> +#include <cmath> // for M_PI, std::floor +#include <cstdio> +#include <cassert> + +namespace tesseract { + +/* match debug display constants*/ +#define PROTO_PRUNER_SCALE (4.0) + +#define INT_DESCENDER (0.0 * INT_CHAR_NORM_RANGE) +#define INT_BASELINE (0.25 * INT_CHAR_NORM_RANGE) +#define INT_XHEIGHT (0.75 * INT_CHAR_NORM_RANGE) +#define INT_CAPHEIGHT (1.0 * INT_CHAR_NORM_RANGE) + +#define INT_XCENTER (0.5 * INT_CHAR_NORM_RANGE) +#define INT_YCENTER (0.5 * INT_CHAR_NORM_RANGE) +#define INT_XRADIUS (0.2 * INT_CHAR_NORM_RANGE) +#define INT_YRADIUS (0.2 * INT_CHAR_NORM_RANGE) +#define INT_MIN_X 0 +#define INT_MIN_Y 0 +#define INT_MAX_X INT_CHAR_NORM_RANGE +#define INT_MAX_Y INT_CHAR_NORM_RANGE + +/** define pad used to snap near horiz/vertical protos to horiz/vertical */ +#define HV_TOLERANCE (0.0025) /* approx 0.9 degrees */ + +typedef enum +{ StartSwitch, EndSwitch, LastSwitch } +SWITCH_TYPE; +#define MAX_NUM_SWITCHES 3 + +typedef struct +{ + SWITCH_TYPE Type; + int8_t X, Y; + int16_t YInit; + int16_t Delta; +} +FILL_SWITCH; + +typedef struct +{ + uint8_t NextSwitch; + uint8_t AngleStart, AngleEnd; + int8_t X; + int16_t YStart, YEnd; + int16_t StartDelta, EndDelta; + FILL_SWITCH Switch[MAX_NUM_SWITCHES]; +} +TABLE_FILLER; + +typedef struct +{ + int8_t X; + int8_t YStart, YEnd; + uint8_t AngleStart, AngleEnd; +} +FILL_SPEC; + + +/* constants for conversion from old inttemp format */ +#define OLD_MAX_NUM_CONFIGS 32 +#define OLD_WERDS_PER_CONFIG_VEC ((OLD_MAX_NUM_CONFIGS + BITS_PER_WERD - 1) /\ + BITS_PER_WERD) + +/*----------------------------------------------------------------------------- + Macros +-----------------------------------------------------------------------------*/ 
+/** macro for performing circular increments of bucket indices */ +#define CircularIncrement(i,r) (((i) < (r) - 1)?((i)++):((i) = 0)) + +/** macro for mapping floats to ints without bounds checking */ +#define MapParam(P,O,N) (std::floor(((P) + (O)) * (N))) + +/*--------------------------------------------------------------------------- + Private Function Prototypes +----------------------------------------------------------------------------*/ +float BucketStart(int Bucket, float Offset, int NumBuckets); + +float BucketEnd(int Bucket, float Offset, int NumBuckets); + +void DoFill(FILL_SPEC *FillSpec, + CLASS_PRUNER_STRUCT* Pruner, + uint32_t ClassMask, + uint32_t ClassCount, + uint32_t WordIndex); + +bool FillerDone(TABLE_FILLER* Filler); + +void FillPPCircularBits(uint32_t + ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR], + int Bit, float Center, float Spread, bool debug); + +void FillPPLinearBits(uint32_t ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR], + int Bit, float Center, float Spread, bool debug); + +void GetCPPadsForLevel(int Level, + float *EndPad, + float *SidePad, + float *AnglePad); + +ScrollView::Color GetMatchColorFor(float Evidence); + +void GetNextFill(TABLE_FILLER *Filler, FILL_SPEC *Fill); + +void InitTableFiller(float EndPad, + float SidePad, + float AnglePad, + PROTO Proto, + TABLE_FILLER *Filler); + +#ifndef GRAPHICS_DISABLED +void RenderIntFeature(ScrollView *window, const INT_FEATURE_STRUCT* Feature, + ScrollView::Color color); + +void RenderIntProto(ScrollView *window, + INT_CLASS Class, + PROTO_ID ProtoId, + ScrollView::Color color); +#endif // !GRAPHICS_DISABLED + +/*----------------------------------------------------------------------------- + Global Data Definitions and Declarations +-----------------------------------------------------------------------------*/ + +#ifndef GRAPHICS_DISABLED +/* global display lists used to display proto and feature match information*/ +static ScrollView* IntMatchWindow = nullptr; +static 
ScrollView* FeatureDisplayWindow = nullptr; +static ScrollView* ProtoDisplayWindow = nullptr; +#endif + +/*----------------------------------------------------------------------------- + Variables +-----------------------------------------------------------------------------*/ + +/* control knobs */ +static INT_VAR(classify_num_cp_levels, 3, "Number of Class Pruner Levels"); +static double_VAR(classify_cp_angle_pad_loose, 45.0, + "Class Pruner Angle Pad Loose"); +static double_VAR(classify_cp_angle_pad_medium, 20.0, + "Class Pruner Angle Pad Medium"); +static double_VAR(classify_cp_angle_pad_tight, 10.0, + "CLass Pruner Angle Pad Tight"); +static double_VAR(classify_cp_end_pad_loose, 0.5, "Class Pruner End Pad Loose"); +static double_VAR(classify_cp_end_pad_medium, 0.5, "Class Pruner End Pad Medium"); +static double_VAR(classify_cp_end_pad_tight, 0.5, "Class Pruner End Pad Tight"); +static double_VAR(classify_cp_side_pad_loose, 2.5, "Class Pruner Side Pad Loose"); +static double_VAR(classify_cp_side_pad_medium, 1.2, "Class Pruner Side Pad Medium"); +static double_VAR(classify_cp_side_pad_tight, 0.6, "Class Pruner Side Pad Tight"); +static double_VAR(classify_pp_angle_pad, 45.0, "Proto Pruner Angle Pad"); +static double_VAR(classify_pp_end_pad, 0.5, "Proto Prune End Pad"); +static double_VAR(classify_pp_side_pad, 2.5, "Proto Pruner Side Pad"); + +/** + * This routine truncates Param to lie within the range + * of Min-Max inclusive. + * + * @param Param parameter value to be truncated + * @param Min, Max parameter limits (inclusive) + * + * @return Truncated parameter. 
+ */ +static int TruncateParam(float Param, int Min, int Max) { + int result; + if (Param < Min) { + result = Min; + } else if (Param > Max) { + result = Max; + } else { + result = static_cast<int>(std::floor(Param)); + } + return result; +} + +/*----------------------------------------------------------------------------- + Public Code +-----------------------------------------------------------------------------*/ +/// Builds a feature from an FCOORD for position with all the necessary +/// clipping and rounding. +INT_FEATURE_STRUCT::INT_FEATURE_STRUCT(const FCOORD& pos, uint8_t theta) + : X(ClipToRange<int16_t>(static_cast<int16_t>(pos.x() + 0.5), 0, 255)), + Y(ClipToRange<int16_t>(static_cast<int16_t>(pos.y() + 0.5), 0, 255)), + Theta(theta), + CP_misses(0) { +} +/** Builds a feature from ints with all the necessary clipping and casting. */ +INT_FEATURE_STRUCT::INT_FEATURE_STRUCT(int x, int y, int theta) + : X(static_cast<uint8_t>(ClipToRange<int>(x, 0, UINT8_MAX))), + Y(static_cast<uint8_t>(ClipToRange<int>(y, 0, UINT8_MAX))), + Theta(static_cast<uint8_t>(ClipToRange<int>(theta, 0, UINT8_MAX))), + CP_misses(0) { +} + +/** + * This routine adds a new class structure to a set of + * templates. Classes have to be added to Templates in + * the order of increasing ClassIds. 
+ * + * @param Templates templates to add new class to + * @param ClassId class id to associate new class with + * @param Class class data structure to add to templates + * + * Globals: none + */ +void AddIntClass(INT_TEMPLATES Templates, CLASS_ID ClassId, INT_CLASS Class) { + int Pruner; + + assert (LegalClassId (ClassId)); + if (ClassId != Templates->NumClasses) { + fprintf(stderr, "Please make sure that classes are added to templates"); + fprintf(stderr, " in increasing order of ClassIds\n"); + exit(1); + } + ClassForClassId (Templates, ClassId) = Class; + Templates->NumClasses++; + + if (Templates->NumClasses > MaxNumClassesIn (Templates)) { + Pruner = Templates->NumClassPruners++; + Templates->ClassPruners[Pruner] = new CLASS_PRUNER_STRUCT; + memset(Templates->ClassPruners[Pruner], 0, sizeof(CLASS_PRUNER_STRUCT)); + } +} /* AddIntClass */ + + +/** + * This routine returns the index of the next free config + * in Class. + * + * @param Class class to add new configuration to + * + * Globals: none + * + * @return Index of next free config. + */ +int AddIntConfig(INT_CLASS Class) { + int Index; + + assert(Class->NumConfigs < MAX_NUM_CONFIGS); + + Index = Class->NumConfigs++; + Class->ConfigLengths[Index] = 0; + return Index; +} /* AddIntConfig */ + + +/** + * This routine allocates the next free proto in Class and + * returns its index. + * + * @param Class class to add new proto to + * + * Globals: none + * + * @return Proto index of new proto. 
 */
int AddIntProto(INT_CLASS Class) {
  int Index;
  int ProtoSetId;
  PROTO_SET ProtoSet;
  INT_PROTO Proto;
  uint32_t *Word;

  // Refuse to grow past the hard per-class proto limit.
  if (Class->NumProtos >= MAX_NUM_PROTOS)
    return (NO_PROTO);

  Index = Class->NumProtos++;

  // If the new proto does not fit in the proto sets allocated so far,
  // allocate one more set and grow the proto-length array to match.
  if (Class->NumProtos > MaxNumIntProtosIn(Class)) {
    ProtoSetId = Class->NumProtoSets++;

    ProtoSet = static_cast<PROTO_SET>(malloc(sizeof(PROTO_SET_STRUCT)));
    Class->ProtoSets[ProtoSetId] = ProtoSet;
    memset(ProtoSet, 0, sizeof(*ProtoSet));

    /* reallocate space for the proto lengths and install in class */
    // NOTE(review): the realloc result is not checked for nullptr before
    // use; on allocation failure this would dereference nullptr below.
    Class->ProtoLengths =
      static_cast<uint8_t *>(realloc(Class->ProtoLengths,
                                     MaxNumIntProtosIn(Class) * sizeof(uint8_t)));
    // Zero only the newly added tail of the length array.
    memset(&Class->ProtoLengths[Index], 0,
           sizeof(*Class->ProtoLengths) * (MaxNumIntProtosIn(Class) - Index));
  }

  /* initialize proto so its length is zero and it isn't in any configs */
  Class->ProtoLengths[Index] = 0;
  Proto = ProtoForProtoId (Class, Index);
  // Clear the proto's config membership bit vector word by word.
  for (Word = Proto->Configs;
       Word < Proto->Configs + WERDS_PER_CONFIG_VEC; *Word++ = 0);

  return (Index);
}

/**
 * This routine adds Proto to the class pruning tables
 * for the specified class in Templates.
 *
 * Globals:
 * - classify_num_cp_levels number of levels used in the class pruner
 * @param Proto floating-pt proto to add to class pruner
 * @param ClassId class id corresponding to Proto
 * @param Templates set of templates containing class pruner
 */
void AddProtoToClassPruner (PROTO Proto, CLASS_ID ClassId,
                            INT_TEMPLATES Templates)
// MAX_LEVEL is used only to build the full (all-bits) class mask below.
#define MAX_LEVEL  2
{
  CLASS_PRUNER_STRUCT* Pruner;
  uint32_t ClassMask;
  uint32_t ClassCount;
  uint32_t WordIndex;
  int Level;
  float EndPad, SidePad, AnglePad;
  TABLE_FILLER TableFiller;
  FILL_SPEC FillSpec;

  Pruner = CPrunerFor (Templates, ClassId);
  WordIndex = CPrunerWordIndexFor (ClassId);
  ClassMask = CPrunerMaskFor (MAX_LEVEL, ClassId);

  // Fill from the loosest (highest) level down to the tightest; each
  // level uses progressively smaller pads around the proto.
  for (Level = classify_num_cp_levels - 1; Level >= 0; Level--) {
    GetCPPadsForLevel(Level, &EndPad, &SidePad, &AnglePad);
    // Despite the name, ClassCount holds the level value encoded in the
    // class's bit positions (via CPrunerMaskFor), compared in DoFill.
    ClassCount = CPrunerMaskFor (Level, ClassId);
    InitTableFiller(EndPad, SidePad, AnglePad, Proto, &TableFiller);

    // Sweep the filler across the table, one x-slice at a time.
    while (!FillerDone (&TableFiller)) {
      GetNextFill(&TableFiller, &FillSpec);
      DoFill(&FillSpec, Pruner, ClassMask, ClassCount, WordIndex);
    }
  }
} /* AddProtoToClassPruner */

/**
 * This routine updates the proto pruner lookup tables
 * for Class to include a new proto identified by ProtoId
 * and described by Proto.
 * @param Proto floating-pt proto to be added to proto pruner
 * @param ProtoId id of proto
 * @param Class integer class that contains desired proto pruner
 * @param debug debug flag
 * @note Globals: none
 */
void AddProtoToProtoPruner(PROTO Proto, int ProtoId,
                           INT_CLASS Class, bool debug) {
  float Angle, X, Y, Length;
  float Pad;
  int Index;
  PROTO_SET ProtoSet;

  // Report the violation before the assert so release builds (where
  // assert is compiled out) still leave a trace in the log.
  if (ProtoId >= Class->NumProtos)
    tprintf("AddProtoToProtoPruner:assert failed: %d < %d",
            ProtoId, Class->NumProtos);
  assert(ProtoId < Class->NumProtos);

  Index = IndexForProto (ProtoId);
  ProtoSet = Class->ProtoSets[SetForProto (ProtoId)];

  Angle = Proto->Angle;
#ifndef _WIN32
  assert(!std::isnan(Angle));
#endif

  // Angle is circular (wraps at 1.0), so it uses the circular fill.
  FillPPCircularBits (ProtoSet->ProtoPruner[PRUNER_ANGLE], Index,
                      Angle + ANGLE_SHIFT, classify_pp_angle_pad / 360.0,
                      debug);

  // Convert to radians for the cos/sin projections below.
  Angle *= 2.0 * M_PI;
  Length = Proto->Length;

  // X pad: the larger of the proto's projected half-length (plus end pad)
  // and the projected side pad, both along the x axis.
  X = Proto->X + X_SHIFT;
  Pad = std::max(fabs (cos (Angle)) * (Length / 2.0 +
                                       classify_pp_end_pad *
                                       GetPicoFeatureLength ()),
                 fabs (sin (Angle)) * (classify_pp_side_pad *
                                       GetPicoFeatureLength ()));

  FillPPLinearBits(ProtoSet->ProtoPruner[PRUNER_X], Index, X, Pad, debug);

  // Y pad: same construction with sin/cos swapped for the y axis.
  Y = Proto->Y + Y_SHIFT;
  Pad = std::max(fabs (sin (Angle)) * (Length / 2.0 +
                                       classify_pp_end_pad *
                                       GetPicoFeatureLength ()),
                 fabs (cos (Angle)) * (classify_pp_side_pad *
                                       GetPicoFeatureLength ()));

  FillPPLinearBits(ProtoSet->ProtoPruner[PRUNER_Y], Index, Y, Pad, debug);
} /* AddProtoToProtoPruner */

/**
 * Returns a quantized bucket for the given param shifted by offset,
 * notionally (param + offset) * num_buckets, but clipped and casted to the
 * appropriate type.
+ */ +uint8_t Bucket8For(float param, float offset, int num_buckets) { + int bucket = IntCastRounded(MapParam(param, offset, num_buckets)); + return static_cast<uint8_t>(ClipToRange<int>(bucket, 0, num_buckets - 1)); +} +uint16_t Bucket16For(float param, float offset, int num_buckets) { + int bucket = IntCastRounded(MapParam(param, offset, num_buckets)); + return static_cast<uint16_t>(ClipToRange<int>(bucket, 0, num_buckets - 1)); +} + +/** + * Returns a quantized bucket for the given circular param shifted by offset, + * notionally (param + offset) * num_buckets, but modded and casted to the + * appropriate type. + */ +uint8_t CircBucketFor(float param, float offset, int num_buckets) { + int bucket = IntCastRounded(MapParam(param, offset, num_buckets)); + return static_cast<uint8_t>(Modulo(bucket, num_buckets)); +} /* CircBucketFor */ + + +#ifndef GRAPHICS_DISABLED +/** + * This routine clears the global feature and proto + * display lists. + * + * Globals: + * - FeatureShapes display list for features + * - ProtoShapes display list for protos + */ +void UpdateMatchDisplay() { + if (IntMatchWindow != nullptr) + IntMatchWindow->Update(); +} /* ClearMatchDisplay */ +#endif + +/** + * This operation updates the config vectors of all protos + * in Class to indicate that the protos with 1's in Config + * belong to a new configuration identified by ConfigId. + * It is assumed that the length of the Config bit vector is + * equal to the number of protos in Class. 
 * @param Config config to be added to class
 * @param ConfigId id to be used for new config
 * @param Class class to add new config to
 */
void ConvertConfig(BIT_VECTOR Config, int ConfigId, INT_CLASS Class) {
  int ProtoId;
  INT_PROTO Proto;
  int TotalLength;

  // For every proto selected by Config, set the ConfigId bit in that
  // proto's membership vector, accumulating the config's total length.
  for (ProtoId = 0, TotalLength = 0;
       ProtoId < Class->NumProtos; ProtoId++) {
    if (test_bit(Config, ProtoId)) {
      Proto = ProtoForProtoId(Class, ProtoId);
      SET_BIT(Proto->Configs, ConfigId);
      TotalLength += Class->ProtoLengths[ProtoId];
    }
  }
  Class->ConfigLengths[ConfigId] = TotalLength;
} /* ConvertConfig */

/**
 * This routine converts Proto to integer format and
 * installs it as ProtoId in Class.
 * @param Proto floating-pt proto to be converted to integer format
 * @param ProtoId id of proto
 * @param Class integer class to add converted proto to
 */
void Classify::ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class) {
  assert(ProtoId < Class->NumProtos);

  INT_PROTO P = ProtoForProtoId(Class, ProtoId);

  // Scale the line coefficients into the signed/unsigned 8-bit ranges
  // used by the integer matcher: A in [-128,127], B in [0,255] (negated),
  // C in [-128,127].
  float Param = Proto->A * 128;
  P->A = TruncateParam(Param, -128, 127);

  Param = -Proto->B * 256;
  P->B = TruncateParam(Param, 0, 255);

  Param = Proto->C * 128;
  P->C = TruncateParam(Param, -128, 127);

  // Angle maps [0,1) onto the 8-bit circle; anything out of range
  // collapses to 0.
  Param = Proto->Angle * 256;
  if (Param < 0 || Param >= 256)
    P->Angle = 0;
  else
    P->Angle = static_cast<uint8_t>(Param);

  /* round proto length to nearest integer number of pico-features */
  Param = (Proto->Length / GetPicoFeatureLength()) + 0.5;
  Class->ProtoLengths[ProtoId] = TruncateParam(Param, 1, 255);
  if (classify_learning_debug_level >= 2)
    tprintf("Converted ffeat to (A=%d,B=%d,C=%d,L=%d)",
            P->A, P->B, P->C, Class->ProtoLengths[ProtoId]);
} /* ConvertProto */

/**
 * This routine converts from the old floating point format
 * to the new integer format.
 * @param FloatProtos prototypes in old floating pt format
 * @param target_unicharset the UNICHARSET to use
 * @return New set of training templates in integer format.
 * @note Globals: none
 */
INT_TEMPLATES Classify::CreateIntTemplates(CLASSES FloatProtos,
                                           const UNICHARSET&
                                           target_unicharset) {
  INT_TEMPLATES IntTemplates;
  CLASS_TYPE FClass;
  INT_CLASS IClass;
  int ClassId;
  int ProtoId;
  int ConfigId;

  IntTemplates = NewIntTemplates();

  for (ClassId = 0; ClassId < target_unicharset.size(); ClassId++) {
    FClass = &(FloatProtos[ClassId]);
    // Every non-space unichar is expected to have training data.
    if (FClass->NumProtos == 0 && FClass->NumConfigs == 0 &&
        strcmp(target_unicharset.id_to_unichar(ClassId), " ") != 0) {
      tprintf("Warning: no protos/configs for %s in CreateIntTemplates()\n",
              target_unicharset.id_to_unichar(ClassId));
    }
    assert(UnusedClassIdIn(IntTemplates, ClassId));
    IClass = NewIntClass(FClass->NumProtos, FClass->NumConfigs);
    // Build the class's font set and intern it in fontset_table_.
    FontSet fs;
    fs.size = FClass->font_set.size();
    fs.configs = new int[fs.size];
    for (int i = 0; i < fs.size; ++i) {
      fs.configs[i] = FClass->font_set.get(i);
    }
    if (this->fontset_table_.contains(fs)) {
      // Already interned: reuse the id and free our temporary copy.
      IClass->font_set_id = this->fontset_table_.get_id(fs);
      delete[] fs.configs;
    } else {
      // push_back takes ownership of fs.configs.
      IClass->font_set_id = this->fontset_table_.push_back(fs);
    }
    AddIntClass(IntTemplates, ClassId, IClass);

    // Convert each proto and register it with both pruners.
    for (ProtoId = 0; ProtoId < FClass->NumProtos; ProtoId++) {
      AddIntProto(IClass);
      ConvertProto(ProtoIn(FClass, ProtoId), ProtoId, IClass);
      AddProtoToProtoPruner(ProtoIn(FClass, ProtoId), ProtoId, IClass,
                            classify_learning_debug_level >= 2);
      AddProtoToClassPruner(ProtoIn(FClass, ProtoId), ClassId, IntTemplates);
    }

    for (ConfigId = 0; ConfigId < FClass->NumConfigs; ConfigId++) {
      AddIntConfig(IClass);
      ConvertConfig(FClass->Configurations[ConfigId], ConfigId, IClass);
    }
  }
  return (IntTemplates);
} /* CreateIntTemplates */

#ifndef GRAPHICS_DISABLED
/**
 * This routine renders the specified feature into a
 * global display list.
+ * + * Globals: + * - FeatureShapes global display list for features + * @param Feature pico-feature to be displayed + * @param Evidence best evidence for this feature (0-1) + */ +void DisplayIntFeature(const INT_FEATURE_STRUCT *Feature, float Evidence) { + ScrollView::Color color = GetMatchColorFor(Evidence); + RenderIntFeature(IntMatchWindow, Feature, color); + if (FeatureDisplayWindow) { + RenderIntFeature(FeatureDisplayWindow, Feature, color); + } +} /* DisplayIntFeature */ + +/** + * This routine renders the specified proto into a + * global display list. + * + * Globals: + * - ProtoShapes global display list for protos + * @param Class class to take proto from + * @param ProtoId id of proto in Class to be displayed + * @param Evidence total evidence for proto (0-1) + */ +void DisplayIntProto(INT_CLASS Class, PROTO_ID ProtoId, float Evidence) { + ScrollView::Color color = GetMatchColorFor(Evidence); + RenderIntProto(IntMatchWindow, Class, ProtoId, color); + if (ProtoDisplayWindow) { + RenderIntProto(ProtoDisplayWindow, Class, ProtoId, color); + } +} /* DisplayIntProto */ +#endif + +/** + * This routine creates a new integer class data structure + * and returns it. Sufficient space is allocated + * to handle the specified number of protos and configs. + * @param MaxNumProtos number of protos to allocate space for + * @param MaxNumConfigs number of configs to allocate space for + * @return New class created. 
+ * @note Globals: none + */ +INT_CLASS NewIntClass(int MaxNumProtos, int MaxNumConfigs) { + INT_CLASS Class; + PROTO_SET ProtoSet; + int i; + + assert(MaxNumConfigs <= MAX_NUM_CONFIGS); + + Class = static_cast<INT_CLASS>(malloc(sizeof(INT_CLASS_STRUCT))); + Class->NumProtoSets = ((MaxNumProtos + PROTOS_PER_PROTO_SET - 1) / + PROTOS_PER_PROTO_SET); + + assert(Class->NumProtoSets <= MAX_NUM_PROTO_SETS); + + Class->NumProtos = 0; + Class->NumConfigs = 0; + + for (i = 0; i < Class->NumProtoSets; i++) { + /* allocate space for a proto set, install in class, and initialize */ + ProtoSet = static_cast<PROTO_SET>(malloc(sizeof(PROTO_SET_STRUCT))); + memset(ProtoSet, 0, sizeof(*ProtoSet)); + Class->ProtoSets[i] = ProtoSet; + + /* allocate space for the proto lengths and install in class */ + } + if (MaxNumIntProtosIn (Class) > 0) { + Class->ProtoLengths = + static_cast<uint8_t *>(malloc(MaxNumIntProtosIn (Class) * sizeof (uint8_t))); + memset(Class->ProtoLengths, 0, + MaxNumIntProtosIn(Class) * sizeof(*Class->ProtoLengths)); + } else { + Class->ProtoLengths = nullptr; + } + memset(Class->ConfigLengths, 0, sizeof(Class->ConfigLengths)); + + return (Class); + +} /* NewIntClass */ + +static void free_int_class(INT_CLASS int_class) { + int i; + + for (i = 0; i < int_class->NumProtoSets; i++) { + free (int_class->ProtoSets[i]); + } + if (int_class->ProtoLengths != nullptr) { + free (int_class->ProtoLengths); + } + free(int_class); +} + +/** + * This routine allocates a new set of integer templates + * initialized to hold 0 classes. + * @return The integer templates created. 
 * @note Globals: none
 */
INT_TEMPLATES NewIntTemplates() {
  INT_TEMPLATES T;
  int i;

  T = static_cast<INT_TEMPLATES>(malloc (sizeof (INT_TEMPLATES_STRUCT)));
  T->NumClasses = 0;
  T->NumClassPruners = 0;

  // Every class slot starts out empty.
  for (i = 0; i < MAX_NUM_CLASSES; i++)
    ClassForClassId (T, i) = nullptr;

  return (T);
} /* NewIntTemplates */


/*---------------------------------------------------------------------------*/
// Frees all classes, all class pruners, and the template struct itself.
void free_int_templates(INT_TEMPLATES templates) {
  int i;

  for (i = 0; i < templates->NumClasses; i++)
    free_int_class(templates->Class[i]);
  for (i = 0; i < templates->NumClassPruners; i++)
    delete templates->ClassPruners[i];
  free(templates);
}

/**
 * This routine reads a set of integer templates from
 * File.  File must already be open and must be in the
 * correct binary format.
 * @param fp open file to read templates from
 * @return Pointer to integer templates read from File.
 * @note Globals: none
 */
INT_TEMPLATES Classify::ReadIntTemplates(TFile *fp) {
  int i, j, w, x, y, z;
  int unicharset_size;
  int version_id = 0;  // 0 = unversioned (oldest) format
  INT_TEMPLATES Templates;
  CLASS_PRUNER_STRUCT* Pruner;
  INT_CLASS Class;
  uint8_t *Lengths;
  PROTO_SET ProtoSet;

  /* variables for conversion from older inttemp formats */
  int b, bit_number, last_cp_bit_number, new_b, new_i, new_w;
  CLASS_ID class_id, max_class_id;
  auto *IndexFor = new int16_t[MAX_NUM_CLASSES];
  auto *ClassIdFor = new CLASS_ID[MAX_NUM_CLASSES];
  auto **TempClassPruner =
      new CLASS_PRUNER_STRUCT*[MAX_NUM_CLASS_PRUNERS];
  uint32_t SetBitsForMask =           // word with NUM_BITS_PER_CLASS
      (1 << NUM_BITS_PER_CLASS) - 1;  // set starting at bit 0
  uint32_t Mask, NewMask, ClassBits;
  int MaxNumConfigs = MAX_NUM_CONFIGS;
  int WerdsPerConfigVec = WERDS_PER_CONFIG_VEC;

  /* first read the high level template struct */
  Templates = NewIntTemplates();
  // Read Templates in parts for 64 bit compatibility.
  if (fp->FReadEndian(&unicharset_size, sizeof(unicharset_size), 1) != 1)
    tprintf("Bad read of inttemp!\n");
  if (fp->FReadEndian(&Templates->NumClasses, sizeof(Templates->NumClasses),
                      1) != 1 ||
      fp->FReadEndian(&Templates->NumClassPruners,
                      sizeof(Templates->NumClassPruners), 1) != 1)
    tprintf("Bad read of inttemp!\n");
  if (Templates->NumClasses < 0) {
    // This file has a version id!
    // A negative class count encodes the format version; the real count
    // follows immediately after.
    version_id = -Templates->NumClasses;
    if (fp->FReadEndian(&Templates->NumClasses, sizeof(Templates->NumClasses),
                        1) != 1)
      tprintf("Bad read of inttemp!\n");
  }

  // Pre-version-3 files used the smaller old config-vector layout.
  if (version_id < 3) {
    MaxNumConfigs = OLD_MAX_NUM_CONFIGS;
    WerdsPerConfigVec = OLD_WERDS_PER_CONFIG_VEC;
  }

  // Pre-version-2 files carried explicit index<->class-id mapping tables.
  if (version_id < 2) {
    if (fp->FReadEndian(IndexFor, sizeof(IndexFor[0]), unicharset_size) !=
        unicharset_size) {
      tprintf("Bad read of inttemp!\n");
    }
    if (fp->FReadEndian(ClassIdFor, sizeof(ClassIdFor[0]),
                        Templates->NumClasses) != Templates->NumClasses) {
      tprintf("Bad read of inttemp!\n");
    }
  }

  /* then read in the class pruners */
  const int kNumBuckets =
      NUM_CP_BUCKETS * NUM_CP_BUCKETS * NUM_CP_BUCKETS * WERDS_PER_CP_VECTOR;
  for (i = 0; i < Templates->NumClassPruners; i++) {
    Pruner = new CLASS_PRUNER_STRUCT;
    if (fp->FReadEndian(Pruner, sizeof(Pruner->p[0][0][0][0]), kNumBuckets) !=
        kNumBuckets) {
      tprintf("Bad read of inttemp!\n");
    }
    if (version_id < 2) {
      // Old format: hold the pruners aside for re-indexing below.
      TempClassPruner[i] = Pruner;
    } else {
      Templates->ClassPruners[i] = Pruner;
    }
  }

  /* fix class pruners if they came from an old version of inttemp */
  if (version_id < 2) {
    // Allocate enough class pruners to cover all the class ids.
    max_class_id = 0;
    for (i = 0; i < Templates->NumClasses; i++)
      if (ClassIdFor[i] > max_class_id)
        max_class_id = ClassIdFor[i];
    for (i = 0; i <= CPrunerIdFor(max_class_id); i++) {
      Templates->ClassPruners[i] = new CLASS_PRUNER_STRUCT;
      memset(Templates->ClassPruners[i], 0, sizeof(CLASS_PRUNER_STRUCT));
    }
    // Convert class pruners from the old format (indexed by class index)
    // to the new format (indexed by class id).
    last_cp_bit_number = NUM_BITS_PER_CLASS * Templates->NumClasses - 1;
    for (i = 0; i < Templates->NumClassPruners; i++) {
      for (x = 0; x < NUM_CP_BUCKETS; x++)
        for (y = 0; y < NUM_CP_BUCKETS; y++)
          for (z = 0; z < NUM_CP_BUCKETS; z++)
            for (w = 0; w < WERDS_PER_CP_VECTOR; w++) {
              if (TempClassPruner[i]->p[x][y][z][w] == 0)
                continue;
              for (b = 0; b < BITS_PER_WERD; b += NUM_BITS_PER_CLASS) {
                bit_number = i * BITS_PER_CP_VECTOR + w * BITS_PER_WERD + b;
                if (bit_number > last_cp_bit_number)
                  break; // the rest of the bits in this word are not used
                class_id = ClassIdFor[bit_number / NUM_BITS_PER_CLASS];
                // Single out NUM_BITS_PER_CLASS bits relating to class_id.
                Mask = SetBitsForMask << b;
                ClassBits = TempClassPruner[i]->p[x][y][z][w] & Mask;
                // Move these bits to the new position in which they should
                // appear (indexed corresponding to the class_id).
                new_i = CPrunerIdFor(class_id);
                new_w = CPrunerWordIndexFor(class_id);
                new_b = CPrunerBitIndexFor(class_id) * NUM_BITS_PER_CLASS;
                if (new_b > b) {
                  ClassBits <<= (new_b - b);
                } else {
                  ClassBits >>= (b - new_b);
                }
                // Copy bits relating to class_id to the correct position
                // in Templates->ClassPruner.
                NewMask = SetBitsForMask << new_b;
                Templates->ClassPruners[new_i]->p[x][y][z][new_w] &= ~NewMask;
                Templates->ClassPruners[new_i]->p[x][y][z][new_w] |= ClassBits;
              }
            }
    }
    for (i = 0; i < Templates->NumClassPruners; i++) {
      delete TempClassPruner[i];
    }
  }

  /* then read in each class */
  for (i = 0; i < Templates->NumClasses; i++) {
    /* first read in the high level struct for the class */
    Class = static_cast<INT_CLASS>(malloc (sizeof (INT_CLASS_STRUCT)));
    if (fp->FReadEndian(&Class->NumProtos, sizeof(Class->NumProtos), 1) != 1 ||
        fp->FRead(&Class->NumProtoSets, sizeof(Class->NumProtoSets), 1) != 1 ||
        fp->FRead(&Class->NumConfigs, sizeof(Class->NumConfigs), 1) != 1)
      tprintf("Bad read of inttemp!\n");
    if (version_id == 0) {
      // Only version 0 writes 5 pointless pointers to the file.
      for (j = 0; j < 5; ++j) {
        int32_t junk;
        if (fp->FRead(&junk, sizeof(junk), 1) != 1)
          tprintf("Bad read of inttemp!\n");
      }
    }
    // Version 4+ writes only the configs actually used; older versions
    // always wrote the full fixed-size array.
    int num_configs = version_id < 4 ? MaxNumConfigs : Class->NumConfigs;
    ASSERT_HOST(num_configs <= MaxNumConfigs);
    if (fp->FReadEndian(Class->ConfigLengths, sizeof(uint16_t), num_configs) !=
        num_configs) {
      tprintf("Bad read of inttemp!\n");
    }
    if (version_id < 2) {
      ClassForClassId (Templates, ClassIdFor[i]) = Class;
    } else {
      ClassForClassId (Templates, i) = Class;
    }

    /* then read in the proto lengths */
    Lengths = nullptr;
    if (MaxNumIntProtosIn (Class) > 0) {
      Lengths = static_cast<uint8_t *>(malloc(sizeof(uint8_t) * MaxNumIntProtosIn(Class)));
      if (fp->FRead(Lengths, sizeof(uint8_t), MaxNumIntProtosIn(Class)) !=
          MaxNumIntProtosIn(Class))
        tprintf("Bad read of inttemp!\n");
    }
    Class->ProtoLengths = Lengths;

    /* then read in the proto sets */
    for (j = 0; j < Class->NumProtoSets; j++) {
      ProtoSet = static_cast<PROTO_SET>(malloc(sizeof(PROTO_SET_STRUCT)));
      int num_buckets = NUM_PP_PARAMS * NUM_PP_BUCKETS * WERDS_PER_PP_VECTOR;
      if (fp->FReadEndian(&ProtoSet->ProtoPruner,
                          sizeof(ProtoSet->ProtoPruner[0][0][0]),
                          num_buckets) != num_buckets)
        tprintf("Bad read of inttemp!\n");
      for (x = 0; x < PROTOS_PER_PROTO_SET; x++) {
        if (fp->FRead(&ProtoSet->Protos[x].A, sizeof(ProtoSet->Protos[x].A),
                      1) != 1 ||
            fp->FRead(&ProtoSet->Protos[x].B, sizeof(ProtoSet->Protos[x].B),
                      1) != 1 ||
            fp->FRead(&ProtoSet->Protos[x].C, sizeof(ProtoSet->Protos[x].C),
                      1) != 1 ||
            fp->FRead(&ProtoSet->Protos[x].Angle,
                      sizeof(ProtoSet->Protos[x].Angle), 1) != 1)
          tprintf("Bad read of inttemp!\n");
        if (fp->FReadEndian(&ProtoSet->Protos[x].Configs,
                            sizeof(ProtoSet->Protos[x].Configs[0]),
                            WerdsPerConfigVec) != WerdsPerConfigVec)
          tprintf("Bad read of inttemp!\n");
      }
      Class->ProtoSets[j] = ProtoSet;
    }
    if (version_id < 4) {
      Class->font_set_id = -1;
    } else {
      fp->FReadEndian(&Class->font_set_id, sizeof(Class->font_set_id), 1);
    }
  }

  if (version_id < 2) {
    /* add an empty nullptr class with class id 0 */
    assert(UnusedClassIdIn (Templates, 0));
    ClassForClassId (Templates, 0) = NewIntClass (1, 1);
    ClassForClassId (Templates, 0)->font_set_id = -1;
    Templates->NumClasses++;
    /* make sure the classes are contiguous */
    for (i = 0; i < MAX_NUM_CLASSES; i++) {
      if (i < Templates->NumClasses) {
        if (ClassForClassId (Templates, i) == nullptr) {
          fprintf(stderr, "Non-contiguous class ids in inttemp\n");
          exit(1);
        }
      } else {
        if (ClassForClassId (Templates, i) != nullptr) {
          fprintf(stderr, "Class id %d exceeds NumClassesIn (Templates) %d\n",
                  i, Templates->NumClasses);
          exit(1);
        }
      }
    }
  }
  // Version 4 added the font info/set tables; version 5 added spacing.
  if (version_id >= 4) {
    using namespace std::placeholders;  // for _1, _2
    this->fontinfo_table_.read(fp, std::bind(read_info, _1, _2));
    if (version_id >= 5) {
      this->fontinfo_table_.read(fp,
                                 std::bind(read_spacing_info, _1, _2));
    }
    this->fontset_table_.read(fp, std::bind(read_set, _1, _2));
  }

  // Clean up.
  delete[] IndexFor;
  delete[] ClassIdFor;
  delete[] TempClassPruner;

  return (Templates);
} /* ReadIntTemplates */


#ifndef GRAPHICS_DISABLED
/**
 * This routine sends the shapes in the global display
 * lists to the match debugger window.
 *
 * Globals:
 * - FeatureShapes display list containing feature matches
 * - ProtoShapes display list containing proto matches
 */
void Classify::ShowMatchDisplay() {
  InitIntMatchWindowIfReqd();
  if (ProtoDisplayWindow) {
    ProtoDisplayWindow->Clear();
  }
  if (FeatureDisplayWindow) {
    FeatureDisplayWindow->Clear();
  }
  ClearFeatureSpaceWindow(
      static_cast<NORM_METHOD>(static_cast<int>(classify_norm_method)),
      IntMatchWindow);
  IntMatchWindow->ZoomToRectangle(INT_MIN_X, INT_MIN_Y,
                                  INT_MAX_X, INT_MAX_Y);
  if (ProtoDisplayWindow) {
    ProtoDisplayWindow->ZoomToRectangle(INT_MIN_X, INT_MIN_Y,
                                        INT_MAX_X, INT_MAX_Y);
  }
  if (FeatureDisplayWindow) {
    FeatureDisplayWindow->ZoomToRectangle(INT_MIN_X, INT_MIN_Y,
                                          INT_MAX_X, INT_MAX_Y);
  }
} /* ShowMatchDisplay */

/// Clears the given window and draws the featurespace guides for the
/// appropriate normalization method.
void ClearFeatureSpaceWindow(NORM_METHOD norm_method, ScrollView* window) {
  window->Clear();

  window->Pen(ScrollView::GREY);
  // Draw the feature space limit rectangle.
  window->Rectangle(0, 0, INT_MAX_X, INT_MAX_Y);
  if (norm_method == baseline) {
    // Baseline normalization: draw the four horizontal reference lines.
    window->SetCursor(0, INT_DESCENDER);
    window->DrawTo(INT_MAX_X, INT_DESCENDER);
    window->SetCursor(0, INT_BASELINE);
    window->DrawTo(INT_MAX_X, INT_BASELINE);
    window->SetCursor(0, INT_XHEIGHT);
    window->DrawTo(INT_MAX_X, INT_XHEIGHT);
    window->SetCursor(0, INT_CAPHEIGHT);
    window->DrawTo(INT_MAX_X, INT_CAPHEIGHT);
  } else {
    // Character normalization: draw the centered reference box.
    window->Rectangle(INT_XCENTER - INT_XRADIUS, INT_YCENTER - INT_YRADIUS,
                      INT_XCENTER + INT_XRADIUS, INT_YCENTER + INT_YRADIUS);
  }
}
#endif

/**
 * This routine writes Templates to File.  The format
 * is an efficient binary format.
 File must already be open
 * for writing.
 * @param File open file to write templates to
 * @param Templates templates to save into File
 * @param target_unicharset the UNICHARSET to use
 */
void Classify::WriteIntTemplates(FILE *File, INT_TEMPLATES Templates,
                                 const UNICHARSET& target_unicharset) {
  int i, j;
  INT_CLASS Class;
  int unicharset_size = target_unicharset.size();
  int version_id = -5;  // When negated by the reader -1 becomes +1 etc.

  if (Templates->NumClasses != unicharset_size) {
    tprintf("Warning: executing WriteIntTemplates() with %d classes in"
            " Templates, while target_unicharset size is %d\n",
            Templates->NumClasses, unicharset_size);
  }

  /* first write the high level template struct */
  // The write order here is the read order in ReadIntTemplates; do not
  // reorder these fwrite calls.
  fwrite(&unicharset_size, sizeof(unicharset_size), 1, File);
  fwrite(&version_id, sizeof(version_id), 1, File);
  fwrite(&Templates->NumClassPruners, sizeof(Templates->NumClassPruners),
         1, File);
  fwrite(&Templates->NumClasses, sizeof(Templates->NumClasses), 1, File);

  /* then write out the class pruners */
  for (i = 0; i < Templates->NumClassPruners; i++)
    fwrite(Templates->ClassPruners[i],
           sizeof(CLASS_PRUNER_STRUCT), 1, File);

  /* then write out each class */
  for (i = 0; i < Templates->NumClasses; i++) {
    Class = Templates->Class[i];

    /* first write out the high level struct for the class */
    fwrite(&Class->NumProtos, sizeof(Class->NumProtos), 1, File);
    fwrite(&Class->NumProtoSets, sizeof(Class->NumProtoSets), 1, File);
    // The class's config count must agree with its interned font set.
    ASSERT_HOST(Class->NumConfigs == this->fontset_table_.get(Class->font_set_id).size);
    fwrite(&Class->NumConfigs, sizeof(Class->NumConfigs), 1, File);
    // Version 4+ writes only the configs in use (see ReadIntTemplates).
    for (j = 0; j < Class->NumConfigs; ++j) {
      fwrite(&Class->ConfigLengths[j], sizeof(uint16_t), 1, File);
    }

    /* then write out the proto lengths */
    if (MaxNumIntProtosIn (Class) > 0) {
      fwrite(Class->ProtoLengths, sizeof(uint8_t),
             MaxNumIntProtosIn(Class), File);
    }

    /* then write out the proto sets */
    for (j = 0; j < Class->NumProtoSets; j++)
      fwrite(Class->ProtoSets[j], sizeof(PROTO_SET_STRUCT), 1, File);

    /* then write the fonts info */
    fwrite(&Class->font_set_id, sizeof(int), 1, File);
  }

  /* Write the fonts info tables */
  using namespace std::placeholders;  // for _1, _2
  this->fontinfo_table_.write(File, std::bind(write_info, _1, _2));
  this->fontinfo_table_.write(File,
                              std::bind(write_spacing_info, _1, _2));
  this->fontset_table_.write(File, std::bind(write_set, _1, _2));
} /* WriteIntTemplates */

/*-----------------------------------------------------------------------------
              Private Code
-----------------------------------------------------------------------------*/
/**
 * This routine returns the parameter value which
 * corresponds to the beginning of the specified bucket.
 * The bucket number should have been generated using the
 * BucketFor() function with parameters Offset and NumBuckets.
 * @param Bucket bucket whose start is to be computed
 * @param Offset offset used to map params to buckets
 * @param NumBuckets total number of buckets
 * @return Param value corresponding to start position of Bucket.
 * @note Globals: none
 */
float BucketStart(int Bucket, float Offset, int NumBuckets) {
  // Inverse of the bucket mapping: undo the scale, then the offset.
  return ((static_cast<float>(Bucket) / NumBuckets) - Offset);

} /* BucketStart */

/**
 * This routine returns the parameter value which
 * corresponds to the end of the specified bucket.
 * The bucket number should have been generated using the
 * BucketFor() function with parameters Offset and NumBuckets.
 * @param Bucket bucket whose end is to be computed
 * @param Offset offset used to map params to buckets
 * @param NumBuckets total number of buckets
 * @return Param value corresponding to end position of Bucket.
 * @note Globals: none
 */
float BucketEnd(int Bucket, float Offset, int NumBuckets) {
  // End of bucket N is the start of bucket N+1.
  return ((static_cast<float>(Bucket + 1) / NumBuckets) - Offset);
} /* BucketEnd */

/**
 * This routine fills in the section of a class pruner
 * corresponding to a single x value for a single proto of
 * a class.
 * @param FillSpec specifies which bits to fill in pruner
 * @param Pruner class pruner to be filled
 * @param ClassMask indicates which bits to change in each word
 * @param ClassCount indicates what to change bits to
 * @param WordIndex indicates which word to change
 */
void DoFill(FILL_SPEC *FillSpec,
            CLASS_PRUNER_STRUCT* Pruner,
            uint32_t ClassMask,
            uint32_t ClassCount,
            uint32_t WordIndex) {
  int X, Y, Angle;
  uint32_t OldWord;

  // Clamp the x bucket into table bounds.
  X = FillSpec->X;
  if (X < 0)
    X = 0;
  if (X >= NUM_CP_BUCKETS)
    X = NUM_CP_BUCKETS - 1;

  // Clamp the y range in place (FillSpec is mutated deliberately).
  if (FillSpec->YStart < 0)
    FillSpec->YStart = 0;
  if (FillSpec->YEnd >= NUM_CP_BUCKETS)
    FillSpec->YEnd = NUM_CP_BUCKETS - 1;

  // The angle dimension is circular: walk from AngleStart, wrapping,
  // until AngleEnd has been processed (inclusive).
  for (Y = FillSpec->YStart; Y <= FillSpec->YEnd; Y++)
    for (Angle = FillSpec->AngleStart; ;
         CircularIncrement(Angle, NUM_CP_BUCKETS)) {
      OldWord = Pruner->p[X][Y][Angle][WordIndex];
      // Only raise the stored level, never lower it.
      if (ClassCount > (OldWord & ClassMask)) {
        OldWord &= ~ClassMask;
        OldWord |= ClassCount;
        Pruner->p[X][Y][Angle][WordIndex] = OldWord;
      }
      if (Angle == FillSpec->AngleEnd)
        break;
    }
} /* DoFill */

/**
 * Return true if the specified table filler is done, i.e.
 * if it has no more lines to fill.
 * @param Filler table filler to check if done
 * @return true if no more lines to fill, false otherwise.
 * @note Globals: none
 */
bool FillerDone(TABLE_FILLER* Filler) {
  FILL_SWITCH *Next;

  Next = &(Filler->Switch[Filler->NextSwitch]);

  // Done when the sweep has passed the final switch point.
  return Filler->X > Next->X && Next->Type == LastSwitch;

} /* FillerDone */

/**
 * This routine sets Bit in each bit vector whose
 * bucket lies within the range Center +- Spread.  The fill
 * is done for a circular dimension, i.e.
bucket 0 is adjacent + * to the last bucket. It is assumed that Center and Spread + * are expressed in a circular coordinate system whose range + * is 0 to 1. + * @param ParamTable table of bit vectors, one per param bucket + * @param Bit bit position in vectors to be filled + * @param Center center of filled area + * @param Spread spread of filled area + * @param debug debug flag + */ +void FillPPCircularBits(uint32_t ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR], + int Bit, float Center, float Spread, bool debug) { + int i, FirstBucket, LastBucket; + + if (Spread > 0.5) + Spread = 0.5; + + FirstBucket = static_cast<int>(std::floor((Center - Spread) * NUM_PP_BUCKETS)); + if (FirstBucket < 0) + FirstBucket += NUM_PP_BUCKETS; + + LastBucket = static_cast<int>(std::floor((Center + Spread) * NUM_PP_BUCKETS)); + if (LastBucket >= NUM_PP_BUCKETS) + LastBucket -= NUM_PP_BUCKETS; + if (debug) tprintf("Circular fill from %d to %d", FirstBucket, LastBucket); + for (i = FirstBucket; true; CircularIncrement (i, NUM_PP_BUCKETS)) { + SET_BIT (ParamTable[i], Bit); + + /* exit loop after we have set the bit for the last bucket */ + if (i == LastBucket) + break; + } + +} /* FillPPCircularBits */ + +/** + * This routine sets Bit in each bit vector whose + * bucket lies within the range Center +- Spread. The fill + * is done for a linear dimension, i.e. there is no wrap-around + * for this dimension. It is assumed that Center and Spread + * are expressed in a linear coordinate system whose range + * is approximately 0 to 1. Values outside this range will + * be clipped. 
/* @param ParamTable table of bit vectors, one per param bucket
   @param Bit bit number being filled
   @param Center center of filled area
   @param Spread spread of filled area
   @param debug debug flag */
void FillPPLinearBits(uint32_t ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR],
                      int Bit, float Center, float Spread, bool debug) {
  int i, FirstBucket, LastBucket;

  // Unlike the circular fill, out-of-range buckets are clipped, not wrapped.
  FirstBucket = static_cast<int>(std::floor((Center - Spread) * NUM_PP_BUCKETS));
  if (FirstBucket < 0)
    FirstBucket = 0;

  LastBucket = static_cast<int>(std::floor((Center + Spread) * NUM_PP_BUCKETS));
  if (LastBucket >= NUM_PP_BUCKETS)
    LastBucket = NUM_PP_BUCKETS - 1;

  if (debug) tprintf("Linear fill from %d to %d", FirstBucket, LastBucket);
  for (i = FirstBucket; i <= LastBucket; i++)
    SET_BIT (ParamTable[i], Bit);

} /* FillPPLinearBits */


/*---------------------------------------------------------------------------*/
#ifndef GRAPHICS_DISABLED
/**
 * This routine prompts the user with Prompt and waits
 * for the user to enter something in the debug window.
 * @param Prompt prompt to print while waiting for input from window
 * @param adaptive_on set when the adaptive templates should be debugged
 * @param pretrained_on set when the static templates should be debugged
 * @param shape_id output shape index (-1 when not debugging by shape)
 * @return Character entered in the debug window.
 * @note Globals: none
 */
CLASS_ID Classify::GetClassToDebug(const char *Prompt, bool* adaptive_on,
                                   bool* pretrained_on, int* shape_id) {
  tprintf("%s\n", Prompt);
  SVEvent* ev;
  SVEventType ev_type;
  int unichar_id = INVALID_UNICHAR_ID;
  // Wait until a click or popup event.
  do {
    ev = IntMatchWindow->AwaitEvent(SVET_ANY);
    ev_type = ev->type;
    if (ev_type == SVET_POPUP) {
      if (ev->command_id == IDA_SHAPE_INDEX) {
        // Debug by shape-table index: parameter holds the shape number.
        if (shape_table_ != nullptr) {
          *shape_id = atoi(ev->parameter);
          *adaptive_on = false;
          *pretrained_on = true;
          if (*shape_id >= 0 && *shape_id < shape_table_->NumShapes()) {
            int font_id;
            shape_table_->GetFirstUnicharAndFont(*shape_id, &unichar_id,
                                                 &font_id);
            tprintf("Shape %d, first unichar=%d, font=%d\n",
                    *shape_id, unichar_id, font_id);
            return unichar_id;
          }
          tprintf("Shape index '%s' not found in shape table\n", ev->parameter);
        } else {
          tprintf("No shape table loaded!\n");
        }
      } else {
        // Debug by character: parameter holds the unichar text.
        if (unicharset.contains_unichar(ev->parameter)) {
          unichar_id = unicharset.unichar_to_id(ev->parameter);
          if (ev->command_id == IDA_ADAPTIVE) {
            *adaptive_on = true;
            *pretrained_on = false;
            *shape_id = -1;
          } else if (ev->command_id == IDA_STATIC) {
            *adaptive_on = false;
            *pretrained_on = true;
          } else {
            // IDA_BOTH: debug adaptive and static templates together.
            *adaptive_on = true;
            *pretrained_on = true;
          }
          if (ev->command_id == IDA_ADAPTIVE || shape_table_ == nullptr) {
            // No shape table involved: return directly.
            *shape_id = -1;
            return unichar_id;
          }
          // List every shape containing this unichar before looping for
          // the selecting click.
          for (int s = 0; s < shape_table_->NumShapes(); ++s) {
            if (shape_table_->GetShape(s).ContainsUnichar(unichar_id)) {
              tprintf("%s\n", shape_table_->DebugStr(s).c_str());
            }
          }
        } else {
          // NOTE(review): message lacks a trailing '\n' unlike the others —
          // confirm intended.
          tprintf("Char class '%s' not found in unicharset",
                  ev->parameter);
        }
      }
    }
    delete ev;
  } while (ev_type != SVET_CLICK);
  return 0;
} /* GetClassToDebug */

#endif

/**
 * This routine copies the appropriate global pad variables
 * into EndPad, SidePad, and AnglePad.  This is a kludge used
 * to get around the fact that global control variables cannot
 * be arrays.  If the specified level is illegal, the tightest
 * possible pads are returned.
+ * @param Level "tightness" level to return pads for + * @param EndPad place to put end pad for Level + * @param SidePad place to put side pad for Level + * @param AnglePad place to put angle pad for Level + */ +void GetCPPadsForLevel(int Level, + float *EndPad, + float *SidePad, + float *AnglePad) { + switch (Level) { + case 0: + *EndPad = classify_cp_end_pad_loose * GetPicoFeatureLength (); + *SidePad = classify_cp_side_pad_loose * GetPicoFeatureLength (); + *AnglePad = classify_cp_angle_pad_loose / 360.0; + break; + + case 1: + *EndPad = classify_cp_end_pad_medium * GetPicoFeatureLength (); + *SidePad = classify_cp_side_pad_medium * GetPicoFeatureLength (); + *AnglePad = classify_cp_angle_pad_medium / 360.0; + break; + + case 2: + *EndPad = classify_cp_end_pad_tight * GetPicoFeatureLength (); + *SidePad = classify_cp_side_pad_tight * GetPicoFeatureLength (); + *AnglePad = classify_cp_angle_pad_tight / 360.0; + break; + + default: + *EndPad = classify_cp_end_pad_tight * GetPicoFeatureLength (); + *SidePad = classify_cp_side_pad_tight * GetPicoFeatureLength (); + *AnglePad = classify_cp_angle_pad_tight / 360.0; + break; + } + if (*AnglePad > 0.5) + *AnglePad = 0.5; + +} /* GetCPPadsForLevel */ + +/** + * @param Evidence evidence value to return color for + * @return Color which corresponds to specified Evidence value. + * @note Globals: none + */ +ScrollView::Color GetMatchColorFor(float Evidence) { + assert (Evidence >= 0.0); + assert (Evidence <= 1.0); + + if (Evidence >= 0.90) + return ScrollView::WHITE; + else if (Evidence >= 0.75) + return ScrollView::GREEN; + else if (Evidence >= 0.50) + return ScrollView::RED; + else + return ScrollView::BLUE; +} /* GetMatchColorFor */ + +/** + * This routine returns (in Fill) the specification of + * the next line to be filled from Filler. FillerDone() should + * always be called before GetNextFill() to ensure that we + * do not run past the end of the fill table. 
/* @param Filler filler to get next fill spec from
   @param Fill place to put spec for next fill */
void GetNextFill(TABLE_FILLER *Filler, FILL_SPEC *Fill) {
  FILL_SWITCH *Next;

  /* compute the fill assuming no switches will be encountered */
  Fill->AngleStart = Filler->AngleStart;
  Fill->AngleEnd = Filler->AngleEnd;
  Fill->X = Filler->X;
  // Filler YStart/YEnd appear to be 8.8 fixed point (see the *256 bucket
  // scaling in InitTableFiller); the shift recovers the integer bucket.
  Fill->YStart = Filler->YStart >> 8;
  Fill->YEnd = Filler->YEnd >> 8;

  /* update the fill info and the filler for ALL switches at this X value */
  Next = &(Filler->Switch[Filler->NextSwitch]);
  while (Filler->X >= Next->X) {
    // Snap the fill (and the filler) to the switch column.
    Fill->X = Filler->X = Next->X;
    if (Next->Type == StartSwitch) {
      // The lower edge of the fill region changes slope here.
      Fill->YStart = Next->Y;
      Filler->StartDelta = Next->Delta;
      Filler->YStart = Next->YInit;
    }
    else if (Next->Type == EndSwitch) {
      // The upper edge of the fill region changes slope here.
      Fill->YEnd = Next->Y;
      Filler->EndDelta = Next->Delta;
      Filler->YEnd = Next->YInit;
    }
    else { /* Type must be LastSwitch */
      break;
    }
    Filler->NextSwitch++;
    Next = &(Filler->Switch[Filler->NextSwitch]);
  }

  /* prepare the filler for the next call to this routine */
  Filler->X++;
  Filler->YStart += Filler->StartDelta;
  Filler->YEnd += Filler->EndDelta;

} /* GetNextFill */

/**
 * This routine computes a data structure (Filler)
 * which can be used to fill in a rectangle surrounding
 * the specified Proto.  Results are returned in Filler.
+ * + * @param EndPad, SidePad, AnglePad padding to add to proto + * @param Proto proto to create a filler for + * @param Filler place to put table filler + */ +void InitTableFiller (float EndPad, float SidePad, + float AnglePad, PROTO Proto, TABLE_FILLER * Filler) +#define XS X_SHIFT +#define YS Y_SHIFT +#define AS ANGLE_SHIFT +#define NB NUM_CP_BUCKETS +{ + float Angle; + float X, Y, HalfLength; + float Cos, Sin; + float XAdjust, YAdjust; + FPOINT Start, Switch1, Switch2, End; + int S1 = 0; + int S2 = 1; + + Angle = Proto->Angle; + X = Proto->X; + Y = Proto->Y; + HalfLength = Proto->Length / 2.0; + + Filler->AngleStart = CircBucketFor(Angle - AnglePad, AS, NB); + Filler->AngleEnd = CircBucketFor(Angle + AnglePad, AS, NB); + Filler->NextSwitch = 0; + + if (fabs (Angle - 0.0) < HV_TOLERANCE || fabs (Angle - 0.5) < HV_TOLERANCE) { + /* horizontal proto - handle as special case */ + Filler->X = Bucket8For(X - HalfLength - EndPad, XS, NB); + Filler->YStart = Bucket16For(Y - SidePad, YS, NB * 256); + Filler->YEnd = Bucket16For(Y + SidePad, YS, NB * 256); + Filler->StartDelta = 0; + Filler->EndDelta = 0; + Filler->Switch[0].Type = LastSwitch; + Filler->Switch[0].X = Bucket8For(X + HalfLength + EndPad, XS, NB); + } else if (fabs(Angle - 0.25) < HV_TOLERANCE || + fabs(Angle - 0.75) < HV_TOLERANCE) { + /* vertical proto - handle as special case */ + Filler->X = Bucket8For(X - SidePad, XS, NB); + Filler->YStart = Bucket16For(Y - HalfLength - EndPad, YS, NB * 256); + Filler->YEnd = Bucket16For(Y + HalfLength + EndPad, YS, NB * 256); + Filler->StartDelta = 0; + Filler->EndDelta = 0; + Filler->Switch[0].Type = LastSwitch; + Filler->Switch[0].X = Bucket8For(X + SidePad, XS, NB); + } else { + /* diagonal proto */ + + if ((Angle > 0.0 && Angle < 0.25) || (Angle > 0.5 && Angle < 0.75)) { + /* rising diagonal proto */ + Angle *= 2.0 * M_PI; + Cos = fabs(cos(Angle)); + Sin = fabs(sin(Angle)); + + /* compute the positions of the corners of the acceptance region */ + Start.x = X - 
(HalfLength + EndPad) * Cos - SidePad * Sin; + Start.y = Y - (HalfLength + EndPad) * Sin + SidePad * Cos; + End.x = 2.0 * X - Start.x; + End.y = 2.0 * Y - Start.y; + Switch1.x = X - (HalfLength + EndPad) * Cos + SidePad * Sin; + Switch1.y = Y - (HalfLength + EndPad) * Sin - SidePad * Cos; + Switch2.x = 2.0 * X - Switch1.x; + Switch2.y = 2.0 * Y - Switch1.y; + + if (Switch1.x > Switch2.x) { + S1 = 1; + S2 = 0; + } + + /* translate into bucket positions and deltas */ + Filler->X = Bucket8For(Start.x, XS, NB); + Filler->StartDelta = -static_cast<int16_t>((Cos / Sin) * 256); + Filler->EndDelta = static_cast<int16_t>((Sin / Cos) * 256); + + XAdjust = BucketEnd(Filler->X, XS, NB) - Start.x; + YAdjust = XAdjust * Cos / Sin; + Filler->YStart = Bucket16For(Start.y - YAdjust, YS, NB * 256); + YAdjust = XAdjust * Sin / Cos; + Filler->YEnd = Bucket16For(Start.y + YAdjust, YS, NB * 256); + + Filler->Switch[S1].Type = StartSwitch; + Filler->Switch[S1].X = Bucket8For(Switch1.x, XS, NB); + Filler->Switch[S1].Y = Bucket8For(Switch1.y, YS, NB); + XAdjust = Switch1.x - BucketStart(Filler->Switch[S1].X, XS, NB); + YAdjust = XAdjust * Sin / Cos; + Filler->Switch[S1].YInit = Bucket16For(Switch1.y - YAdjust, YS, NB * 256); + Filler->Switch[S1].Delta = Filler->EndDelta; + + Filler->Switch[S2].Type = EndSwitch; + Filler->Switch[S2].X = Bucket8For(Switch2.x, XS, NB); + Filler->Switch[S2].Y = Bucket8For(Switch2.y, YS, NB); + XAdjust = Switch2.x - BucketStart(Filler->Switch[S2].X, XS, NB); + YAdjust = XAdjust * Cos / Sin; + Filler->Switch[S2].YInit = Bucket16For(Switch2.y + YAdjust, YS, NB * 256); + Filler->Switch[S2].Delta = Filler->StartDelta; + + Filler->Switch[2].Type = LastSwitch; + Filler->Switch[2].X = Bucket8For(End.x, XS, NB); + } else { + /* falling diagonal proto */ + Angle *= 2.0 * M_PI; + Cos = fabs(cos(Angle)); + Sin = fabs(sin(Angle)); + + /* compute the positions of the corners of the acceptance region */ + Start.x = X - (HalfLength + EndPad) * Cos - SidePad * Sin; + Start.y = 
Y + (HalfLength + EndPad) * Sin - SidePad * Cos; + End.x = 2.0 * X - Start.x; + End.y = 2.0 * Y - Start.y; + Switch1.x = X - (HalfLength + EndPad) * Cos + SidePad * Sin; + Switch1.y = Y + (HalfLength + EndPad) * Sin + SidePad * Cos; + Switch2.x = 2.0 * X - Switch1.x; + Switch2.y = 2.0 * Y - Switch1.y; + + if (Switch1.x > Switch2.x) { + S1 = 1; + S2 = 0; + } + + /* translate into bucket positions and deltas */ + Filler->X = Bucket8For(Start.x, XS, NB); + Filler->StartDelta = static_cast<int16_t>(ClipToRange<int>( + -IntCastRounded((Sin / Cos) * 256), INT16_MIN, INT16_MAX)); + Filler->EndDelta = static_cast<int16_t>(ClipToRange<int>( + IntCastRounded((Cos / Sin) * 256), INT16_MIN, INT16_MAX)); + + XAdjust = BucketEnd(Filler->X, XS, NB) - Start.x; + YAdjust = XAdjust * Sin / Cos; + Filler->YStart = Bucket16For(Start.y - YAdjust, YS, NB * 256); + YAdjust = XAdjust * Cos / Sin; + Filler->YEnd = Bucket16For(Start.y + YAdjust, YS, NB * 256); + + Filler->Switch[S1].Type = EndSwitch; + Filler->Switch[S1].X = Bucket8For(Switch1.x, XS, NB); + Filler->Switch[S1].Y = Bucket8For(Switch1.y, YS, NB); + XAdjust = Switch1.x - BucketStart(Filler->Switch[S1].X, XS, NB); + YAdjust = XAdjust * Sin / Cos; + Filler->Switch[S1].YInit = Bucket16For(Switch1.y + YAdjust, YS, NB * 256); + Filler->Switch[S1].Delta = Filler->StartDelta; + + Filler->Switch[S2].Type = StartSwitch; + Filler->Switch[S2].X = Bucket8For(Switch2.x, XS, NB); + Filler->Switch[S2].Y = Bucket8For(Switch2.y, YS, NB); + XAdjust = Switch2.x - BucketStart(Filler->Switch[S2].X, XS, NB); + YAdjust = XAdjust * Cos / Sin; + Filler->Switch[S2].YInit = Bucket16For(Switch2.y - YAdjust, YS, NB * 256); + Filler->Switch[S2].Delta = Filler->EndDelta; + + Filler->Switch[2].Type = LastSwitch; + Filler->Switch[2].X = Bucket8For(End.x, XS, NB); + } + } +} /* InitTableFiller */ + + +/*---------------------------------------------------------------------------*/ +#ifndef GRAPHICS_DISABLED +/** + * This routine renders the specified feature 
into ShapeList. + * @param window to add feature rendering to + * @param Feature feature to be rendered + * @param color color to use for feature rendering + * @return New shape list with rendering of Feature added. + * @note Globals: none + */ +void RenderIntFeature(ScrollView *window, const INT_FEATURE_STRUCT* Feature, + ScrollView::Color color) { + float X, Y, Dx, Dy, Length; + + window->Pen(color); + assert(Feature != nullptr); + assert(color != 0); + + X = Feature->X; + Y = Feature->Y; + Length = GetPicoFeatureLength() * 0.7 * INT_CHAR_NORM_RANGE; + // The -PI has no significant effect here, but the value of Theta is computed + // using BinaryAnglePlusPi in intfx.cpp. + Dx = (Length / 2.0) * cos((Feature->Theta / 256.0) * 2.0 * M_PI - M_PI); + Dy = (Length / 2.0) * sin((Feature->Theta / 256.0) * 2.0 * M_PI - M_PI); + + window->SetCursor(X, Y); + window->DrawTo(X + Dx, Y + Dy); +} /* RenderIntFeature */ + +/** + * This routine extracts the parameters of the specified + * proto from the class description and adds a rendering of + * the proto onto the ShapeList. + * + * @param window ScrollView instance + * @param Class class that proto is contained in + * @param ProtoId id of proto to be rendered + * @param color color to render proto in + * + * Globals: none + * + * @return New shape list with a rendering of one proto added. 
/* (closes doc comment above) */
void RenderIntProto(ScrollView *window,
                    INT_CLASS Class,
                    PROTO_ID ProtoId,
                    ScrollView::Color color) {
  PROTO_SET ProtoSet;
  INT_PROTO Proto;
  int ProtoSetIndex;
  int ProtoWordIndex;
  float Length;
  int Xmin, Xmax, Ymin, Ymax;
  float X, Y, Dx, Dy;
  uint32_t ProtoMask;
  int Bucket;

  assert(ProtoId >= 0);
  assert(Class != nullptr);
  assert(ProtoId < Class->NumProtos);
  assert(color != 0);
  window->Pen(color);

  ProtoSet = Class->ProtoSets[SetForProto(ProtoId)];
  ProtoSetIndex = IndexForProto(ProtoId);
  Proto = &(ProtoSet->Protos[ProtoSetIndex]);
  Length = (Class->ProtoLengths[ProtoId] *
            GetPicoFeatureLength() * INT_CHAR_NORM_RANGE);
  ProtoMask = PPrunerMaskFor(ProtoId);
  ProtoWordIndex = PPrunerWordIndexFor(ProtoId);

  // find the x and y extent of the proto from the proto pruning table
  Xmin = Ymin = NUM_PP_BUCKETS;
  Xmax = Ymax = 0;
  for (Bucket = 0; Bucket < NUM_PP_BUCKETS; Bucket++) {
    // A set bit means this proto can fire for features in this bucket.
    if (ProtoMask & ProtoSet->ProtoPruner[PRUNER_X][Bucket][ProtoWordIndex]) {
      UpdateRange(Bucket, &Xmin, &Xmax);
    }

    if (ProtoMask & ProtoSet->ProtoPruner[PRUNER_Y][Bucket][ProtoWordIndex]) {
      UpdateRange(Bucket, &Ymin, &Ymax);
    }
  }
  // Center of the proto's pruner bounding box, scaled to display space.
  X = (Xmin + Xmax + 1) / 2.0 * PROTO_PRUNER_SCALE;
  Y = (Ymin + Ymax + 1) / 2.0 * PROTO_PRUNER_SCALE;
  // The -PI has no significant effect here, but the value of Theta is computed
  // using BinaryAnglePlusPi in intfx.cpp.
  Dx = (Length / 2.0) * cos((Proto->Angle / 256.0) * 2.0 * M_PI - M_PI);
  Dy = (Length / 2.0) * sin((Proto->Angle / 256.0) * 2.0 * M_PI - M_PI);

  // Draw the proto as a line segment through the box center.
  window->SetCursor(X - Dx, Y - Dy);
  window->DrawTo(X + Dx, Y + Dy);
} /* RenderIntProto */
#endif

#ifndef GRAPHICS_DISABLED
/**
 * Initializes the int matcher window if it is not already
 * initialized.
+ */ +void InitIntMatchWindowIfReqd() { + if (IntMatchWindow == nullptr) { + IntMatchWindow = CreateFeatureSpaceWindow("IntMatchWindow", 50, 200); + auto* popup_menu = new SVMenuNode(); + + popup_menu->AddChild("Debug Adapted classes", IDA_ADAPTIVE, + "x", "Class to debug"); + popup_menu->AddChild("Debug Static classes", IDA_STATIC, + "x", "Class to debug"); + popup_menu->AddChild("Debug Both", IDA_BOTH, + "x", "Class to debug"); + popup_menu->AddChild("Debug Shape Index", IDA_SHAPE_INDEX, + "0", "Index to debug"); + popup_menu->BuildMenu(IntMatchWindow, false); + } +} + +/** + * Initializes the proto display window if it is not already + * initialized. + */ +void InitProtoDisplayWindowIfReqd() { + if (ProtoDisplayWindow == nullptr) { + ProtoDisplayWindow = CreateFeatureSpaceWindow("ProtoDisplayWindow", + 550, 200); + } +} + +/** + * Initializes the feature display window if it is not already + * initialized. + */ +void InitFeatureDisplayWindowIfReqd() { + if (FeatureDisplayWindow == nullptr) { + FeatureDisplayWindow = CreateFeatureSpaceWindow("FeatureDisplayWindow", + 50, 700); + } +} + +/// Creates a window of the appropriate size for displaying elements +/// in feature space. +ScrollView* CreateFeatureSpaceWindow(const char* name, int xpos, int ypos) { + return new ScrollView(name, xpos, ypos, 520, 520, 260, 260, true); +} +#endif // !GRAPHICS_DISABLED + +} // namespace tesseract diff --git a/tesseract/src/classify/intproto.h b/tesseract/src/classify/intproto.h new file mode 100644 index 00000000..77bf2376 --- /dev/null +++ b/tesseract/src/classify/intproto.h @@ -0,0 +1,265 @@ +/****************************************************************************** + ** Filename: intproto.h + ** Purpose: Definition of data structures for integer protos. + ** Author: Dan Johnson + ** + ** (c) Copyright Hewlett-Packard Company, 1988. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. 
/******************************************************************************
 ** (license, continued)
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *****************************************************************************/

#ifndef INTPROTO_H
#define INTPROTO_H

/**----------------------------------------------------------------------------
          Include Files and Type Defines
----------------------------------------------------------------------------**/
#include "matchdefs.h"
#include "mfoutline.h"
#include "protos.h"
#include "scrollview.h"
#include "unicharset.h"

namespace tesseract {

class FCOORD;

/* define order of params in pruners */
#define PRUNER_X      0
#define PRUNER_Y      1
#define PRUNER_ANGLE  2

/* definition of coordinate system offsets for each table parameter */
#define ANGLE_SHIFT (0.0)
#define X_SHIFT (0.5)
#define Y_SHIFT (0.5)

#define MAX_PROTO_INDEX 24
#define BITS_PER_WERD static_cast<int>(8 * sizeof(uint32_t))
/* Script detection: increase this number to 128 */
#define MAX_NUM_CONFIGS 64
#define MAX_NUM_PROTOS 512
#define PROTOS_PER_PROTO_SET 64
#define MAX_NUM_PROTO_SETS (MAX_NUM_PROTOS / PROTOS_PER_PROTO_SET)
#define NUM_PP_PARAMS 3
#define NUM_PP_BUCKETS 64
#define NUM_CP_BUCKETS 24
#define CLASSES_PER_CP 32
#define NUM_BITS_PER_CLASS 2
#define CLASS_PRUNER_CLASS_MASK (~(~0u << NUM_BITS_PER_CLASS))
#define CLASSES_PER_CP_WERD (CLASSES_PER_CP / NUM_BITS_PER_CLASS)
#define PROTOS_PER_PP_WERD BITS_PER_WERD
#define BITS_PER_CP_VECTOR (CLASSES_PER_CP * NUM_BITS_PER_CLASS)
#define MAX_NUM_CLASS_PRUNERS \
  ((MAX_NUM_CLASSES + CLASSES_PER_CP - 1) / CLASSES_PER_CP)
#define WERDS_PER_CP_VECTOR (BITS_PER_CP_VECTOR / BITS_PER_WERD)
#define WERDS_PER_PP_VECTOR \
  ((PROTOS_PER_PROTO_SET + BITS_PER_WERD - 1) / BITS_PER_WERD)
#define WERDS_PER_PP (NUM_PP_PARAMS * NUM_PP_BUCKETS * WERDS_PER_PP_VECTOR)
#define WERDS_PER_CP \
  (NUM_CP_BUCKETS * NUM_CP_BUCKETS * NUM_CP_BUCKETS * WERDS_PER_CP_VECTOR)
#define WERDS_PER_CONFIG_VEC \
  ((MAX_NUM_CONFIGS + BITS_PER_WERD - 1) / BITS_PER_WERD)

/* The first 3 dimensions of the CLASS_PRUNER_STRUCT are the
 * 3 axes of the quantized feature space.
 * The position of the bits recorded for each class in the
 * 4th dimension is determined by using CPrunerWordIndexFor(c),
 * where c is the corresponding class id. */
struct CLASS_PRUNER_STRUCT {
  uint32_t p[NUM_CP_BUCKETS][NUM_CP_BUCKETS][NUM_CP_BUCKETS]
            [WERDS_PER_CP_VECTOR];
};

// A single proto in integer form, plus the bit vector of configs it
// belongs to.  (A/B/C/Angle are quantized proto parameters — see the
// proto-building code for their exact meaning.)
typedef struct {
  int8_t A;
  uint8_t B;
  int8_t C;
  uint8_t Angle;
  uint32_t Configs[WERDS_PER_CONFIG_VEC];
}

INT_PROTO_STRUCT,
*INT_PROTO;

typedef uint32_t PROTO_PRUNER[NUM_PP_PARAMS][NUM_PP_BUCKETS]
                             [WERDS_PER_PP_VECTOR];

// A bank of PROTOS_PER_PROTO_SET protos sharing one proto pruner.
typedef struct {
  PROTO_PRUNER ProtoPruner;
  INT_PROTO_STRUCT Protos[PROTOS_PER_PROTO_SET];
}

PROTO_SET_STRUCT,
*PROTO_SET;

typedef uint32_t CONFIG_PRUNER[NUM_PP_PARAMS][NUM_PP_BUCKETS][4];

// All integer protos and configs for one class.
typedef struct {
  uint16_t NumProtos;
  uint8_t NumProtoSets;
  uint8_t NumConfigs;
  PROTO_SET ProtoSets[MAX_NUM_PROTO_SETS];
  uint8_t* ProtoLengths;
  uint16_t ConfigLengths[MAX_NUM_CONFIGS];
  int font_set_id;  // FontSet id, see above
}

INT_CLASS_STRUCT,
*INT_CLASS;

// The full set of integer templates: per-class data plus class pruners.
typedef struct {
  int NumClasses;
  int NumClassPruners;
  INT_CLASS Class[MAX_NUM_CLASSES];
  CLASS_PRUNER_STRUCT* ClassPruners[MAX_NUM_CLASS_PRUNERS];
}

INT_TEMPLATES_STRUCT,
*INT_TEMPLATES;

/* definitions of integer features*/
#define MAX_NUM_INT_FEATURES 512
#define INT_CHAR_NORM_RANGE 256

// A quantized feature: position (X, Y) and direction (Theta), each in
// one byte, plus a class-pruner miss counter.
struct INT_FEATURE_STRUCT {
  INT_FEATURE_STRUCT() : X(0), Y(0), Theta(0), CP_misses(0) {}
  // Builds a feature from an FCOORD for position with all the necessary
  // clipping and rounding.
  INT_FEATURE_STRUCT(const FCOORD& pos, uint8_t theta);
  // Builds a feature from ints with all the necessary clipping and casting.
  INT_FEATURE_STRUCT(int x, int y, int theta);

  uint8_t X;
  uint8_t Y;
  uint8_t Theta;
  int8_t CP_misses;

  void print() const {
    tprintf("(%d,%d):%d\n", X, Y, Theta);
  }
};

using INT_FEATURE = INT_FEATURE_STRUCT*;

typedef INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES];

enum IntmatcherDebugAction {
  IDA_ADAPTIVE,
  IDA_STATIC,
  IDA_SHAPE_INDEX,
  IDA_BOTH
};

/**----------------------------------------------------------------------------
            Macros
----------------------------------------------------------------------------**/

#define MaxNumIntProtosIn(C)  (C->NumProtoSets * PROTOS_PER_PROTO_SET)
#define SetForProto(P)        (P / PROTOS_PER_PROTO_SET)
#define IndexForProto(P)      (P % PROTOS_PER_PROTO_SET)
#define ProtoForProtoId(C, P) \
  (&((C->ProtoSets[SetForProto(P)])->Protos[IndexForProto(P)]))
#define PPrunerWordIndexFor(I) \
  (((I) % PROTOS_PER_PROTO_SET) / PROTOS_PER_PP_WERD)
#define PPrunerBitIndexFor(I) ((I) % PROTOS_PER_PP_WERD)
#define PPrunerMaskFor(I) (1 << PPrunerBitIndexFor(I))

#define MaxNumClassesIn(T) (T->NumClassPruners * CLASSES_PER_CP)
#define LegalClassId(c) ((c) >= 0 && (c) < MAX_NUM_CLASSES)
#define UnusedClassIdIn(T, c) ((T)->Class[c] == nullptr)
#define ClassForClassId(T, c) ((T)->Class[c])
#define ClassPrunersFor(T) ((T)->ClassPruner)
#define CPrunerIdFor(c) ((c) / CLASSES_PER_CP)
#define CPrunerFor(T, c) ((T)->ClassPruners[CPrunerIdFor(c)])
#define CPrunerWordIndexFor(c) (((c) % CLASSES_PER_CP) / CLASSES_PER_CP_WERD)
#define CPrunerBitIndexFor(c) (((c) % CLASSES_PER_CP) % CLASSES_PER_CP_WERD)
#define CPrunerMaskFor(L, c) \
  (((L) + 1) << CPrunerBitIndexFor(c) * NUM_BITS_PER_CLASS)

/* DEBUG macros*/
#define PRINT_MATCH_SUMMARY     0x001
#define DISPLAY_FEATURE_MATCHES 0x002
#define DISPLAY_PROTO_MATCHES   0x004
#define PRINT_FEATURE_MATCHES   0x008
#define PRINT_PROTO_MATCHES     0x010
#define CLIP_MATCH_EVIDENCE     0x020

#define MatchDebuggingOn(D) (D)
#define PrintMatchSummaryOn(D) ((D)&PRINT_MATCH_SUMMARY)
#define DisplayFeatureMatchesOn(D) ((D)&DISPLAY_FEATURE_MATCHES)
#define DisplayProtoMatchesOn(D) ((D)&DISPLAY_PROTO_MATCHES)
#define PrintFeatureMatchesOn(D) ((D)&PRINT_FEATURE_MATCHES)
#define PrintProtoMatchesOn(D) ((D)&PRINT_PROTO_MATCHES)
#define ClipMatchEvidenceOn(D) ((D)&CLIP_MATCH_EVIDENCE)

/**----------------------------------------------------------------------------
          Public Function Prototypes
----------------------------------------------------------------------------**/
void AddIntClass(INT_TEMPLATES Templates, CLASS_ID ClassId, INT_CLASS Class);

int AddIntConfig(INT_CLASS Class);

int AddIntProto(INT_CLASS Class);

void AddProtoToClassPruner(PROTO Proto, CLASS_ID ClassId,
                           INT_TEMPLATES Templates);

void AddProtoToProtoPruner(PROTO Proto, int ProtoId, INT_CLASS Class,
                           bool debug);

uint8_t Bucket8For(float param, float offset, int num_buckets);
uint16_t Bucket16For(float param, float offset, int num_buckets);

uint8_t CircBucketFor(float param, float offset, int num_buckets);

void UpdateMatchDisplay();

void ConvertConfig(BIT_VECTOR Config, int ConfigId, INT_CLASS Class);

void DisplayIntFeature(const INT_FEATURE_STRUCT* Feature, float Evidence);

void DisplayIntProto(INT_CLASS Class, PROTO_ID ProtoId, float Evidence);

INT_CLASS NewIntClass(int MaxNumProtos, int MaxNumConfigs);

INT_TEMPLATES NewIntTemplates();

TESS_API
void free_int_templates(INT_TEMPLATES templates);

void ShowMatchDisplay();

// Clears the given window and draws the featurespace guides for the
// appropriate normalization method.
TESS_API
void ClearFeatureSpaceWindow(NORM_METHOD norm_method, ScrollView* window);

/*----------------------------------------------------------------------------*/
#ifndef GRAPHICS_DISABLED
TESS_API
void RenderIntFeature(ScrollView* window, const INT_FEATURE_STRUCT* Feature,
                      ScrollView::Color color);

// Lazily create the debug windows on first use.
void InitIntMatchWindowIfReqd();

void InitProtoDisplayWindowIfReqd();

void InitFeatureDisplayWindowIfReqd();

// Creates a window of the appropriate size for displaying elements
// in feature space.
TESS_API
ScrollView* CreateFeatureSpaceWindow(const char* name, int xpos, int ypos);
#endif  // !GRAPHICS_DISABLED

}  // namespace tesseract

#endif
diff --git a/tesseract/src/classify/kdtree.cpp b/tesseract/src/classify/kdtree.cpp
new file mode 100644
index 00000000..d8ff700d
--- /dev/null
+++ b/tesseract/src/classify/kdtree.cpp
@@ -0,0 +1,541 @@
/******************************************************************************
 ** Filename: kdtree.cpp
 ** Purpose:  Routines for managing K-D search trees
 ** Author:   Dan Johnson
 **
 ** (c) Copyright Hewlett-Packard Company, 1988.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
/******************************************************************************
 ** (license footer, continued from previous chunk)
 ******************************************************************************/

/*-----------------------------------------------------------------------------
          Include Files and Type Defines
-----------------------------------------------------------------------------*/
#include "kdtree.h"

#include <algorithm>
#include <cfloat>     // for FLT_MAX
#include <cstdio>
#include <cmath>

namespace tesseract {

#define Magnitude(X)    ((X) < 0 ? -(X) : (X))
// A node matches only if BOTH its key and data pointers are identical.
#define NodeFound(N,K,D)  (((N)->Key == (K)) && ((N)->Data == (D)))

/*-----------------------------------------------------------------------------
        Global Data Definitions and Declarations
-----------------------------------------------------------------------------*/
// Sentinels for an unbounded search box along a dimension.
#define MINSEARCH -FLT_MAX
#define MAXSEARCH FLT_MAX

// Helper function to find the next essential dimension in a cycle.
// Dimensions flagged NonEssential are skipped; wraps back to 0 after
// the last dimension.  Calling with level = -1 yields the first
// essential dimension.
static int NextLevel(KDTREE *tree, int level) {
  do {
    ++level;
    if (level >= tree->KeySize)
      level = 0;
  } while (tree->KeyDesc[level].NonEssential);
  return level;
}

//-----------------------------------------------------------------------------
/** Store the k smallest-keyed key-value pairs.
*/ +template<typename Key, typename Value> +class MinK { + public: + MinK(Key max_key, int k); + ~MinK(); + + struct Element { + Element() {} + Element(const Key& k, const Value& v) : key(k), value(v) {} + + Key key; + Value value; + }; + + bool insert(Key k, Value v); + const Key& max_insertable_key(); + + int elements_count() { return elements_count_; } + const Element* elements() { return elements_; } + + private: + const Key max_key_; ///< the maximum possible Key + Element *elements_; ///< unsorted array of elements + int elements_count_; ///< the number of results collected so far + int k_; ///< the number of results we want from the search + int max_index_; ///< the index of the result with the largest key +}; + +template<typename Key, typename Value> +MinK<Key, Value>::MinK(Key max_key, int k) : + max_key_(max_key), elements_count_(0), k_(k < 1 ? 1 : k), max_index_(0) { + elements_ = new Element[k_]; +} + +template<typename Key, typename Value> +MinK<Key, Value>::~MinK() { + delete []elements_; +} + +template<typename Key, typename Value> +const Key& MinK<Key, Value>::max_insertable_key() { + if (elements_count_ < k_) + return max_key_; + return elements_[max_index_].key; +} + +template<typename Key, typename Value> +bool MinK<Key, Value>::insert(Key key, Value value) { + if (elements_count_ < k_) { + elements_[elements_count_++] = Element(key, value); + if (key > elements_[max_index_].key) + max_index_ = elements_count_ - 1; + return true; + } else if (key < elements_[max_index_].key) { + // evict the largest element. + elements_[max_index_] = Element(key, value); + // recompute max_index_ + for (int i = 0; i < elements_count_; i++) { + if (elements_[i].key > elements_[max_index_].key) + max_index_ = i; + } + return true; + } + return false; +} + + +//----------------------------------------------------------------------------- +/** Helper class for searching for the k closest points to query_point in tree. 
/* (closes doc comment above) */
class KDTreeSearch {
 public:
  KDTreeSearch(KDTREE* tree, float *query_point, int k_closest);
  ~KDTreeSearch();

  /** Return the k nearest points' data. */
  void Search(int *result_count, float *distances, void **results);

 private:
  // Recursive workhorse; defined elsewhere in this file.
  void SearchRec(int Level, KDNODE *SubTree);
  bool BoxIntersectsSearch(float *lower, float *upper);

  KDTREE *tree_;         // tree being searched (not owned)
  float *query_point_;   // query key (not owned)
  float *sb_min_;        ///< search box minimum
  float *sb_max_;        ///< search box maximum
  MinK<float, void *> results_;
};

KDTreeSearch::KDTreeSearch(KDTREE *tree, float *query_point, int k_closest)
    : tree_(tree), query_point_(query_point), results_(MAXSEARCH, k_closest) {
  // One bound per dimension of the tree's key.
  sb_min_ = new float[tree->KeySize];
  sb_max_ = new float[tree->KeySize];
}

KDTreeSearch::~KDTreeSearch() {
  delete[] sb_min_;
  delete[] sb_max_;
}

/// Locate the k_closest points to query_point_, and return their distances
/// and data into the given buffers.
void KDTreeSearch::Search(int *result_count,
                          float *distances,
                          void **results) {
  if (tree_->Root.Left == nullptr) {
    // Empty tree: the root sentinel's Left child is the real root.
    *result_count = 0;
  } else {
    // Start with the search box covering each dimension's full range.
    for (int i = 0; i < tree_->KeySize; i++) {
      sb_min_[i] = tree_->KeyDesc[i].Min;
      sb_max_[i] = tree_->KeyDesc[i].Max;
    }
    SearchRec(0, tree_->Root.Left);
    int count = results_.elements_count();
    *result_count = count;
    for (int j = 0; j < count; j++) {
      // Keys are squared distances; report true (rooted) distances.
      // Pre-cast to float64 as key is a template type and we have no control
      // over its actual type.
      distances[j] = static_cast<float>(sqrt(static_cast<double>(results_.elements()[j].key)));
      results[j] = results_.elements()[j].value;
    }
  }
}

/*-----------------------------------------------------------------------------
              Public Code
-----------------------------------------------------------------------------*/
/// @return a new KDTREE based on the specified parameters.
+/// @param KeySize # of dimensions in the K-D tree +/// @param KeyDesc array of params to describe key dimensions +KDTREE *MakeKDTree(int16_t KeySize, const PARAM_DESC KeyDesc[]) { + auto *KDTree = static_cast<KDTREE *>(malloc( + sizeof(KDTREE) + (KeySize - 1) * sizeof(PARAM_DESC))); + for (int i = 0; i < KeySize; i++) { + KDTree->KeyDesc[i].NonEssential = KeyDesc[i].NonEssential; + KDTree->KeyDesc[i].Circular = KeyDesc[i].Circular; + if (KeyDesc[i].Circular) { + KDTree->KeyDesc[i].Min = KeyDesc[i].Min; + KDTree->KeyDesc[i].Max = KeyDesc[i].Max; + KDTree->KeyDesc[i].Range = KeyDesc[i].Max - KeyDesc[i].Min; + KDTree->KeyDesc[i].HalfRange = KDTree->KeyDesc[i].Range / 2; + KDTree->KeyDesc[i].MidRange = (KeyDesc[i].Max + KeyDesc[i].Min) / 2; + } else { + KDTree->KeyDesc[i].Min = MINSEARCH; + KDTree->KeyDesc[i].Max = MAXSEARCH; + } + } + KDTree->KeySize = KeySize; + KDTree->Root.Left = nullptr; + KDTree->Root.Right = nullptr; + return KDTree; +} + + +/** + * This routine stores Data in the K-D tree specified by Tree + * using Key as an access key. + * + * @param Tree K-D tree in which data is to be stored + * @param Key ptr to key by which data can be retrieved + * @param Data ptr to data to be stored in the tree + */ +void KDStore(KDTREE *Tree, float *Key, void *Data) { + int Level; + KDNODE *Node; + KDNODE **PtrToNode; + + PtrToNode = &(Tree->Root.Left); + Node = *PtrToNode; + Level = NextLevel(Tree, -1); + while (Node != nullptr) { + if (Key[Level] < Node->BranchPoint) { + PtrToNode = &(Node->Left); + if (Key[Level] > Node->LeftBranch) + Node->LeftBranch = Key[Level]; + } + else { + PtrToNode = &(Node->Right); + if (Key[Level] < Node->RightBranch) + Node->RightBranch = Key[Level]; + } + Level = NextLevel(Tree, Level); + Node = *PtrToNode; + } + + *PtrToNode = MakeKDNode(Tree, Key, Data, Level); +} /* KDStore */ + +/** + * This routine deletes a node from Tree. The node to be + * deleted is specified by the Key for the node and the Data + * contents of the node. 
These two pointers must be identical + * to the pointers that were used for the node when it was + * originally stored in the tree. A node will be deleted from + * the tree only if its key and data pointers are identical + * to Key and Data respectively. The tree is re-formed by removing + * the affected subtree and inserting all elements but the root. + * + * @param Tree K-D tree to delete node from + * @param Key key of node to be deleted + * @param Data data contents of node to be deleted + */ +void +KDDelete (KDTREE * Tree, float Key[], void *Data) { + int Level; + KDNODE *Current; + KDNODE *Father; + + /* initialize search at root of tree */ + Father = &(Tree->Root); + Current = Father->Left; + Level = NextLevel(Tree, -1); + + /* search tree for node to be deleted */ + while ((Current != nullptr) && (!NodeFound (Current, Key, Data))) { + Father = Current; + if (Key[Level] < Current->BranchPoint) + Current = Current->Left; + else + Current = Current->Right; + + Level = NextLevel(Tree, Level); + } + + if (Current != nullptr) { /* if node to be deleted was found */ + if (Current == Father->Left) { + Father->Left = nullptr; + Father->LeftBranch = Tree->KeyDesc[Level].Min; + } else { + Father->Right = nullptr; + Father->RightBranch = Tree->KeyDesc[Level].Max; + } + + InsertNodes(Tree, Current->Left); + InsertNodes(Tree, Current->Right); + FreeSubTree(Current); + } +} /* KDDelete */ + +/** + * This routine searches the K-D tree specified by Tree and + * finds the QuerySize nearest neighbors of Query. All neighbors + * must be within MaxDistance of Query. The data contents of + * the nearest neighbors + * are placed in NBuffer and their distances from Query are + * placed in DBuffer. 
+ * @param Tree ptr to K-D tree to be searched + * @param Query ptr to query key (point in D-space) + * @param QuerySize number of nearest neighbors to be found + * @param MaxDistance all neighbors must be within this distance + * @param NBuffer ptr to QuerySize buffer to hold nearest neighbors + * @param DBuffer ptr to QuerySize buffer to hold distances + * from nearest neighbor to query point + * @param NumberOfResults [out] Number of nearest neighbors actually found + */ +void KDNearestNeighborSearch( + KDTREE *Tree, float Query[], int QuerySize, float MaxDistance, + int *NumberOfResults, void **NBuffer, float DBuffer[]) { + KDTreeSearch search(Tree, Query, QuerySize); + search.Search(NumberOfResults, DBuffer, NBuffer); +} + + +/*---------------------------------------------------------------------------*/ +/** Walk a given Tree with action. */ +void KDWalk(KDTREE *Tree, void_proc action, void *context) { + if (Tree->Root.Left != nullptr) + Walk(Tree, action, context, Tree->Root.Left, NextLevel(Tree, -1)); +} + + +/*---------------------------------------------------------------------------*/ +/** + * This routine frees all memory which is allocated to the + * specified KD-tree. This includes the data structure for + * the kd-tree itself plus the data structures for each node + * in the tree. It does not include the Key and Data items + * which are pointed to by the nodes. This memory is left + * untouched. + * @param Tree tree data structure to be released + */ +void FreeKDTree(KDTREE *Tree) { + FreeSubTree(Tree->Root.Left); + free(Tree); +} /* FreeKDTree */ + + +/*----------------------------------------------------------------------------- + Private Code +-----------------------------------------------------------------------------*/ +/*---------------------------------------------------------------------------*/ +/** + * This routine allocates memory for a new K-D tree node + * and places the specified Key and Data into it. 
The
 * left and right subtree pointers for the node are
 * initialized to empty subtrees.
 * @param tree  The tree to create the node for
 * @param Key  Access key for new node in KD tree; the pointer is stored,
 *             not copied, so the caller's storage must outlive the node.
 * @param Data  ptr to data to be stored in new node
 * @param Index  index of Key to branch on
 * @return pointer to new K-D tree node
 */
KDNODE *MakeKDNode(KDTREE *tree, float Key[], void *Data, int Index) {
  KDNODE *NewNode;

  NewNode = static_cast<KDNODE *>(malloc (sizeof (KDNODE)));

  NewNode->Key = Key;
  NewNode->Data = Data;
  NewNode->BranchPoint = Key[Index];
  // Branch bounds start at the dimension extremes; KDStore tightens them as
  // more keys are inserted below this node.
  NewNode->LeftBranch = tree->KeyDesc[Index].Min;
  NewNode->RightBranch = tree->KeyDesc[Index].Max;
  NewNode->Left = nullptr;
  NewNode->Right = nullptr;

  return NewNode;
}                                /* MakeKDNode */


/*---------------------------------------------------------------------------*/
/** Free a single node (does not free Key or Data, nor any subtree). */
void FreeKDNode(KDNODE *Node) { free(Node); }

/*---------------------------------------------------------------------------*/
/**
 * Recursively accumulate the k_closest points to query_point_ into results_.
 * @param Level  level in tree of sub-tree to be searched
 * @param SubTree  sub-tree to be searched
 */
void KDTreeSearch::SearchRec(int level, KDNODE *sub_tree) {
  // Wrap the split level back to dimension 0 when past the last dimension.
  if (level >= tree_->KeySize)
    level = 0;

  // Prune: skip this entire subtree if its bounding box cannot contain a
  // point closer than the current k-th best result.
  if (!BoxIntersectsSearch(sb_min_, sb_max_))
    return;

  results_.insert(DistanceSquared(tree_->KeySize, tree_->KeyDesc, query_point_,
                                  sub_tree->Key),
                  sub_tree->Data);

  // Visit the half containing the query first, so results_ tightens early and
  // the far half is more likely to be pruned. The search box is narrowed to
  // the child's bounds around each recursive call and restored afterwards.
  if (query_point_[level] < sub_tree->BranchPoint) {
    if (sub_tree->Left != nullptr) {
      float tmp = sb_max_[level];
      sb_max_[level] = sub_tree->LeftBranch;
      SearchRec(NextLevel(tree_, level), sub_tree->Left);
      sb_max_[level] = tmp;
    }
    if (sub_tree->Right != nullptr) {
      float tmp = sb_min_[level];
      sb_min_[level] = sub_tree->RightBranch;
      SearchRec(NextLevel(tree_, level), sub_tree->Right);
      sb_min_[level] = tmp;
    }
  } else {
    if (sub_tree->Right != nullptr) {
      float tmp = sb_min_[level];
      sb_min_[level] = sub_tree->RightBranch;
      SearchRec(NextLevel(tree_, level), sub_tree->Right);
      sb_min_[level] = tmp;
    }
    if (sub_tree->Left != nullptr) {
      float tmp = sb_max_[level];
      sb_max_[level] = sub_tree->LeftBranch;
      SearchRec(NextLevel(tree_, level), sub_tree->Left);
      sb_max_[level] = tmp;
    }
  }
}


/*---------------------------------------------------------------------------*/
/**
 * Returns the Euclidean distance squared between p1 and p2 for all essential
 * dimensions.
 * @param k  keys are in k-space
 * @param dim  dimension descriptions (essential, circular, etc)
 * @param p1,p2  two different points in K-D space
 */
float DistanceSquared(int k, PARAM_DESC *dim, float p1[], float p2[]) {
  float total_distance = 0;

  for (; k > 0; k--, p1++, p2++, dim++) {
    if (dim->NonEssential)
      continue;

    float dimension_distance = *p1 - *p2;

    /* if this dimension is circular - check wraparound distance */
    if (dim->Circular) {
      dimension_distance = Magnitude(dimension_distance);
      float wrap_distance = dim->Max - dim->Min - dimension_distance;
      dimension_distance = std::min(dimension_distance, wrap_distance);
    }

    total_distance += dimension_distance * dimension_distance;
  }
  return total_distance;
}

/** Euclidean distance (not squared) between p1 and p2; see DistanceSquared. */
float ComputeDistance(int k, PARAM_DESC *dim, float p1[], float p2[]) {
  return sqrt(DistanceSquared(k, dim, p1, p2));
}

/*---------------------------------------------------------------------------*/
/// Return whether the query region (the smallest known circle about
/// query_point_ containing results->k_ points) intersects the box specified
/// between lower and upper. For circular dimensions, we also check the point
/// one wrap distance away from the query.
bool KDTreeSearch::BoxIntersectsSearch(float *lower, float *upper) {
  float *query = query_point_;
  // Compute the sum in higher precision.
  double total_distance = 0.0;
  // Radius of the query circle is the largest key still insertable into
  // results_ (i.e. the current k-th best distance, or MAXSEARCH if not full).
  double radius_squared = static_cast<double>(results_.max_insertable_key()) *
      results_.max_insertable_key();
  PARAM_DESC *dim = tree_->KeyDesc;

  for (int i = tree_->KeySize; i > 0; i--, dim++, query++, lower++, upper++) {
    if (dim->NonEssential)
      continue;

    // Distance from the query to the nearest face of the box along this
    // dimension (zero if the query lies inside the box's extent).
    float dimension_distance;
    if (*query < *lower)
      dimension_distance = *lower - *query;
    else if (*query > *upper)
      dimension_distance = *query - *upper;
    else
      dimension_distance = 0;

    /* if this dimension is circular - check wraparound distance */
    if (dim->Circular) {
      float wrap_distance = FLT_MAX;
      if (*query < *lower)
        wrap_distance = *query + dim->Max - dim->Min - *upper;
      else if (*query > *upper)
        wrap_distance = *lower - (*query - (dim->Max - dim->Min));
      dimension_distance = std::min(dimension_distance, wrap_distance);
    }

    total_distance +=
        static_cast<double>(dimension_distance) * dimension_distance;
    // Early out as soon as the accumulated distance exceeds the radius.
    if (total_distance >= radius_squared)
      return false;
  }
  return true;
}


/*---------------------------------------------------------------------------*/
/**
 * Walk a tree, calling action once on each node.
 *
 * Operation:
 *   This routine walks through the specified sub_tree and invokes action
 *   action at each node as follows:
 *       action(context, data, level)
 *   data  the data contents of the node being visited,
 *   level is the level of the node in the tree with the root being level 0.
 * @param tree  root of the tree being walked.
 * @param action  action to be performed at every node
 * @param context  action's context
 * @param sub_tree  ptr to root of subtree to be walked
 * @param level  current level in the tree for this node
 */
void Walk(KDTREE *tree, void_proc action, void *context,
          KDNODE *sub_tree, int32_t level) {
  // Pre-order traversal: visit the node, then its children.
  (*action)(context, sub_tree->Data, level);
  if (sub_tree->Left != nullptr)
    Walk(tree, action, context, sub_tree->Left, NextLevel(tree, level));
  if (sub_tree->Right != nullptr)
    Walk(tree, action, context, sub_tree->Right, NextLevel(tree, level));
}

/** Given a subtree nodes, insert all of its elements into tree. */
void InsertNodes(KDTREE *tree, KDNODE *nodes) {
  if (nodes == nullptr)
    return;

  KDStore(tree, nodes->Key, nodes->Data);
  InsertNodes(tree, nodes->Left);
  InsertNodes(tree, nodes->Right);
}

/** Free all of the nodes of a sub tree (not their Key/Data contents). */
void FreeSubTree(KDNODE *sub_tree) {
  if (sub_tree != nullptr) {
    FreeSubTree(sub_tree->Left);
    FreeSubTree(sub_tree->Right);
    free(sub_tree);
  }
}

}  // namespace tesseract
diff --git a/tesseract/src/classify/kdtree.h b/tesseract/src/classify/kdtree.h
new file mode 100644
index 00000000..b8512191
--- /dev/null
+++ b/tesseract/src/classify/kdtree.h
@@ -0,0 +1,98 @@
/******************************************************************************
 ** Filename: kdtree.h
 ** Purpose: Definition of K-D tree access routines.
 ** Author: Dan Johnson
 **
 ** (c) Copyright Hewlett-Packard Company, 1988.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *****************************************************************************/

#ifndef KDTREE_H
#define KDTREE_H

#include "ocrfeatures.h"

namespace tesseract {

// Visitor callback used by KDWalk/Walk; invoked as (context, data, level).
using void_proc = void (*)(...);

/**
NOTE:  All circular parameters of all keys must be in the range

Min <= Param < Max

where Min and Max are specified in the KeyDesc parameter passed to
MakeKDTree.  All KD routines assume that this is true and will not operate
correctly if circular parameters outside the specified range are used.
*/

struct KDNODE {
  float* Key;          /**< search key (not owned; points at caller storage) */
  void* Data;          /**< data that corresponds to key (not owned) */
  float BranchPoint;   /**< needed to make deletes work efficiently */
  float LeftBranch;    /**< used to optimize search pruning */
  float RightBranch;   /**< used to optimize search pruning */
  struct KDNODE* Left; /**< ptrs for KD tree structure */
  struct KDNODE* Right;
};

struct KDTREE {
  int16_t KeySize;       /* number of dimensions in the tree */
  KDNODE Root;           /* Root.Left points to actual root node */
  /* Pre-C99 "struct hack": MakeKDTree allocates space for KeySize
     descriptors, so KeyDesc is indexed [0, KeySize). */
  PARAM_DESC KeyDesc[1]; /* description of each dimension */
};

/*----------------------------------------------------------------------------
            Macros
-----------------------------------------------------------------------------*/
#define RootOf(T) ((T)->Root.Left->Data)

/*-----------------------------------------------------------------------------
          Public Function Prototypes
-----------------------------------------------------------------------------*/
KDTREE* MakeKDTree(int16_t KeySize, const PARAM_DESC KeyDesc[]);

void KDStore(KDTREE* Tree, float* Key, void* Data);

void KDDelete(KDTREE* Tree, float Key[], void* Data);

void KDNearestNeighborSearch(KDTREE* Tree, float Query[], int QuerySize,
                             float MaxDistance, int* NumberOfResults,
                             void** NBuffer, float DBuffer[]);

void KDWalk(KDTREE* Tree, void_proc Action,
            void* context);

void FreeKDTree(KDTREE* Tree);

/*-----------------------------------------------------------------------------
          Private Function Prototypes
-----------------------------------------------------------------------------*/
KDNODE* MakeKDNode(KDTREE* tree, float Key[], void* Data, int Index);

void FreeKDNode(KDNODE* Node);

float DistanceSquared(int k, PARAM_DESC* dim, float p1[], float p2[]);

TESS_API
float ComputeDistance(int k, PARAM_DESC* dim, float p1[], float p2[]);

int QueryInSearch(KDTREE* tree);

void Walk(KDTREE* tree, void_proc action, void* context, KDNODE* SubTree,
          int32_t Level);

void InsertNodes(KDTREE* tree, KDNODE* nodes);

void FreeSubTree(KDNODE* SubTree);

}  // namespace tesseract

#endif
diff --git a/tesseract/src/classify/mf.cpp b/tesseract/src/classify/mf.cpp
new file mode 100644
index 00000000..d6232eee
--- /dev/null
+++ b/tesseract/src/classify/mf.cpp
@@ -0,0 +1,82 @@
/******************************************************************************
 ** Filename: mf.c
 ** Purpose: Micro-feature interface to flexible feature extractor.
 ** Author: Dan Johnson
 ** History: Thu May 24 09:08:38 1990, DSJ, Created.
 **
 ** (c) Copyright Hewlett-Packard Company, 1988.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
+ ******************************************************************************/ +/*---------------------------------------------------------------------------- + Include Files and Type Defines +----------------------------------------------------------------------------*/ +#include "mf.h" + +#include "featdefs.h" +#include "mfdefs.h" +#include "mfx.h" + +#include <cmath> + +namespace tesseract { + +/*---------------------------------------------------------------------------- + Private Code +----------------------------------------------------------------------------*/ +/** + * Call the old micro-feature extractor and then copy + * the features into the new format. Then deallocate the + * old micro-features. + * @param Blob blob to extract micro-features from + * @param cn_denorm control parameter to feature extractor. + * @return Micro-features for Blob. + */ +FEATURE_SET ExtractMicros(TBLOB* Blob, const DENORM& cn_denorm) { + int NumFeatures; + MICROFEATURES Features, OldFeatures; + FEATURE_SET FeatureSet; + FEATURE Feature; + MICROFEATURE OldFeature; + + OldFeatures = BlobMicroFeatures(Blob, cn_denorm); + if (OldFeatures == nullptr) + return nullptr; + NumFeatures = count (OldFeatures); + FeatureSet = NewFeatureSet (NumFeatures); + + Features = OldFeatures; + iterate(Features) { + OldFeature = reinterpret_cast<MICROFEATURE>first_node (Features); + Feature = NewFeature (&MicroFeatureDesc); + Feature->Params[MFDirection] = OldFeature[ORIENTATION]; + Feature->Params[MFXPosition] = OldFeature[XPOSITION]; + Feature->Params[MFYPosition] = OldFeature[YPOSITION]; + Feature->Params[MFLength] = OldFeature[MFLENGTH]; + + // Bulge features are deprecated and should not be used. Set to 0. + Feature->Params[MFBulge1] = 0.0f; + Feature->Params[MFBulge2] = 0.0f; + +#ifndef _WIN32 + // Assert that feature parameters are well defined. 
+ int i; + for (i = 0; i < Feature->Type->NumParams; i++) { + ASSERT_HOST(!std::isnan(Feature->Params[i])); + } +#endif + + AddFeature(FeatureSet, Feature); + } + FreeMicroFeatures(OldFeatures); + return FeatureSet; +} /* ExtractMicros */ + +} // namespace tesseract diff --git a/tesseract/src/classify/mf.h b/tesseract/src/classify/mf.h new file mode 100644 index 00000000..b1113ce5 --- /dev/null +++ b/tesseract/src/classify/mf.h @@ -0,0 +1,40 @@ +/****************************************************************************** + ** Filename: mf.h + ** Purpose: Micro-feature interface to flexible feature extractor. + ** Author: Dan Johnson + ** + ** (c) Copyright Hewlett-Packard Company, 1988. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + ******************************************************************************/ + +#ifndef MF_H +#define MF_H + +#include "ocrfeatures.h" +#include "blobs.h" + +namespace tesseract { + +typedef enum { + MFXPosition, MFYPosition, + MFLength, MFDirection, MFBulge1, MFBulge2, + MFCount // For array sizes. 
} MF_PARAM_NAME;

// One micro-feature in the new format: indexed by MF_PARAM_NAME.
typedef float MicroFeature[MFCount];
/*----------------------------------------------------------------------------
          Private Function Prototypes
-----------------------------------------------------------------------------*/
FEATURE_SET ExtractMicros(TBLOB* Blob, const DENORM& cn_denorm);

}  // namespace tesseract

#endif
diff --git a/tesseract/src/classify/mfdefs.cpp b/tesseract/src/classify/mfdefs.cpp
new file mode 100644
index 00000000..3442fdfc
--- /dev/null
+++ b/tesseract/src/classify/mfdefs.cpp
@@ -0,0 +1,46 @@
/******************************************************************************
 ** Filename: mfdefs.cpp
 ** Purpose: Basic routines for manipulating micro-features
 ** Author: Dan Johnson
 **
 ** (c) Copyright Hewlett-Packard Company, 1988.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 ******************************************************************************/

#include "mfdefs.h"

#include <cmath>

namespace tesseract {

/*----------------------------------------------------------------------------
              Public Code
----------------------------------------------------------------------------**/

/**
 * This routine allocates and returns a new micro-feature
 * data structure.
 * @return New MICROFEATURE
 */
MICROFEATURE NewMicroFeature() {
  // malloc (not new) because micro-features are stored in old C-style LISTs
  // and released with free() via FreeMicroFeatures below.
  return (static_cast<MICROFEATURE>(malloc (sizeof (MFBLOCK))));
}                                /* NewMicroFeature */

/**
 * This routine deallocates all of the memory consumed by
 * a list of micro-features.
 * @param MicroFeatures  list of micro-features to be freed
 */
void FreeMicroFeatures(MICROFEATURES MicroFeatures) {
  destroy_nodes(MicroFeatures, free);
}                                /* FreeMicroFeatures */

}  // namespace tesseract
diff --git a/tesseract/src/classify/mfdefs.h b/tesseract/src/classify/mfdefs.h
new file mode 100644
index 00000000..90d5374b
--- /dev/null
+++ b/tesseract/src/classify/mfdefs.h
@@ -0,0 +1,61 @@
/******************************************************************************
 ** Filename: mfdefs.h
 ** Purpose: Definition of micro-features
 ** Author: Dan Johnson
 ** History: Mon Jan 22 08:42:13 1990, DSJ, Created.
 **
 ** (c) Copyright Hewlett-Packard Company, 1988.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 ******************************************************************************/
#ifndef MFDEFS_H
#define MFDEFS_H

/**----------------------------------------------------------------------------
          Include Files and Type Defines
----------------------------------------------------------------------------**/
#include "matchdefs.h"
#include "oldlist.h"

namespace tesseract {

/* definition of a list of micro-features */
using MICROFEATURES = LIST;

/* definition of structure of micro-features */
#define MFSIZE 6
typedef float MFBLOCK[MFSIZE];
// A micro-feature is a bare float array of MFSIZE entries, indexed by the
// parameter defines below.
using MICROFEATURE = float*;

/* definitions of individual micro-feature parameters (indices into MFBLOCK) */
#define XPOSITION 0
#define YPOSITION 1
#define MFLENGTH 2
#define ORIENTATION 3
#define FIRSTBULGE 4
#define SECONDBULGE 5

/**----------------------------------------------------------------------------
          Macros
----------------------------------------------------------------------------**/

/* macros for accessing micro-feature lists */
#define NextFeatureOf(L) ((MICROFEATURE)first_node(L))

/**----------------------------------------------------------------------------
          Public Function Prototypes
----------------------------------------------------------------------------**/
MICROFEATURE NewMicroFeature();

void FreeMicroFeatures(MICROFEATURES MicroFeatures);

}  // namespace tesseract

#endif
diff --git a/tesseract/src/classify/mfoutline.cpp b/tesseract/src/classify/mfoutline.cpp
new file mode 100644
index 00000000..450c7acc
--- /dev/null
+++ b/tesseract/src/classify/mfoutline.cpp
@@ -0,0 +1,446 @@
/******************************************************************************
 ** Filename: mfoutline.c
 ** Purpose: Interface to outline struct used for extracting features
 ** Author: Dan Johnson
 **
 ** (c) Copyright Hewlett-Packard Company, 1988.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 ******************************************************************************/

#include "mfoutline.h"

#include "clusttool.h"  // Do not remove: removing this include causes an infinite loop somewhere.
#include "blobs.h"
#include "mfx.h"
#include "params.h"
#include "classify.h"

#include <cmath>
#include <cstdio>

namespace tesseract {

/*---------------------------------------------------------------------------*/
/** Convert a blob into a list of MFOUTLINEs (float-based microfeature format).
 * Returns NIL_LIST for a null blob.
 */
LIST ConvertBlob(TBLOB *blob) {
  LIST outlines = NIL_LIST;
  return (blob == nullptr)
      ? NIL_LIST
      : ConvertOutlines(blob->outlines, outlines, outer);
}


/*---------------------------------------------------------------------------*/
/** Convert a TESSLINE into the float-based MFOUTLINE micro-feature format.
 * Duplicate consecutive points are dropped; the result is a circular list,
 * or NIL_LIST if the outline is empty/degenerate.
 */
MFOUTLINE ConvertOutline(TESSLINE *outline) {
  MFEDGEPT *NewPoint;
  MFOUTLINE MFOutline = NIL_LIST;
  EDGEPT *EdgePoint;
  EDGEPT *StartPoint;
  EDGEPT *NextPoint;

  if (outline == nullptr || outline->loop == nullptr)
    return MFOutline;

  StartPoint = outline->loop;
  EdgePoint = StartPoint;
  do {
    NextPoint = EdgePoint->next;

    /* filter out duplicate points */
    if (EdgePoint->pos.x != NextPoint->pos.x ||
        EdgePoint->pos.y != NextPoint->pos.y) {
      NewPoint = NewEdgePoint();
      NewPoint->ClearMark();
      NewPoint->Hidden = EdgePoint->IsHidden();
      NewPoint->Point.x = EdgePoint->pos.x;
      NewPoint->Point.y = EdgePoint->pos.y;
      MFOutline = push(MFOutline, NewPoint);
    }
    EdgePoint = NextPoint;
  } while (EdgePoint != StartPoint);

  // Close the list into a ring so traversal code can loop endlessly.
  if (MFOutline != nullptr)
    MakeOutlineCircular(MFOutline);
  return MFOutline;
}


/*---------------------------------------------------------------------------*/
/**
 * Convert a tree of outlines to a list of MFOUTLINEs (lists of MFEDGEPTs).
 *
 * @param outline  first outline to be converted
 * @param mf_outlines  list to add converted outlines to
 * @param outline_type  are the outlines outer or holes?
 *        NOTE: currently unused — every outline is converted identically.
 */
LIST ConvertOutlines(TESSLINE *outline,
                     LIST mf_outlines,
                     OUTLINETYPE outline_type) {
  MFOUTLINE mf_outline;

  while (outline != nullptr) {
    mf_outline = ConvertOutline(outline);
    if (mf_outline != nullptr)
      mf_outlines = push(mf_outlines, mf_outline);
    outline = outline->next;
  }
  return mf_outlines;
}

/*---------------------------------------------------------------------------*/
/**
 * This routine searches through the specified outline, computes
 * a slope for each vector in the outline, and marks each
 * vector as having one of the following directions:
 *   N, S, E, W, NE, NW, SE, SW
 * This information is then stored in the outline and the
 * outline is returned.
 * @param Outline  micro-feature outline to analyze
 * @param MinSlope  controls "snapping" of segments to horizontal
 * @param MaxSlope  controls "snapping" of segments to vertical
 */
void FindDirectionChanges(MFOUTLINE Outline,
                          float MinSlope,
                          float MaxSlope) {
  MFEDGEPT *Current;
  MFEDGEPT *Last;
  MFOUTLINE EdgePoint;

  if (DegenerateOutline (Outline))
    return;

  Last = PointAt (Outline);
  Outline = NextPointAfter (Outline);
  EdgePoint = Outline;
  do {
    Current = PointAt (EdgePoint);
    ComputeDirection(Last, Current, MinSlope, MaxSlope);

    Last = Current;
    EdgePoint = NextPointAfter (EdgePoint);
  }
  while (EdgePoint != Outline);

}                                /* FindDirectionChanges */


/*---------------------------------------------------------------------------*/
/**
 * This routine deallocates all of the memory consumed by
 * a micro-feature outline.
 * @param arg  micro-feature outline to be freed
 */
void FreeMFOutline(void *arg) {  //MFOUTLINE Outline)
  MFOUTLINE Start;
  auto Outline = static_cast<MFOUTLINE>(arg);

  /* break the circular outline so we can use std. techniques to deallocate */
  Start = list_rest (Outline);
  set_rest(Outline, NIL_LIST);
  while (Start != nullptr) {
    free(first_node(Start));
    Start = pop (Start);
  }

}                                /* FreeMFOutline */


/*---------------------------------------------------------------------------*/
/**
 * Release all memory consumed by the specified list
 * of outlines.
 * @param Outlines  list of mf-outlines to be freed
 */
void FreeOutlines(LIST Outlines) {
  destroy_nodes(Outlines, FreeMFOutline);
}                                /* FreeOutlines */


/*---------------------------------------------------------------------------*/
/**
 * This routine searches through the specified outline and finds
 * the points at which the outline changes direction.  These
 * points are then marked as "extremities".  This routine is
 * used as an alternative to FindExtremities().  It forces the
 * endpoints of the microfeatures to be at the direction
 * changes rather than at the midpoint between direction
 * changes.
 * @param Outline  micro-feature outline to analyze
 */
void MarkDirectionChanges(MFOUTLINE Outline) {
  MFOUTLINE Current;
  MFOUTLINE Last;
  MFOUTLINE First;

  if (DegenerateOutline (Outline))
    return;

  First = NextDirectionChange (Outline);
  Last = First;
  do {
    Current = NextDirectionChange (Last);
    PointAt(Current)->MarkPoint();
    Last = Current;
  }
  while (Last != First);

}                                /* MarkDirectionChanges */


/*---------------------------------------------------------------------------*/
/** Return a new edge point for a micro-feature outline.
 * malloc'd (not new) to match the free()-based list cleanup in FreeMFOutline.
 */
MFEDGEPT *NewEdgePoint() {
  return reinterpret_cast<MFEDGEPT *>(malloc(sizeof(MFEDGEPT)));
}

/*---------------------------------------------------------------------------*/
/**
 * This routine returns the next point in the micro-feature
 * outline that is an extremity.  The search starts after
 * EdgePoint.  The routine assumes that the outline being
 * searched is not a degenerate outline (i.e. it must have
 * 2 or more edge points).  At least one point must be marked as
 * an extremity or the loop below never terminates.
 * @param EdgePoint  start search from this point
 * @return Next extremity in the outline after EdgePoint.
 * @note Globals: none
 */
MFOUTLINE NextExtremity(MFOUTLINE EdgePoint) {
  EdgePoint = NextPointAfter(EdgePoint);
  while (!PointAt(EdgePoint)->ExtremityMark)
    EdgePoint = NextPointAfter(EdgePoint);

  return (EdgePoint);

}                                /* NextExtremity */


/*---------------------------------------------------------------------------*/
/**
 * This routine normalizes the coordinates of the specified
 * outline so that the outline is deskewed down to the
 * baseline, translated so that x=0 is at XOrigin, and scaled
 * so that the height of a character cell from descender to
 * ascender is 1.  Of this height, 0.25 is for the descender,
 * 0.25 for the ascender, and 0.5 for the x-height.  The
 * y coordinate of the baseline is 0.
 * @param Outline  outline to be normalized
 * @param XOrigin  x-origin of text
 */
void NormalizeOutline(MFOUTLINE Outline,
                      float XOrigin) {
  if (Outline == NIL_LIST)
    return;

  MFOUTLINE EdgePoint = Outline;
  do {
    MFEDGEPT *Current = PointAt(EdgePoint);
    Current->Point.y = MF_SCALE_FACTOR *
        (Current->Point.y - kBlnBaselineOffset);
    Current->Point.x = MF_SCALE_FACTOR * (Current->Point.x - XOrigin);
    EdgePoint = NextPointAfter(EdgePoint);
  } while (EdgePoint != Outline);
}                                /* NormalizeOutline */


/*---------------------------------------------------------------------------*/
/**
 * This routine normalizes every outline in Outlines
 * according to the currently selected normalization method.
 * It also returns the scale factors that it used to do this
 * scaling.  The scale factors returned represent the x and
 * y sizes in the normalized coordinate system that correspond
 * to 1 pixel in the original coordinate system.
 * Outlines are changed and XScale and YScale are updated.
+ * + * Globals: + * - classify_norm_method method being used for normalization + * - classify_char_norm_range map radius of gyration to this value + * @param Outlines list of outlines to be normalized + * @param XScale x-direction scale factor used by routine + * @param YScale y-direction scale factor used by routine + */ +void Classify::NormalizeOutlines(LIST Outlines, + float *XScale, + float *YScale) { + MFOUTLINE Outline; + + switch (classify_norm_method) { + case character: + ASSERT_HOST(!"How did NormalizeOutlines get called in character mode?"); + break; + + case baseline: + iterate(Outlines) { + Outline = static_cast<MFOUTLINE>first_node(Outlines); + NormalizeOutline(Outline, 0.0); + } + *XScale = *YScale = MF_SCALE_FACTOR; + break; + } +} /* NormalizeOutlines */ + +/*---------------------------------------------------------------------------- + Private Code +----------------------------------------------------------------------------*/ +/** + * Change the direction of every vector in the specified + * outline segment to Direction. The segment to be changed + * starts at Start and ends at End. Note that the previous + * direction of End must also be changed to reflect the + * change in direction of the point before it. + * @param Start defines start of segment of outline to be modified + * @param End defines end of segment of outline to be modified + * @param Direction new direction to assign to segment + */ +void ChangeDirection(MFOUTLINE Start, MFOUTLINE End, DIRECTION Direction) { + MFOUTLINE Current; + + for (Current = Start; Current != End; Current = NextPointAfter (Current)) + PointAt (Current)->Direction = Direction; + + PointAt (End)->PreviousDirection = Direction; + +} /* ChangeDirection */ + +/** + * This routine normalizes each point in Outline by + * translating it to the specified center and scaling it + * anisotropically according to the given scale factors. 
+ * @param Outline outline to be character normalized + * @param cn_denorm + */ +void CharNormalizeOutline(MFOUTLINE Outline, const DENORM& cn_denorm) { + MFOUTLINE First, Current; + MFEDGEPT *CurrentPoint; + + if (Outline == NIL_LIST) + return; + + First = Outline; + Current = First; + do { + CurrentPoint = PointAt(Current); + FCOORD pos(CurrentPoint->Point.x, CurrentPoint->Point.y); + cn_denorm.LocalNormTransform(pos, &pos); + CurrentPoint->Point.x = (pos.x() - UINT8_MAX / 2) * MF_SCALE_FACTOR; + CurrentPoint->Point.y = (pos.y() - UINT8_MAX / 2) * MF_SCALE_FACTOR; + + Current = NextPointAfter(Current); + } + while (Current != First); + +} /* CharNormalizeOutline */ + +/** + * This routine computes the slope from Start to Finish and + * and then computes the approximate direction of the line + * segment from Start to Finish. The direction is quantized + * into 8 buckets: + * N, S, E, W, NE, NW, SE, SW + * Both the slope and the direction are then stored into + * the appropriate fields of the Start edge point. The + * direction is also stored into the PreviousDirection field + * of the Finish edge point. 
+ * @param Start starting point to compute direction from + * @param Finish finishing point to compute direction to + * @param MinSlope slope below which lines are horizontal + * @param MaxSlope slope above which lines are vertical + */ +void ComputeDirection(MFEDGEPT *Start, + MFEDGEPT *Finish, + float MinSlope, + float MaxSlope) { + FVECTOR Delta; + + Delta.x = Finish->Point.x - Start->Point.x; + Delta.y = Finish->Point.y - Start->Point.y; + if (Delta.x == 0) { + if (Delta.y < 0) { + Start->Slope = -FLT_MAX; + Start->Direction = south; + } else { + Start->Slope = FLT_MAX; + Start->Direction = north; + } + } else { + Start->Slope = Delta.y / Delta.x; + if (Delta.x > 0) { + if (Delta.y > 0) { + if (Start->Slope > MinSlope) { + if (Start->Slope < MaxSlope) { + Start->Direction = northeast; + } else { + Start->Direction = north; + } + } else { + Start->Direction = east; + } + } + else if (Start->Slope < -MinSlope) { + if (Start->Slope > -MaxSlope) { + Start->Direction = southeast; + } else { + Start->Direction = south; + } + } else { + Start->Direction = east; + } + } else if (Delta.y > 0) { + if (Start->Slope < -MinSlope) { + if (Start->Slope > -MaxSlope) { + Start->Direction = northwest; + } else { + Start->Direction = north; + } + } else { + Start->Direction = west; + } + } else if (Start->Slope > MinSlope) { + if (Start->Slope < MaxSlope) { + Start->Direction = southwest; + } else { + Start->Direction = south; + } + } else { + Start->Direction = west; + } + } + Finish->PreviousDirection = Start->Direction; +} + +/** + * This routine returns the next point in the micro-feature + * outline that has a direction different than EdgePoint. The + * routine assumes that the outline being searched is not a + * degenerate outline (i.e. it must have 2 or more edge points). + * @param EdgePoint start search from this point + * @return Point of next direction change in micro-feature outline. 
+ * @note Globals: none + */ +MFOUTLINE NextDirectionChange(MFOUTLINE EdgePoint) { + DIRECTION InitialDirection; + + InitialDirection = PointAt (EdgePoint)->Direction; + + MFOUTLINE next_pt = nullptr; + do { + EdgePoint = NextPointAfter(EdgePoint); + next_pt = NextPointAfter(EdgePoint); + } while (PointAt(EdgePoint)->Direction == InitialDirection && + !PointAt(EdgePoint)->Hidden && + next_pt != nullptr && !PointAt(next_pt)->Hidden); + + return (EdgePoint); +} + +} // namespace tesseract diff --git a/tesseract/src/classify/mfoutline.h b/tesseract/src/classify/mfoutline.h new file mode 100644 index 00000000..6da42855 --- /dev/null +++ b/tesseract/src/classify/mfoutline.h @@ -0,0 +1,135 @@ +/****************************************************************************** + ** Filename: mfoutline.h + ** Purpose: Interface spec for fx outline structures + ** Author: Dan Johnson + ** + ** (c) Copyright Hewlett-Packard Company, 1988. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + ******************************************************************************/ + +#ifndef MFOUTLINE_H +#define MFOUTLINE_H + +#include "blobs.h" +#include "fpoint.h" +#include "oldlist.h" +#include "params.h" + +namespace tesseract { + +using MFOUTLINE = LIST; + +enum DIRECTION : uint8_t { + north, + south, + east, + west, + northeast, + northwest, + southeast, + southwest +}; + +struct MFEDGEPT { + // Inline functions for manipulating micro-feature outline edge points. 
+ + void ClearMark() { + ExtremityMark = false; + } + + void MarkPoint() { + ExtremityMark = true; + } + + FPOINT Point; + float Slope; + bool Hidden; + bool ExtremityMark; + DIRECTION Direction; + DIRECTION PreviousDirection; +}; + +enum OUTLINETYPE { outer, hole }; + +enum NORM_METHOD { baseline, character }; + +/**---------------------------------------------------------------------------- + Macros +----------------------------------------------------------------------------**/ +#define AverageOf(A, B) (((A) + (B)) / 2) + +// Constant for computing the scale factor to use to normalize characters. +const float MF_SCALE_FACTOR = 0.5f / kBlnXHeight; + +// Inline functions for manipulating micro-feature outlines. + +static inline bool DegenerateOutline(MFOUTLINE Outline) { + return (Outline == NIL_LIST) || (Outline == list_rest(Outline)); +} + +static inline MFEDGEPT* PointAt(MFOUTLINE Outline) { + return reinterpret_cast<MFEDGEPT*>first_node(Outline); +} + +static inline MFOUTLINE NextPointAfter(MFOUTLINE Outline) { + return list_rest(Outline); +} + +static inline void MakeOutlineCircular(MFOUTLINE Outline) { + set_rest(last(Outline), Outline); +} + +/**---------------------------------------------------------------------------- + Public Function Prototypes +----------------------------------------------------------------------------**/ +void ComputeBlobCenter(TBLOB* Blob, TPOINT* BlobCenter); + +LIST ConvertBlob(TBLOB* Blob); + +MFOUTLINE ConvertOutline(TESSLINE* Outline); + +LIST ConvertOutlines(TESSLINE* Outline, LIST ConvertedOutlines, + OUTLINETYPE OutlineType); + +void FilterEdgeNoise(MFOUTLINE Outline, float NoiseSegmentLength); + +void FindDirectionChanges(MFOUTLINE Outline, float MinSlope, float MaxSlope); + +void FreeMFOutline(void* agr); // MFOUTLINE Outline); + +void FreeOutlines(LIST Outlines); + +void MarkDirectionChanges(MFOUTLINE Outline); + +MFEDGEPT* NewEdgePoint(); + +MFOUTLINE NextExtremity(MFOUTLINE EdgePoint); + +void 
NormalizeOutline(MFOUTLINE Outline, float XOrigin); + +/*---------------------------------------------------------------------------- + Private Function Prototypes +-----------------------------------------------------------------------------*/ +void ChangeDirection(MFOUTLINE Start, MFOUTLINE End, DIRECTION Direction); + +// Normalizes the Outline in-place using cn_denorm's local transformation, +// then converts from the integer feature range [0,255] to the clusterer +// feature range of [-0.5, 0.5]. +void CharNormalizeOutline(MFOUTLINE Outline, const DENORM& cn_denorm); + +void ComputeDirection(MFEDGEPT* Start, MFEDGEPT* Finish, float MinSlope, + float MaxSlope); + +MFOUTLINE NextDirectionChange(MFOUTLINE EdgePoint); + +} // namespace tesseract + +#endif diff --git a/tesseract/src/classify/mfx.cpp b/tesseract/src/classify/mfx.cpp new file mode 100644 index 00000000..49b7f0e0 --- /dev/null +++ b/tesseract/src/classify/mfx.cpp @@ -0,0 +1,152 @@ +/****************************************************************************** + ** Filename: mfx.c + ** Purpose: Micro feature extraction routines + ** Author: Dan Johnson + ** + ** (c) Copyright Hewlett-Packard Company, 1988. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ *****************************************************************************/ + +#include "mfx.h" + +#include "mfdefs.h" +#include "mfoutline.h" +#include "clusttool.h" //NEEDED +#include "intfx.h" +#include "normalis.h" +#include "params.h" + +namespace tesseract { + +/* old numbers corresponded to 10.0 degrees and 80.0 degrees */ +double_VAR(classify_min_slope, 0.414213562, + "Slope below which lines are called horizontal"); +double_VAR(classify_max_slope, 2.414213562, + "Slope above which lines are called vertical"); + +/*---------------------------------------------------------------------------- + Private Function Prototypes +-----------------------------------------------------------------------------*/ + +MICROFEATURES ConvertToMicroFeatures(MFOUTLINE Outline, + MICROFEATURES MicroFeatures); + +MICROFEATURE ExtractMicroFeature(MFOUTLINE Start, MFOUTLINE End); + +/*---------------------------------------------------------------------------- + Public Code +----------------------------------------------------------------------------*/ + +/** + * This routine extracts micro-features from the specified + * blob and returns a list of the micro-features. All + * micro-features are normalized according to the specified + * line statistics. + * @param Blob blob to extract micro-features from + * @param cn_denorm control parameter to feature extractor + * @return List of micro-features extracted from the blob. 
+ */ +MICROFEATURES BlobMicroFeatures(TBLOB* Blob, const DENORM& cn_denorm) { + MICROFEATURES MicroFeatures = NIL_LIST; + LIST Outlines; + LIST RemainingOutlines; + MFOUTLINE Outline; + + if (Blob != nullptr) { + Outlines = ConvertBlob(Blob); + + RemainingOutlines = Outlines; + iterate(RemainingOutlines) { + Outline = static_cast<MFOUTLINE>first_node (RemainingOutlines); + CharNormalizeOutline(Outline, cn_denorm); + } + + RemainingOutlines = Outlines; + iterate(RemainingOutlines) { + Outline = static_cast<MFOUTLINE>first_node(RemainingOutlines); + FindDirectionChanges(Outline, classify_min_slope, classify_max_slope); + MarkDirectionChanges(Outline); + MicroFeatures = ConvertToMicroFeatures(Outline, MicroFeatures); + } + FreeOutlines(Outlines); + } + return MicroFeatures; +} /* BlobMicroFeatures */ + +/*--------------------------------------------------------------------------- + Private Code +---------------------------------------------------------------------------*/ + +/** + * Convert Outline to MicroFeatures + * @param Outline outline to extract micro-features from + * @param MicroFeatures list of micro-features to add to + * @return List of micro-features with new features added to front. + * @note Globals: none + */ +MICROFEATURES ConvertToMicroFeatures(MFOUTLINE Outline, + MICROFEATURES MicroFeatures) { + MFOUTLINE Current; + MFOUTLINE Last; + MFOUTLINE First; + MICROFEATURE NewFeature; + + if (DegenerateOutline (Outline)) + return (MicroFeatures); + + First = NextExtremity (Outline); + Last = First; + do { + Current = NextExtremity (Last); + if (!PointAt(Current)->Hidden) { + NewFeature = ExtractMicroFeature (Last, Current); + if (NewFeature != nullptr) + MicroFeatures = push (MicroFeatures, NewFeature); + } + Last = Current; + } + while (Last != First); + + return (MicroFeatures); +} /* ConvertToMicroFeatures */ + +/** + * This routine computes the feature parameters which describe + * the micro-feature that starts and Start and ends at End. 
+ * A new micro-feature is allocated, filled with the feature + * parameters, and returned. The routine assumes that + * Start and End are not the same point. If they are the + * same point, nullptr is returned, a warning message is + * printed, and the current outline is dumped to stdout. + * @param Start starting point of micro-feature + * @param End ending point of micro-feature + * @return New micro-feature or nullptr if the feature was rejected. + * @note Globals: none + */ +MICROFEATURE ExtractMicroFeature(MFOUTLINE Start, MFOUTLINE End) { + MICROFEATURE NewFeature; + MFEDGEPT *P1, *P2; + + P1 = PointAt(Start); + P2 = PointAt(End); + + NewFeature = NewMicroFeature (); + NewFeature[XPOSITION] = AverageOf(P1->Point.x, P2->Point.x); + NewFeature[YPOSITION] = AverageOf(P1->Point.y, P2->Point.y); + NewFeature[MFLENGTH] = DistanceBetween(P1->Point, P2->Point); + NewFeature[ORIENTATION] = NormalizedAngleFrom(&P1->Point, &P2->Point, 1.0); + NewFeature[FIRSTBULGE] = 0.0f; // deprecated + NewFeature[SECONDBULGE] = 0.0f; // deprecated + + return NewFeature; +} /* ExtractMicroFeature */ + +} // namespace tesseract diff --git a/tesseract/src/classify/mfx.h b/tesseract/src/classify/mfx.h new file mode 100644 index 00000000..818e6917 --- /dev/null +++ b/tesseract/src/classify/mfx.h @@ -0,0 +1,46 @@ +/****************************************************************************** + ** Filename: mfx.h + ** Purpose: Definition of micro-feature extraction routines + ** Author: Dan Johnson + ** + ** (c) Copyright Hewlett-Packard Company, 1988. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. 
+ ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + ******************************************************************************/ + +#ifndef MFX_H +#define MFX_H + +#include "mfdefs.h" +#include "params.h" + +namespace tesseract { + +class DENORM; +struct TBLOB; + +/*---------------------------------------------------------------------------- + Variables +----------------------------------------------------------------------------**/ + +/* old numbers corresponded to 10.0 degrees and 80.0 degrees */ +extern double_VAR_H(classify_min_slope, 0.414213562, + "Slope below which lines are called horizontal"); +extern double_VAR_H(classify_max_slope, 2.414213562, + "Slope above which lines are called vertical"); + +/*---------------------------------------------------------------------------- + Public Function Prototypes +----------------------------------------------------------------------------**/ +MICROFEATURES BlobMicroFeatures(TBLOB* Blob, const DENORM& cn_denorm); + +} // namespace tesseract + +#endif diff --git a/tesseract/src/classify/normfeat.cpp b/tesseract/src/classify/normfeat.cpp new file mode 100644 index 00000000..6aa13cc2 --- /dev/null +++ b/tesseract/src/classify/normfeat.cpp @@ -0,0 +1,73 @@ +/****************************************************************************** + ** Filename: normfeat.c + ** Purpose: Definition of char normalization features. + ** Author: Dan Johnson + ** History: 12/14/90, DSJ, Created. + ** + ** (c) Copyright Hewlett-Packard Company, 1988. 
+ ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + ******************************************************************************/ + +#include "normfeat.h" + +#include "intfx.h" +#include "featdefs.h" +#include "mfoutline.h" + +namespace tesseract { + +/** Return the length of the outline in baseline normalized form. */ +float ActualOutlineLength(FEATURE Feature) { + return (Feature->Params[CharNormLength] * LENGTH_COMPRESSION); +} + +/** + * Return the character normalization feature for a blob. + * + * The features returned are in a scale where the x-height has been + * normalized to live in the region y = [-0.25 .. 0.25]. Example ranges + * for English below are based on the Linux font collection on 2009-12-04: + * + * - Params[CharNormY] + * - The y coordinate of the grapheme's centroid. + * - English: [-0.27, 0.71] + * + * - Params[CharNormLength] + * - The length of the grapheme's outline (tiny segments discarded), + * divided by 10.0=LENGTH_COMPRESSION. + * - English: [0.16, 0.85] + * + * - Params[CharNormRx] + * - The radius of gyration about the x axis, as measured from CharNormY. + * - English: [0.011, 0.34] + * + * - Params[CharNormRy] + * - The radius of gyration about the y axis, as measured from + * the x center of the grapheme's bounding box. 
+ * - English: [0.011, 0.31] + */ +FEATURE_SET ExtractCharNormFeatures(const INT_FX_RESULT_STRUCT& fx_info) { + FEATURE_SET feature_set = NewFeatureSet(1); + FEATURE feature = NewFeature(&CharNormDesc); + + feature->Params[CharNormY] = + MF_SCALE_FACTOR * (fx_info.Ymean - kBlnBaselineOffset); + feature->Params[CharNormLength] = + MF_SCALE_FACTOR * fx_info.Length / LENGTH_COMPRESSION; + feature->Params[CharNormRx] = MF_SCALE_FACTOR * fx_info.Rx; + feature->Params[CharNormRy] = MF_SCALE_FACTOR * fx_info.Ry; + + AddFeature(feature_set, feature); + + return feature_set; +} /* ExtractCharNormFeatures */ + +} // namespace tesseract diff --git a/tesseract/src/classify/normfeat.h b/tesseract/src/classify/normfeat.h new file mode 100644 index 00000000..6293cab9 --- /dev/null +++ b/tesseract/src/classify/normfeat.h @@ -0,0 +1,40 @@ +/****************************************************************************** + ** Filename: normfeat.h + ** Purpose: Definition of character normalization features. + ** Author: Dan Johnson + ** History: 12/14/90, DSJ, Created. + ** + ** (c) Copyright Hewlett-Packard Company, 1988. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ *****************************************************************************/ + +#ifndef NORMFEAT_H +#define NORMFEAT_H + +#include "ocrfeatures.h" + +namespace tesseract { + +#define LENGTH_COMPRESSION (10.0) + +struct INT_FX_RESULT_STRUCT; + +typedef enum { + CharNormY, CharNormLength, CharNormRx, CharNormRy +} NORM_PARAM_NAME; + +float ActualOutlineLength(FEATURE Feature); + +FEATURE_SET ExtractCharNormFeatures(const INT_FX_RESULT_STRUCT &fx_info); + +} + +#endif diff --git a/tesseract/src/classify/normmatch.cpp b/tesseract/src/classify/normmatch.cpp new file mode 100644 index 00000000..32bd2876 --- /dev/null +++ b/tesseract/src/classify/normmatch.cpp @@ -0,0 +1,231 @@ +/****************************************************************************** + ** Filename: normmatch.c + ** Purpose: Simple matcher based on character normalization features. + ** Author: Dan Johnson + ** + ** (c) Copyright Hewlett-Packard Company, 1988. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ ******************************************************************************/ +/*---------------------------------------------------------------------------- + Include Files and Type Defines +----------------------------------------------------------------------------*/ +#include "normmatch.h" + +#include "classify.h" +#include "clusttool.h" +#include "helpers.h" +#include "normfeat.h" +#include "unicharset.h" +#include "params.h" + +#include <cstdio> +#include <cmath> +#include <sstream> // for std::istringstream + +namespace tesseract { + +struct NORM_PROTOS +{ + int NumParams; + PARAM_DESC *ParamDesc; + LIST* Protos; + int NumProtos; +}; + +/*---------------------------------------------------------------------------- + Private Code +----------------------------------------------------------------------------*/ + +/** + * @name NormEvidenceOf + * + * Return the new type of evidence number corresponding to this + * normalization adjustment. The equation that represents the transform is: + * 1 / (1 + (NormAdj / midpoint) ^ curl) + */ +static double NormEvidenceOf(double NormAdj) { + NormAdj /= classify_norm_adj_midpoint; + + if (classify_norm_adj_curl == 3) { + NormAdj = NormAdj * NormAdj * NormAdj; + } else if (classify_norm_adj_curl == 2) { + NormAdj = NormAdj * NormAdj; + } else { + NormAdj = pow(NormAdj, classify_norm_adj_curl); + } + return (1.0 / (1.0 + NormAdj)); +} + +/*---------------------------------------------------------------------------- + Variables +----------------------------------------------------------------------------*/ + +/** control knobs used to control the normalization adjustment process */ +double_VAR(classify_norm_adj_midpoint, 32.0, "Norm adjust midpoint ..."); +double_VAR(classify_norm_adj_curl, 2.0, "Norm adjust curl ..."); +/** Weight of width variance against height and vertical position. 
*/ +const double kWidthErrorWeighting = 0.125; + +/*---------------------------------------------------------------------------- + Public Code +----------------------------------------------------------------------------*/ +/** + * This routine compares Features against each character + * normalization proto for ClassId and returns the match + * rating of the best match. + * @param ClassId id of class to match against + * @param feature character normalization feature + * @param DebugMatch controls dump of debug info + * + * Globals: + * #NormProtos character normalization prototypes + * + * @return Best match rating for Feature against protos of ClassId. + */ +float Classify::ComputeNormMatch(CLASS_ID ClassId, + const FEATURE_STRUCT& feature, + bool DebugMatch) { + LIST Protos; + float BestMatch; + float Match; + float Delta; + PROTOTYPE *Proto; + int ProtoId; + + if (ClassId >= NormProtos->NumProtos) { + ClassId = NO_CLASS; + } + + /* handle requests for classification as noise */ + if (ClassId == NO_CLASS) { + /* kludge - clean up constants and make into control knobs later */ + Match = (feature.Params[CharNormLength] * + feature.Params[CharNormLength] * 500.0 + + feature.Params[CharNormRx] * + feature.Params[CharNormRx] * 8000.0 + + feature.Params[CharNormRy] * + feature.Params[CharNormRy] * 8000.0); + return (1.0 - NormEvidenceOf(Match)); + } + + BestMatch = FLT_MAX; + Protos = NormProtos->Protos[ClassId]; + + if (DebugMatch) { + tprintf("\nChar norm for class %s\n", unicharset.id_to_unichar(ClassId)); + } + + ProtoId = 0; + iterate(Protos) { + Proto = reinterpret_cast<PROTOTYPE *>first_node (Protos); + Delta = feature.Params[CharNormY] - Proto->Mean[CharNormY]; + Match = Delta * Delta * Proto->Weight.Elliptical[CharNormY]; + if (DebugMatch) { + tprintf("YMiddle: Proto=%g, Delta=%g, Var=%g, Dist=%g\n", + Proto->Mean[CharNormY], Delta, + Proto->Weight.Elliptical[CharNormY], Match); + } + Delta = feature.Params[CharNormRx] - Proto->Mean[CharNormRx]; + Match += 
Delta * Delta * Proto->Weight.Elliptical[CharNormRx]; + if (DebugMatch) { + tprintf("Height: Proto=%g, Delta=%g, Var=%g, Dist=%g\n", + Proto->Mean[CharNormRx], Delta, + Proto->Weight.Elliptical[CharNormRx], Match); + } + // Ry is width! See intfx.cpp. + Delta = feature.Params[CharNormRy] - Proto->Mean[CharNormRy]; + if (DebugMatch) { + tprintf("Width: Proto=%g, Delta=%g, Var=%g\n", + Proto->Mean[CharNormRy], Delta, + Proto->Weight.Elliptical[CharNormRy]); + } + Delta = Delta * Delta * Proto->Weight.Elliptical[CharNormRy]; + Delta *= kWidthErrorWeighting; + Match += Delta; + if (DebugMatch) { + tprintf("Total Dist=%g, scaled=%g, sigmoid=%g, penalty=%g\n", + Match, Match / classify_norm_adj_midpoint, + NormEvidenceOf(Match), 256 * (1 - NormEvidenceOf(Match))); + } + + if (Match < BestMatch) + BestMatch = Match; + + ProtoId++; + } + return 1.0 - NormEvidenceOf(BestMatch); +} /* ComputeNormMatch */ + +void Classify::FreeNormProtos() { + if (NormProtos != nullptr) { + for (int i = 0; i < NormProtos->NumProtos; i++) + FreeProtoList(&NormProtos->Protos[i]); + free(NormProtos->Protos); + free(NormProtos->ParamDesc); + free(NormProtos); + NormProtos = nullptr; + } +} + +/** + * This routine allocates a new data structure to hold + * a set of character normalization protos. It then fills in + * the data structure by reading from the specified File. + * @param fp open text file to read normalization protos from + * Globals: none + * @return Character normalization protos. 
+ */ +NORM_PROTOS *Classify::ReadNormProtos(TFile *fp) { + NORM_PROTOS *NormProtos; + int i; + char unichar[2 * UNICHAR_LEN + 1]; + UNICHAR_ID unichar_id; + LIST Protos; + int NumProtos; + + /* allocate and initialization data structure */ + NormProtos = static_cast<NORM_PROTOS *>(malloc (sizeof (NORM_PROTOS))); + NormProtos->NumProtos = unicharset.size(); + NormProtos->Protos = static_cast<LIST *>(malloc (NormProtos->NumProtos * sizeof(LIST))); + for (i = 0; i < NormProtos->NumProtos; i++) + NormProtos->Protos[i] = NIL_LIST; + + /* read file header and save in data structure */ + NormProtos->NumParams = ReadSampleSize(fp); + NormProtos->ParamDesc = ReadParamDesc(fp, NormProtos->NumParams); + + /* read protos for each class into a separate list */ + const int kMaxLineSize = 100; + char line[kMaxLineSize]; + while (fp->FGets(line, kMaxLineSize) != nullptr) { + std::istringstream stream(line); + stream.imbue(std::locale::classic()); + stream >> unichar >> NumProtos; + if (stream.fail()) { + continue; + } + if (unicharset.contains_unichar(unichar)) { + unichar_id = unicharset.unichar_to_id(unichar); + Protos = NormProtos->Protos[unichar_id]; + for (i = 0; i < NumProtos; i++) + Protos = push_last(Protos, ReadPrototype(fp, NormProtos->NumParams)); + NormProtos->Protos[unichar_id] = Protos; + } else { + tprintf("Error: unichar %s in normproto file is not in unichar set.\n", + unichar); + for (i = 0; i < NumProtos; i++) + FreePrototype(ReadPrototype(fp, NormProtos->NumParams)); + } + } + return (NormProtos); +} /* ReadNormProtos */ + +} // namespace tesseract diff --git a/tesseract/src/classify/normmatch.h b/tesseract/src/classify/normmatch.h new file mode 100644 index 00000000..77f66550 --- /dev/null +++ b/tesseract/src/classify/normmatch.h @@ -0,0 +1,34 @@ +/****************************************************************************** + ** Filename: normmatch.h + ** Purpose: Simple matcher based on character normalization features. 
+ ** Author: Dan Johnson + ** + ** (c) Copyright Hewlett-Packard Company, 1988. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + ******************************************************************************/ + +#ifndef NORMMATCH_H +#define NORMMATCH_H + +#include "matchdefs.h" +#include "ocrfeatures.h" +#include "params.h" + +namespace tesseract { + +/* control knobs used to control the normalization adjustment process */ +extern double_VAR_H(classify_norm_adj_midpoint, 32.0, + "Norm adjust midpoint ..."); +extern double_VAR_H(classify_norm_adj_curl, 2.0, "Norm adjust curl ..."); + +} // namespace tesseract + +#endif diff --git a/tesseract/src/classify/ocrfeatures.cpp b/tesseract/src/classify/ocrfeatures.cpp new file mode 100644 index 00000000..b8d646b1 --- /dev/null +++ b/tesseract/src/classify/ocrfeatures.cpp @@ -0,0 +1,190 @@ +/****************************************************************************** + ** Filename: ocrfeatures.cpp + ** Purpose: Generic definition of a feature. + ** Author: Dan Johnson + ** + ** (c) Copyright Hewlett-Packard Company, 1988. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. 
+ ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + ******************************************************************************/ + +#include "ocrfeatures.h" + +#include "scanutils.h" +#include "strngs.h" // for STRING + +#include <cassert> +#include <cmath> + +namespace tesseract { + +/*---------------------------------------------------------------------------- + Public Code +----------------------------------------------------------------------------*/ +/** + * Add a feature to a feature set. If the feature set is + * already full, false is returned to indicate that the + * feature could not be added to the set; otherwise, true is + * returned. + * @param FeatureSet set of features to add Feature to + * @param Feature feature to be added to FeatureSet + * @return true if feature added to set, false if set is already full. + */ +bool AddFeature(FEATURE_SET FeatureSet, FEATURE Feature) { + if (FeatureSet->NumFeatures >= FeatureSet->MaxNumFeatures) { + FreeFeature(Feature); + return false; + } + + FeatureSet->Features[FeatureSet->NumFeatures++] = Feature; + return true; +} /* AddFeature */ + +/** + * Release the memory consumed by the specified feature. + * @param Feature feature to be deallocated. + */ +void FreeFeature(FEATURE Feature) { free(Feature); } /* FreeFeature */ + +/** + * Release the memory consumed by the specified feature + * set. This routine also frees the memory consumed by the + * features contained in the set. 
+ * @param FeatureSet set of features to be freed + */ +void FreeFeatureSet(FEATURE_SET FeatureSet) { + int i; + + if (FeatureSet) { + for (i = 0; i < FeatureSet->NumFeatures; i++) + FreeFeature(FeatureSet->Features[i]); + free(FeatureSet); + } +} /* FreeFeatureSet */ + +/** + * Allocate and return a new feature of the specified + * type. + * @param FeatureDesc description of feature to be created. + * @return New #FEATURE. + */ +FEATURE NewFeature(const FEATURE_DESC_STRUCT* FeatureDesc) { + FEATURE Feature; + + Feature = static_cast<FEATURE>(malloc(sizeof(FEATURE_STRUCT) + + (FeatureDesc->NumParams - 1) * sizeof(float))); + Feature->Type = FeatureDesc; + return (Feature); + +} /* NewFeature */ + +/** + * Allocate and return a new feature set large enough to + * hold the specified number of features. + * @param NumFeatures maximum # of features to be put in feature set + * @return New #FEATURE_SET. + */ +FEATURE_SET NewFeatureSet(int NumFeatures) { + FEATURE_SET FeatureSet; + + FeatureSet = static_cast<FEATURE_SET>(malloc (sizeof (FEATURE_SET_STRUCT) + + (NumFeatures - 1) * sizeof (FEATURE))); + FeatureSet->MaxNumFeatures = NumFeatures; + FeatureSet->NumFeatures = 0; + return (FeatureSet); + +} /* NewFeatureSet */ + +/** + * Create a new feature of the specified type and read in + * the value of its parameters from File. The extra penalty + * for the feature is also computed by calling the appropriate + * function for the specified feature type. The correct text + * representation for a feature is a list of N floats where + * N is the number of parameters in the feature. + * @param File open text file to read feature from + * @param FeatureDesc specifies type of feature to read from File + * @return New #FEATURE read from File. 
+ */ +static FEATURE ReadFeature(FILE* File, const FEATURE_DESC_STRUCT* FeatureDesc) { + FEATURE Feature; + int i; + + Feature = NewFeature (FeatureDesc); + for (i = 0; i < Feature->Type->NumParams; i++) { + ASSERT_HOST(tfscanf(File, "%f", &(Feature->Params[i])) == 1); +#ifndef _WIN32 + assert (!std::isnan(Feature->Params[i])); +#endif + } + return Feature; +} + +/** + * Create a new feature set of the specified type and read in + * the features from File. The correct text representation + * for a feature set is an integer which specifies the number (N) + * of features in a set followed by a list of N feature + * descriptions. + * @param File open text file to read new feature set from + * @param FeatureDesc specifies type of feature to read from File + * @return New feature set read from File. + */ +FEATURE_SET ReadFeatureSet(FILE* File, const FEATURE_DESC_STRUCT* FeatureDesc) { + int NumFeatures; + ASSERT_HOST(tfscanf(File, "%d", &NumFeatures) == 1); + ASSERT_HOST(NumFeatures >= 0); + + FEATURE_SET FeatureSet = NewFeatureSet(NumFeatures); + for (int i = 0; i < NumFeatures; i++) + AddFeature(FeatureSet, ReadFeature(File, FeatureDesc)); + + return FeatureSet; +} + +/** + * Appends a textual representation of Feature to str. + * This representation is simply a list of the N parameters + * of the feature, terminated with a newline. It is assumed + * that the ExtraPenalty field can be reconstructed from the + * parameters of the feature. It is also assumed that the + * feature type information is specified or assumed elsewhere. + * @param Feature feature to write out to str + * @param str string to write Feature to + */ +static void WriteFeature(FEATURE Feature, STRING* str) { + for (int i = 0; i < Feature->Type->NumParams; i++) { +#ifndef WIN32 + assert(!std::isnan(Feature->Params[i])); +#endif + str->add_str_double(" ", Feature->Params[i]); + } + *str += "\n"; +} /* WriteFeature */ + +/** + * Write a textual representation of FeatureSet to File. 
+ * This representation is an integer specifying the number of + * features in the set, followed by a newline, followed by + * text representations for each feature in the set. + * @param FeatureSet feature set to write to File + * @param str string to write Feature to + */ +void WriteFeatureSet(FEATURE_SET FeatureSet, STRING* str) { + if (FeatureSet) { + str->add_str_int("", FeatureSet->NumFeatures); + *str += "\n"; + for (int i = 0; i < FeatureSet->NumFeatures; i++) { + WriteFeature(FeatureSet->Features[i], str); + } + } +} /* WriteFeatureSet */ + +} // namespace tesseract diff --git a/tesseract/src/classify/ocrfeatures.h b/tesseract/src/classify/ocrfeatures.h new file mode 100644 index 00000000..edf63496 --- /dev/null +++ b/tesseract/src/classify/ocrfeatures.h @@ -0,0 +1,122 @@ +/****************************************************************************** + ** Filename: features.h + ** Purpose: Generic definition of a feature. + ** Author: Dan Johnson + ** + ** (c) Copyright Hewlett-Packard Company, 1988. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + ******************************************************************************/ + +#ifndef FEATURES_H +#define FEATURES_H + +#include "blobs.h" + +#include <cstdio> + +namespace tesseract { + +class DENORM; + +#undef Min +#undef Max +#define FEAT_NAME_SIZE 80 + +// A character is described by multiple sets of extracted features. 
Each +// set contains a number of features of a particular type, for example, a +// set of bays, or a set of closures, or a set of microfeatures. Each +// feature consists of a number of parameters. All features within a +// feature set contain the same number of parameters. All circular +// parameters are required to be the first parameters in the feature. + +struct PARAM_DESC { + bool Circular; // true if dimension wraps around + bool NonEssential; // true if dimension not used in searches + float Min; // low end of range for circular dimensions + float Max; // high end of range for circular dimensions + float Range; // Max - Min + float HalfRange; // (Max - Min)/2 + float MidRange; // (Max + Min)/2 +}; + +struct FEATURE_DESC_STRUCT { + uint16_t NumParams; // total # of params + const char* ShortName; // short name for feature + const PARAM_DESC* ParamDesc; // array - one per param +}; +using FEATURE_DESC = FEATURE_DESC_STRUCT*; + +struct FEATURE_STRUCT { + const FEATURE_DESC_STRUCT* Type; // points to description of feature type + float Params[1]; // variable size array - params for feature +}; +using FEATURE = FEATURE_STRUCT*; + +struct FEATURE_SET_STRUCT { + uint16_t NumFeatures; // number of features in set + uint16_t MaxNumFeatures; // maximum size of feature set + FEATURE Features[1]; // variable size array of features +}; +using FEATURE_SET = FEATURE_SET_STRUCT*; + +// A generic character description as a char pointer. In reality, it will be +// a pointer to some data structure. Paired feature extractors/matchers need +// to agree on the data structure to be used, however, the high level +// classifier does not need to know the details of this data structure. 
+using CHAR_FEATURES = char*; + +/*---------------------------------------------------------------------- + Macros for defining the parameters of a new features +----------------------------------------------------------------------*/ +#define StartParamDesc(Name) const PARAM_DESC Name[] = { +#define DefineParam(Circular, NonEssential, Min, Max) \ + {Circular, \ + NonEssential, \ + Min, \ + Max, \ + (Max) - (Min), \ + (((Max) - (Min)) / 2.0), \ + (((Max) + (Min)) / 2.0)}, + +#define EndParamDesc }; + +/*---------------------------------------------------------------------- +Macro for describing a new feature. The parameters of the macro +are as follows: + +DefineFeature (Name, NumLinear, NumCircular, ShortName, ParamName) +----------------------------------------------------------------------*/ +#define DefineFeature(Name, NL, NC, SN, PN) \ + const FEATURE_DESC_STRUCT Name = {((NL) + (NC)), SN, PN}; + +/*---------------------------------------------------------------------- + Generic routines that work for all feature types +----------------------------------------------------------------------*/ +bool AddFeature(FEATURE_SET FeatureSet, FEATURE Feature); + +TESS_API +void FreeFeature(FEATURE Feature); + +TESS_API +void FreeFeatureSet(FEATURE_SET FeatureSet); + +TESS_API +FEATURE NewFeature(const FEATURE_DESC_STRUCT* FeatureDesc); + +FEATURE_SET NewFeatureSet(int NumFeatures); + +FEATURE_SET ReadFeatureSet(FILE* File, const FEATURE_DESC_STRUCT* FeatureDesc); + +void WriteFeatureSet(FEATURE_SET FeatureSet, STRING* str); + +} // namespace tesseract + +#endif diff --git a/tesseract/src/classify/outfeat.cpp b/tesseract/src/classify/outfeat.cpp new file mode 100644 index 00000000..f4746372 --- /dev/null +++ b/tesseract/src/classify/outfeat.cpp @@ -0,0 +1,168 @@ +/****************************************************************************** + ** Filename: outfeat.c + ** Purpose: Definition of outline-features. 
+ ** Author: Dan Johnson + ** + ** (c) Copyright Hewlett-Packard Company, 1988. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + ******************************************************************************/ + +#include "outfeat.h" + +#include "classify.h" +#include "featdefs.h" +#include "mfoutline.h" +#include "ocrfeatures.h" + +#include <cstdio> + +namespace tesseract { + +/*---------------------------------------------------------------------------- + Public Code +----------------------------------------------------------------------------*/ + +/** + * Convert each segment in the outline to a feature + * and return the features. + * @param Blob blob to extract pico-features from + * @return Outline-features for Blob. 
+ * @note Globals: none + */ +FEATURE_SET Classify::ExtractOutlineFeatures(TBLOB *Blob) { + LIST Outlines; + LIST RemainingOutlines; + MFOUTLINE Outline; + FEATURE_SET FeatureSet; + float XScale, YScale; + + FeatureSet = NewFeatureSet (MAX_OUTLINE_FEATURES); + if (Blob == nullptr) + return (FeatureSet); + + Outlines = ConvertBlob (Blob); + + NormalizeOutlines(Outlines, &XScale, &YScale); + RemainingOutlines = Outlines; + iterate(RemainingOutlines) { + Outline = static_cast<MFOUTLINE>first_node (RemainingOutlines); + ConvertToOutlineFeatures(Outline, FeatureSet); + } + if (classify_norm_method == baseline) + NormalizeOutlineX(FeatureSet); + FreeOutlines(Outlines); + return (FeatureSet); +} /* ExtractOutlineFeatures */ + +/*---------------------------------------------------------------------------- + Private Code +----------------------------------------------------------------------------*/ +/*---------------------------------------------------------------------------*/ +/** + * This routine computes the midpoint between Start and + * End to obtain the x,y position of the outline-feature. It + * also computes the direction from Start to End as the + * direction of the outline-feature and the distance from + * Start to End as the length of the outline-feature. + * This feature is then + * inserted into the next feature slot in FeatureSet. 
+ * @param Start starting point of outline-feature + * @param End ending point of outline-feature + * @param FeatureSet set to add outline-feature to + */ +void AddOutlineFeatureToSet(FPOINT *Start, + FPOINT *End, + FEATURE_SET FeatureSet) { + FEATURE Feature; + + Feature = NewFeature(&OutlineFeatDesc); + Feature->Params[OutlineFeatDir] = NormalizedAngleFrom(Start, End, 1.0); + Feature->Params[OutlineFeatX] = AverageOf(Start->x, End->x); + Feature->Params[OutlineFeatY] = AverageOf(Start->y, End->y); + Feature->Params[OutlineFeatLength] = DistanceBetween(*Start, *End); + AddFeature(FeatureSet, Feature); + +} /* AddOutlineFeatureToSet */ + + +/*---------------------------------------------------------------------------*/ +/** + * This routine steps converts each section in the specified + * outline to a feature described by its x,y position, length + * and angle. + * Results are returned in FeatureSet. + * @param Outline outline to extract outline-features from + * @param FeatureSet set of features to add outline-features to + */ +void ConvertToOutlineFeatures(MFOUTLINE Outline, FEATURE_SET FeatureSet) { + MFOUTLINE Next; + MFOUTLINE First; + FPOINT FeatureStart; + FPOINT FeatureEnd; + + if (DegenerateOutline (Outline)) + return; + + First = Outline; + Next = First; + do { + FeatureStart = PointAt(Next)->Point; + Next = NextPointAfter(Next); + + /* note that an edge is hidden if the ending point of the edge is + marked as hidden. This situation happens because the order of + the outlines is reversed when they are converted from the old + format. In the old format, a hidden edge is marked by the + starting point for that edge. 
*/ + if (!PointAt(Next)->Hidden) { + FeatureEnd = PointAt(Next)->Point; + AddOutlineFeatureToSet(&FeatureStart, &FeatureEnd, FeatureSet); + } + } + while (Next != First); +} /* ConvertToOutlineFeatures */ + + +/*---------------------------------------------------------------------------*/ +/** + * This routine computes the weighted average x position + * over all of the outline-features in FeatureSet and then + * renormalizes the outline-features to force this average + * to be the x origin (i.e. x=0). + * FeatureSet is changed. + * @param FeatureSet outline-features to be normalized + */ +void NormalizeOutlineX(FEATURE_SET FeatureSet) { + int i; + FEATURE Feature; + float Length; + float TotalX = 0.0; + float TotalWeight = 0.0; + float Origin; + + if (FeatureSet->NumFeatures <= 0) + return; + + for (i = 0; i < FeatureSet->NumFeatures; i++) { + Feature = FeatureSet->Features[i]; + Length = Feature->Params[OutlineFeatLength]; + TotalX += Feature->Params[OutlineFeatX] * Length; + TotalWeight += Length; + } + Origin = TotalX / TotalWeight; + + for (i = 0; i < FeatureSet->NumFeatures; i++) { + Feature = FeatureSet->Features[i]; + Feature->Params[OutlineFeatX] -= Origin; + } +} /* NormalizeOutlineX */ + +} // namespace tesseract diff --git a/tesseract/src/classify/outfeat.h b/tesseract/src/classify/outfeat.h new file mode 100644 index 00000000..eefde2e1 --- /dev/null +++ b/tesseract/src/classify/outfeat.h @@ -0,0 +1,49 @@ +/****************************************************************************** + ** Filename: outfeat.h + ** Purpose: Definition of outline features. + ** Author: Dan Johnson + ** + ** (c) Copyright Hewlett-Packard Company, 1988. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. 
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 ******************************************************************************/

#ifndef OUTFEAT_H
#define OUTFEAT_H

#include "ocrfeatures.h"
#include "fpoint.h"
#include "mfoutline.h"

namespace tesseract {

// Parameter indices of an outline feature: midpoint position (x, y),
// segment length, and segment direction.
typedef enum {
  OutlineFeatX,
  OutlineFeatY,
  OutlineFeatLength,
  OutlineFeatDir
} OUTLINE_FEAT_PARAM_NAME;

// Capacity of the feature set built by ExtractOutlineFeatures.
#define MAX_OUTLINE_FEATURES (100)

/*---------------------------------------------------------------------------
          Private Function Prototypes
----------------------------------------------------------------------------*/
void AddOutlineFeatureToSet(FPOINT *Start,
                            FPOINT *End,
                            FEATURE_SET FeatureSet);

void ConvertToOutlineFeatures(MFOUTLINE Outline, FEATURE_SET FeatureSet);

void NormalizeOutlineX(FEATURE_SET FeatureSet);

} // namespace tesseract

#endif
diff --git a/tesseract/src/classify/picofeat.cpp b/tesseract/src/classify/picofeat.cpp new file mode 100644 index 00000000..17f5e66d --- /dev/null +++ b/tesseract/src/classify/picofeat.cpp @@ -0,0 +1,264 @@
/******************************************************************************
 ** Filename: picofeat.c
 ** Purpose: Definition of pico-features.
+ ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + ******************************************************************************/ + +#include "picofeat.h" + +#include "classify.h" +#include "featdefs.h" +#include "fpoint.h" +#include "mfoutline.h" +#include "ocrfeatures.h" +#include "params.h" +#include "trainingsample.h" + +#include <cmath> +#include <cstdio> + +namespace tesseract { + +/*--------------------------------------------------------------------------- + Variables +----------------------------------------------------------------------------*/ + +double_VAR(classify_pico_feature_length, 0.05, "Pico Feature Length"); + +/*--------------------------------------------------------------------------- + Private Function Prototypes +----------------------------------------------------------------------------*/ +void ConvertSegmentToPicoFeat(FPOINT *Start, + FPOINT *End, + FEATURE_SET FeatureSet); + +void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet); + +void NormalizePicoX(FEATURE_SET FeatureSet); + +/*---------------------------------------------------------------------------- + Public Code +----------------------------------------------------------------------------*/ +/*---------------------------------------------------------------------------*/ +/** + * Operation: Dummy for now. + * + * Globals: + * - classify_norm_method normalization method currently specified + * @param Blob blob to extract pico-features from + * @return Pico-features for Blob. 
+ */ +FEATURE_SET Classify::ExtractPicoFeatures(TBLOB *Blob) { + LIST Outlines; + LIST RemainingOutlines; + MFOUTLINE Outline; + FEATURE_SET FeatureSet; + float XScale, YScale; + + FeatureSet = NewFeatureSet(MAX_PICO_FEATURES); + Outlines = ConvertBlob(Blob); + NormalizeOutlines(Outlines, &XScale, &YScale); + RemainingOutlines = Outlines; + iterate(RemainingOutlines) { + Outline = static_cast<MFOUTLINE>first_node (RemainingOutlines); + ConvertToPicoFeatures2(Outline, FeatureSet); + } + if (classify_norm_method == baseline) + NormalizePicoX(FeatureSet); + FreeOutlines(Outlines); + return (FeatureSet); + +} /* ExtractPicoFeatures */ + +/*---------------------------------------------------------------------------- + Private Code +----------------------------------------------------------------------------*/ +/*---------------------------------------------------------------------------*/ +/** + * This routine converts an entire segment of an outline + * into a set of pico features which are added to + * FeatureSet. The length of the segment is rounded to the + * nearest whole number of pico-features. The pico-features + * are spaced evenly over the entire segment. + * Results are placed in FeatureSet. 
+ * Globals: + * - classify_pico_feature_length length of a single pico-feature + * @param Start starting point of pico-feature + * @param End ending point of pico-feature + * @param FeatureSet set to add pico-feature to + */ +void ConvertSegmentToPicoFeat(FPOINT *Start, + FPOINT *End, + FEATURE_SET FeatureSet) { + FEATURE Feature; + float Angle; + float Length; + int NumFeatures; + FPOINT Center; + FPOINT Delta; + int i; + + Angle = NormalizedAngleFrom (Start, End, 1.0); + Length = DistanceBetween (*Start, *End); + NumFeatures = static_cast<int>(floor (Length / classify_pico_feature_length + 0.5)); + if (NumFeatures < 1) + NumFeatures = 1; + + /* compute vector for one pico feature */ + Delta.x = XDelta (*Start, *End) / NumFeatures; + Delta.y = YDelta (*Start, *End) / NumFeatures; + + /* compute position of first pico feature */ + Center.x = Start->x + Delta.x / 2.0; + Center.y = Start->y + Delta.y / 2.0; + + /* compute each pico feature in segment and add to feature set */ + for (i = 0; i < NumFeatures; i++) { + Feature = NewFeature (&PicoFeatDesc); + Feature->Params[PicoFeatDir] = Angle; + Feature->Params[PicoFeatX] = Center.x; + Feature->Params[PicoFeatY] = Center.y; + AddFeature(FeatureSet, Feature); + + Center.x += Delta.x; + Center.y += Delta.y; + } +} /* ConvertSegmentToPicoFeat */ + + +/*---------------------------------------------------------------------------*/ +/** + * This routine steps through the specified outline and cuts it + * up into pieces of equal length. These pieces become the + * desired pico-features. Each segment in the outline + * is converted into an integral number of pico-features. + * Results are returned in FeatureSet. 
+ * + * Globals: + * - classify_pico_feature_length length of features to be extracted + * @param Outline outline to extract micro-features from + * @param FeatureSet set of features to add pico-features to + */ +void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet) { + MFOUTLINE Next; + MFOUTLINE First; + MFOUTLINE Current; + + if (DegenerateOutline(Outline)) + return; + + First = Outline; + Current = First; + Next = NextPointAfter(Current); + do { + /* note that an edge is hidden if the ending point of the edge is + marked as hidden. This situation happens because the order of + the outlines is reversed when they are converted from the old + format. In the old format, a hidden edge is marked by the + starting point for that edge. */ + if (!(PointAt(Next)->Hidden)) + ConvertSegmentToPicoFeat (&(PointAt(Current)->Point), + &(PointAt(Next)->Point), FeatureSet); + + Current = Next; + Next = NextPointAfter(Current); + } + while (Current != First); + +} /* ConvertToPicoFeatures2 */ + + +/*---------------------------------------------------------------------------*/ +/** + * This routine computes the average x position over all + * of the pico-features in FeatureSet and then renormalizes + * the pico-features to force this average to be the x origin + * (i.e. x=0). + * FeatureSet is changed. 
+ * @param FeatureSet pico-features to be normalized + */ +void NormalizePicoX(FEATURE_SET FeatureSet) { + int i; + FEATURE Feature; + float Origin = 0.0; + + for (i = 0; i < FeatureSet->NumFeatures; i++) { + Feature = FeatureSet->Features[i]; + Origin += Feature->Params[PicoFeatX]; + } + Origin /= FeatureSet->NumFeatures; + + for (i = 0; i < FeatureSet->NumFeatures; i++) { + Feature = FeatureSet->Features[i]; + Feature->Params[PicoFeatX] -= Origin; + } +} /* NormalizePicoX */ + +/*---------------------------------------------------------------------------*/ +/** + * @param blob blob to extract features from + * @param fx_info + * @return Integer character-normalized features for blob. + */ +FEATURE_SET Classify::ExtractIntCNFeatures( + const TBLOB& blob, const INT_FX_RESULT_STRUCT& fx_info) { + INT_FX_RESULT_STRUCT local_fx_info(fx_info); + std::vector<INT_FEATURE_STRUCT> bl_features; + tesseract::TrainingSample* sample = tesseract::BlobToTrainingSample( + blob, false, &local_fx_info, &bl_features); + if (sample == nullptr) return nullptr; + + uint32_t num_features = sample->num_features(); + const INT_FEATURE_STRUCT* features = sample->features(); + FEATURE_SET feature_set = NewFeatureSet(num_features); + for (uint32_t f = 0; f < num_features; ++f) { + FEATURE feature = NewFeature(&IntFeatDesc); + + feature->Params[IntX] = features[f].X; + feature->Params[IntY] = features[f].Y; + feature->Params[IntDir] = features[f].Theta; + AddFeature(feature_set, feature); + } + delete sample; + + return feature_set; +} /* ExtractIntCNFeatures */ + +/*---------------------------------------------------------------------------*/ +/** + * @param blob blob to extract features from + * @param fx_info + * @return Geometric (top/bottom/width) features for blob. 
+ */ +FEATURE_SET Classify::ExtractIntGeoFeatures( + const TBLOB& blob, const INT_FX_RESULT_STRUCT& fx_info) { + INT_FX_RESULT_STRUCT local_fx_info(fx_info); + std::vector<INT_FEATURE_STRUCT> bl_features; + tesseract::TrainingSample* sample = tesseract::BlobToTrainingSample( + blob, false, &local_fx_info, &bl_features); + if (sample == nullptr) return nullptr; + + FEATURE_SET feature_set = NewFeatureSet(1); + FEATURE feature = NewFeature(&IntFeatDesc); + + feature->Params[GeoBottom] = sample->geo_feature(GeoBottom); + feature->Params[GeoTop] = sample->geo_feature(GeoTop); + feature->Params[GeoWidth] = sample->geo_feature(GeoWidth); + AddFeature(feature_set, feature); + delete sample; + + return feature_set; +} /* ExtractIntGeoFeatures */ + +} // namespace tesseract. diff --git a/tesseract/src/classify/picofeat.h b/tesseract/src/classify/picofeat.h new file mode 100644 index 00000000..d5e7786e --- /dev/null +++ b/tesseract/src/classify/picofeat.h @@ -0,0 +1,65 @@ +/****************************************************************************** + ** Filename: picofeat.h + ** Purpose: Definition of pico features. + ** Author: Dan Johnson + ** History: 9/4/90, DSJ, Created. + ** + ** (c) Copyright Hewlett-Packard Company, 1988. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ ******************************************************************************/ + +#ifndef PICOFEAT_H +#define PICOFEAT_H + +#include "ocrfeatures.h" +#include "params.h" + +namespace tesseract { + +// Enum for the order/type of params in IntFeatDesc. +enum IntParams { + IntX, // x-position (0-255). + IntY, // y-position (0-255). + IntDir // Direction (0-255, circular). +}; + +// Enum for the order/type of params in GeoFeatDesc. +enum GeoParams { + GeoBottom, // Bounding box bottom in baseline space (0-255). + GeoTop, // Bounding box top in baseline space (0-255). + GeoWidth, // Bounding box width in baseline space (0-255). + + GeoCount // Number of geo features. +}; + +typedef enum { PicoFeatY, PicoFeatDir, PicoFeatX } PICO_FEAT_PARAM_NAME; + +#define MAX_PICO_FEATURES (1000) + +/*--------------------------------------------------------------------------- + Variables +----------------------------------------------------------------------------*/ + +extern double_VAR_H(classify_pico_feature_length, 0.05, "Pico Feature Length"); + +/**---------------------------------------------------------------------------- + Public Function Prototypes +----------------------------------------------------------------------------**/ +#define GetPicoFeatureLength() (PicoFeatureLength) + +/**---------------------------------------------------------------------------- + Global Data Definitions and Declarations +----------------------------------------------------------------------------**/ +extern TESS_API float PicoFeatureLength; + +} // namespace tesseract + +#endif diff --git a/tesseract/src/classify/protos.cpp b/tesseract/src/classify/protos.cpp new file mode 100644 index 00000000..5cbe4b2e --- /dev/null +++ b/tesseract/src/classify/protos.cpp @@ -0,0 +1,178 @@ +/****************************************************************************** + * + * File: protos.cpp (Formerly protos.c) + * Author: Mark Seaman, OCR Technology + * + * (c) Copyright 1987, Hewlett-Packard Company. 
+ ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + *****************************************************************************/ +/*---------------------------------------------------------------------- + I n c l u d e s +----------------------------------------------------------------------*/ +#define _USE_MATH_DEFINES // for M_PI + +#include "protos.h" + +#include "tprintf.h" +#include "classify.h" +#include "params.h" +#include "intproto.h" + +#include <cmath> // for M_PI +#include <cstdio> + +namespace tesseract { + +#define PROTO_INCREMENT 32 +#define CONFIG_INCREMENT 16 + +/*---------------------------------------------------------------------- + F u n c t i o n s +----------------------------------------------------------------------*/ +/** + * @name AddConfigToClass + * + * Add a new config to this class. Malloc new space and copy the + * old configs if necessary. Return the config id for the new config. 
+ * + * @param Class The class to add to + */ +int AddConfigToClass(CLASS_TYPE Class) { + int NewNumConfigs; + int NewConfig; + int MaxNumProtos; + BIT_VECTOR Config; + + MaxNumProtos = Class->MaxNumProtos; + ASSERT_HOST(MaxNumProtos <= MAX_NUM_PROTOS); + + if (Class->NumConfigs >= Class->MaxNumConfigs) { + /* add configs in CONFIG_INCREMENT chunks at a time */ + NewNumConfigs = (((Class->MaxNumConfigs + CONFIG_INCREMENT) / + CONFIG_INCREMENT) * CONFIG_INCREMENT); + + Class->Configurations = + static_cast<CONFIGS>(realloc (Class->Configurations, + sizeof (BIT_VECTOR) * NewNumConfigs)); + + Class->MaxNumConfigs = NewNumConfigs; + } + NewConfig = Class->NumConfigs++; + Config = NewBitVector(MAX_NUM_PROTOS); + Class->Configurations[NewConfig] = Config; + zero_all_bits (Config, WordsInVectorOfSize(MAX_NUM_PROTOS)); + + return (NewConfig); +} + + +/** + * @name AddProtoToClass + * + * Add a new proto to this class. Malloc new space and copy the + * old protos if necessary. Return the proto id for the new proto. + * + * @param Class The class to add to + */ +int AddProtoToClass(CLASS_TYPE Class) { + if (Class->NumProtos >= Class->MaxNumProtos) { + /* add protos in PROTO_INCREMENT chunks at a time */ + int NewNumProtos = (((Class->MaxNumProtos + PROTO_INCREMENT) / + PROTO_INCREMENT) * PROTO_INCREMENT); + + Class->Prototypes = static_cast<PROTO>(realloc (Class->Prototypes, + sizeof (PROTO_STRUCT) * + NewNumProtos)); + + Class->MaxNumProtos = NewNumProtos; + ASSERT_HOST(NewNumProtos <= MAX_NUM_PROTOS); + } + int NewProto = Class->NumProtos++; + ASSERT_HOST(Class->NumProtos <= MAX_NUM_PROTOS); + return (NewProto); +} + + +/********************************************************************** + * FillABC + * + * Fill in Protos A, B, C fields based on the X, Y, Angle fields. 
+ **********************************************************************/ +void FillABC(PROTO Proto) { + float Slope, Intercept, Normalizer; + + Slope = tan(Proto->Angle * 2.0 * M_PI); + Intercept = Proto->Y - Slope * Proto->X; + Normalizer = 1.0 / sqrt (Slope * Slope + 1.0); + Proto->A = Slope * Normalizer; + Proto->B = -Normalizer; + Proto->C = Intercept * Normalizer; +} + + +/********************************************************************** + * FreeClass + * + * Deallocate the memory consumed by the specified class. + **********************************************************************/ +void FreeClass(CLASS_TYPE Class) { + if (Class) { + FreeClassFields(Class); + delete Class; + } +} + + +/********************************************************************** + * FreeClassFields + * + * Deallocate the memory consumed by subfields of the specified class. + **********************************************************************/ +void FreeClassFields(CLASS_TYPE Class) { + int i; + + if (Class) { + if (Class->MaxNumProtos > 0) free(Class->Prototypes); + if (Class->MaxNumConfigs > 0) { + for (i = 0; i < Class->NumConfigs; i++) + FreeBitVector (Class->Configurations[i]); + free(Class->Configurations); + } + } +} + +/********************************************************************** + * NewClass + * + * Allocate a new class with enough memory to hold the specified number + * of prototypes and configurations. 
+ **********************************************************************/ +CLASS_TYPE NewClass(int NumProtos, int NumConfigs) { + CLASS_TYPE Class; + + Class = new CLASS_STRUCT; + + if (NumProtos > 0) + Class->Prototypes = static_cast<PROTO>(malloc (NumProtos * sizeof (PROTO_STRUCT))); + + if (NumConfigs > 0) + Class->Configurations = static_cast<CONFIGS>(malloc (NumConfigs * + sizeof (BIT_VECTOR))); + Class->MaxNumProtos = NumProtos; + Class->MaxNumConfigs = NumConfigs; + Class->NumProtos = 0; + Class->NumConfigs = 0; + return (Class); + +} + +} // namespace tesseract diff --git a/tesseract/src/classify/protos.h b/tesseract/src/classify/protos.h new file mode 100644 index 00000000..ae35b194 --- /dev/null +++ b/tesseract/src/classify/protos.h @@ -0,0 +1,107 @@ +/****************************************************************************** + * + * File: protos.h + * Author: Mark Seaman, SW Productivity + * + * (c) Copyright 1987, Hewlett-Packard Company. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ * + *****************************************************************************/ + +#ifndef PROTOS_H +#define PROTOS_H + +#include "bitvec.h" +#include "params.h" +#include "unicity_table.h" + +#include <tesseract/unichar.h> + +namespace tesseract { + +using CONFIGS = BIT_VECTOR*; + +typedef struct { + float A; + float B; + float C; + float X; + float Y; + float Angle; + float Length; +} PROTO_STRUCT; +using PROTO = PROTO_STRUCT*; + +struct CLASS_STRUCT { + CLASS_STRUCT() + : NumProtos(0), + MaxNumProtos(0), + NumConfigs(0), + MaxNumConfigs(0), + Prototypes(nullptr), + Configurations(nullptr) {} + int16_t NumProtos; + int16_t MaxNumProtos; + int16_t NumConfigs; + int16_t MaxNumConfigs; + PROTO Prototypes; + CONFIGS Configurations; + UnicityTable<int> font_set; +}; +using CLASS_TYPE = CLASS_STRUCT*; +using CLASSES = CLASS_STRUCT*; + +/*---------------------------------------------------------------------- + M a c r o s +----------------------------------------------------------------------*/ +/** + * AddProtoToConfig + * + * Set a single proto bit in the specified configuration. + */ + +#define AddProtoToConfig(Pid, Config) (SET_BIT(Config, Pid)) + +/** + * ProtoIn + * + * Choose the selected prototype in this class record. Return the + * pointer to it (type PROTO). 
+ */ + +#define ProtoIn(Class, Pid) (&(Class)->Prototypes[Pid]) + +/*---------------------------------------------------------------------- + F u n c t i o n s +----------------------------------------------------------------------*/ +TESS_API +int AddConfigToClass(CLASS_TYPE Class); + +TESS_API +int AddProtoToClass(CLASS_TYPE Class); + +TESS_API +void FillABC(PROTO Proto); + +TESS_API +void FreeClass(CLASS_TYPE Class); + +TESS_API +void FreeClassFields(CLASS_TYPE Class); + +void InitPrototypes(); + +TESS_API +CLASS_TYPE NewClass(int NumProtos, int NumConfigs); + +} // namespace tesseract + +#endif diff --git a/tesseract/src/classify/shapeclassifier.cpp b/tesseract/src/classify/shapeclassifier.cpp new file mode 100644 index 00000000..b1091a53 --- /dev/null +++ b/tesseract/src/classify/shapeclassifier.cpp @@ -0,0 +1,234 @@ +// Copyright 2011 Google Inc. All Rights Reserved. +// Author: rays@google.com (Ray Smith) +/////////////////////////////////////////////////////////////////////// +// File: shapeclassifier.cpp +// Description: Base interface class for classifiers that return a +// shape index. +// Author: Ray Smith +// +// (C) Copyright 2011, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +/////////////////////////////////////////////////////////////////////// + +#ifdef HAVE_CONFIG_H +#include "config_auto.h" +#endif + +#include "shapeclassifier.h" + +#include "scrollview.h" +#include "shapetable.h" +#include "svmnode.h" +#include "trainingsample.h" +#include "tprintf.h" + +#include "genericvector.h" + +namespace tesseract { + +// Classifies the given [training] sample, writing to results. +// See shapeclassifier.h for a full description. +// Default implementation calls the ShapeRating version. +int ShapeClassifier::UnicharClassifySample( + const TrainingSample& sample, Pix* page_pix, int debug, + UNICHAR_ID keep_this, std::vector<UnicharRating>* results) { + results->clear(); + std::vector<ShapeRating> shape_results; + int num_shape_results = ClassifySample(sample, page_pix, debug, keep_this, + &shape_results); + const ShapeTable* shapes = GetShapeTable(); + GenericVector<int> unichar_map; + unichar_map.init_to_size(shapes->unicharset().size(), -1); + for (int r = 0; r < num_shape_results; ++r) { + shapes->AddShapeToResults(shape_results[r], &unichar_map, results); + } + return results->size(); +} + +// Classifies the given [training] sample, writing to results. +// See shapeclassifier.h for a full description. +// Default implementation aborts. +int ShapeClassifier::ClassifySample(const TrainingSample& sample, Pix* page_pix, + int debug, int keep_this, + std::vector<ShapeRating>* results) { + ASSERT_HOST("Must implement ClassifySample!" == nullptr); + return 0; +} + +// Returns the shape that contains unichar_id that has the best result. +// If result is not nullptr, it is set with the shape_id and rating. +// Does not need to be overridden if ClassifySample respects the keep_this +// rule. 
+int ShapeClassifier::BestShapeForUnichar(const TrainingSample& sample, + Pix* page_pix, UNICHAR_ID unichar_id, + ShapeRating* result) { + std::vector<ShapeRating> results; + const ShapeTable* shapes = GetShapeTable(); + int num_results = ClassifySample(sample, page_pix, 0, unichar_id, &results); + for (int r = 0; r < num_results; ++r) { + if (shapes->GetShape(results[r].shape_id).ContainsUnichar(unichar_id)) { + if (result != nullptr) + *result = results[r]; + return results[r].shape_id; + } + } + return -1; +} + +// Provides access to the UNICHARSET that this classifier works with. +// Only needs to be overridden if GetShapeTable() can return nullptr. +const UNICHARSET& ShapeClassifier::GetUnicharset() const { + return GetShapeTable()->unicharset(); +} + +#ifndef GRAPHICS_DISABLED + +// Visual debugger classifies the given sample, displays the results and +// solicits user input to display other classifications. Returns when +// the user has finished with debugging the sample. +// Probably doesn't need to be overridden if the subclass provides +// DisplayClassifyAs. +void ShapeClassifier::DebugDisplay(const TrainingSample& sample, + Pix* page_pix, + UNICHAR_ID unichar_id) { + static ScrollView* terminator = nullptr; + if (terminator == nullptr) { + terminator = new ScrollView("XIT", 0, 0, 50, 50, 50, 50, true); + } + ScrollView* debug_win = CreateFeatureSpaceWindow("ClassifierDebug", 0, 0); + // Provide a right-click menu to choose the class. + auto* popup_menu = new SVMenuNode(); + popup_menu->AddChild("Choose class to debug", 0, "x", "Class to debug"); + popup_menu->BuildMenu(debug_win, false); + // Display the features in green. + const INT_FEATURE_STRUCT* features = sample.features(); + uint32_t num_features = sample.num_features(); + for (uint32_t f = 0; f < num_features; ++f) { + RenderIntFeature(debug_win, &features[f], ScrollView::GREEN); + } + debug_win->Update(); + std::vector<UnicharRating> results; + // Debug classification until the user quits. 
+ const UNICHARSET& unicharset = GetUnicharset(); + SVEvent* ev; + SVEventType ev_type; + do { + PointerVector<ScrollView> windows; + if (unichar_id >= 0) { + tprintf("Debugging class %d = %s\n", + unichar_id, unicharset.id_to_unichar(unichar_id)); + UnicharClassifySample(sample, page_pix, 1, unichar_id, &results); + DisplayClassifyAs(sample, page_pix, unichar_id, 1, &windows); + } else { + tprintf("Invalid unichar_id: %d\n", unichar_id); + UnicharClassifySample(sample, page_pix, 1, -1, &results); + } + if (unichar_id >= 0) { + tprintf("Debugged class %d = %s\n", + unichar_id, unicharset.id_to_unichar(unichar_id)); + } + tprintf("Right-click in ClassifierDebug window to choose debug class,"); + tprintf(" Left-click or close window to quit...\n"); + UNICHAR_ID old_unichar_id; + do { + old_unichar_id = unichar_id; + ev = debug_win->AwaitEvent(SVET_ANY); + ev_type = ev->type; + if (ev_type == SVET_POPUP) { + if (unicharset.contains_unichar(ev->parameter)) { + unichar_id = unicharset.unichar_to_id(ev->parameter); + } else { + tprintf("Char class '%s' not found in unicharset", ev->parameter); + } + } + delete ev; + } while (unichar_id == old_unichar_id && + ev_type != SVET_CLICK && ev_type != SVET_DESTROY); + } while (ev_type != SVET_CLICK && ev_type != SVET_DESTROY); + delete debug_win; +} + +#endif // !GRAPHICS_DISABLED + +// Displays classification as the given shape_id. Creates as many windows +// as it feels fit, using index as a guide for placement. Adds any created +// windows to the windows output and returns a new index that may be used +// by any subsequent classifiers. Caller waits for the user to view and +// then destroys the windows by clearing the vector. +int ShapeClassifier::DisplayClassifyAs( + const TrainingSample& sample, Pix* page_pix, + UNICHAR_ID unichar_id, int index, + PointerVector<ScrollView>* windows) { + // Does nothing in the default implementation. + return index; +} + +// Prints debug information on the results. 
+void ShapeClassifier::UnicharPrintResults( + const char* context, const std::vector<UnicharRating>& results) const { + tprintf("%s\n", context); + for (int i = 0; i < results.size(); ++i) { + tprintf("%g: c_id=%d=%s", results[i].rating, results[i].unichar_id, + GetUnicharset().id_to_unichar(results[i].unichar_id)); + if (!results[i].fonts.empty()) { + tprintf(" Font Vector:"); + for (int f = 0; f < results[i].fonts.size(); ++f) { + tprintf(" %d", results[i].fonts[f].fontinfo_id); + } + } + tprintf("\n"); + } +} +void ShapeClassifier::PrintResults( + const char* context, const std::vector<ShapeRating>& results) const { + tprintf("%s\n", context); + for (int i = 0; i < results.size(); ++i) { + tprintf("%g:", results[i].rating); + if (results[i].joined) + tprintf("[J]"); + if (results[i].broken) + tprintf("[B]"); + tprintf(" %s\n", GetShapeTable()->DebugStr(results[i].shape_id).c_str()); + } +} + +// Removes any result that has all its unichars covered by a better choice, +// regardless of font. +void ShapeClassifier::FilterDuplicateUnichars( + std::vector<ShapeRating>* results) const { + std::vector<ShapeRating> filtered_results; + // Copy results to filtered results and knock out duplicate unichars. + const ShapeTable* shapes = GetShapeTable(); + for (int r = 0; r < results->size(); ++r) { + if (r > 0) { + const Shape& shape_r = shapes->GetShape((*results)[r].shape_id); + int c; + for (c = 0; c < shape_r.size(); ++c) { + int unichar_id = shape_r[c].unichar_id; + int s; + for (s = 0; s < r; ++s) { + const Shape& shape_s = shapes->GetShape((*results)[s].shape_id); + if (shape_s.ContainsUnichar(unichar_id)) + break; // We found unichar_id. + } + if (s == r) + break; // We didn't find unichar_id. + } + if (c == shape_r.size()) + continue; // We found all the unichar ids in previous answers. + } + filtered_results.push_back((*results)[r]); + } + *results = filtered_results; +} + +} // namespace tesseract. 
diff --git a/tesseract/src/classify/shapeclassifier.h b/tesseract/src/classify/shapeclassifier.h new file mode 100644 index 00000000..776880fc --- /dev/null +++ b/tesseract/src/classify/shapeclassifier.h @@ -0,0 +1,121 @@ +/////////////////////////////////////////////////////////////////////// +// File: shapeclassifier.h +// Description: Base interface class for classifiers that return a +// shape index. +// Author: Ray Smith +// +// (C) Copyright 2011, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#ifndef TESSERACT_CLASSIFY_SHAPECLASSIFIER_H_ +#define TESSERACT_CLASSIFY_SHAPECLASSIFIER_H_ + +#include <tesseract/unichar.h> + +struct Pix; + +namespace tesseract { + +class ScrollView; +class UNICHARSET; + +template <typename T> class PointerVector; +struct ShapeRating; +class ShapeTable; +class TrainingSample; +class TrainingSampleSet; +struct UnicharRating; + +// Interface base class for classifiers that produce ShapeRating results. +class TESS_API ShapeClassifier { + public: + virtual ~ShapeClassifier() = default; + + // Classifies the given [training] sample, writing to results. + // If page_pix is not nullptr, the overriding function may call + // sample.GetSamplePix(padding, page_pix) to get an image of the sample + // padded (with real image data) by the given padding to extract features + // from the image of the character. 
Other members of TrainingSample: + // features(), micro_features(), cn_feature(), geo_feature() may be used + // to get the appropriate tesseract features. + // If debug is non-zero, then various degrees of classifier dependent debug + // information is provided. + // If keep_this (a UNICHAR_ID) is >= 0, then the results should always + // contain keep_this, and (if possible) anything of intermediate confidence. + // (Used for answering "Why didn't it get that right?" questions.) It must + // be a UNICHAR_ID as the callers have no clue how to choose the best shape + // that may contain a desired answer. + // The return value is the number of classes saved in results. + // NOTE that overriding functions MUST clear and sort the results by + // descending rating unless the classifier is working with a team of such + // classifiers. + // NOTE: Neither overload of ClassifySample is pure, but at least one must + // be overridden by a classifier in order for it to do anything. + virtual int UnicharClassifySample(const TrainingSample& sample, Pix* page_pix, + int debug, UNICHAR_ID keep_this, + std::vector<UnicharRating>* results); + + protected: + virtual int ClassifySample(const TrainingSample& sample, Pix* page_pix, + int debug, UNICHAR_ID keep_this, + std::vector<ShapeRating>* results); + + public: + // Returns the shape that contains unichar_id that has the best result. + // If result is not nullptr, it is set with the shape_id and rating. + // Returns -1 if ClassifySample fails to provide any result containing + // unichar_id. BestShapeForUnichar does not need to be overridden if + // ClassifySample respects the keep_this rule. + virtual int BestShapeForUnichar(const TrainingSample& sample, Pix* page_pix, + UNICHAR_ID unichar_id, ShapeRating* result); + + // Provides access to the ShapeTable that this classifier works with. + virtual const ShapeTable* GetShapeTable() const = 0; + // Provides access to the UNICHARSET that this classifier works with. 
+ // Must be overridden IFF GetShapeTable() returns nullptr. + virtual const UNICHARSET& GetUnicharset() const; + + // Visual debugger classifies the given sample, displays the results and + // solicits user input to display other classifications. Returns when + // the user has finished with debugging the sample. + // Probably doesn't need to be overridden if the subclass provides + // DisplayClassifyAs. + void DebugDisplay(const TrainingSample& sample, Pix* page_pix, + UNICHAR_ID unichar_id); + + + // Displays classification as the given unichar_id. Creates as many windows + // as it feels fit, using index as a guide for placement. Adds any created + // windows to the windows output and returns a new index that may be used + // by any subsequent classifiers. Caller waits for the user to view and + // then destroys the windows by clearing the vector. + virtual int DisplayClassifyAs(const TrainingSample& sample, Pix* page_pix, + UNICHAR_ID unichar_id, int index, + PointerVector<ScrollView>* windows); + + // Prints debug information on the results. context is some introductory/title + // message. + virtual void UnicharPrintResults( + const char* context, const std::vector<UnicharRating>& results) const; + virtual void PrintResults(const char* context, + const std::vector<ShapeRating>& results) const; + + protected: + // Removes any result that has all its unichars covered by a better choice, + // regardless of font. + void FilterDuplicateUnichars(std::vector<ShapeRating>* results) const; +}; + +} // namespace tesseract. + +#endif // TESSERACT_CLASSIFY_SHAPECLASSIFIER_H_ diff --git a/tesseract/src/classify/shapetable.cpp b/tesseract/src/classify/shapetable.cpp new file mode 100644 index 00000000..c68f5d82 --- /dev/null +++ b/tesseract/src/classify/shapetable.cpp @@ -0,0 +1,727 @@ +// Copyright 2010 Google Inc. All Rights Reserved. 
+// Author: rays@google.com (Ray Smith) +/////////////////////////////////////////////////////////////////////// +// File: shapetable.cpp +// Description: Class to map a classifier shape index to unicharset +// indices and font indices. +// Author: Ray Smith +// Created: Tue Nov 02 15:31:32 PDT 2010 +// +// (C) Copyright 2010, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#include "shapetable.h" + +#include "bitvector.h" +#include "fontinfo.h" +#include "intfeaturespace.h" +#include "strngs.h" +#include "unicharset.h" +#include "unicity_table.h" + +#include <algorithm> + +namespace tesseract { + +// Helper function to get the index of the first result with the required +// unichar_id. If the results are sorted by rating, this will also be the +// best result with the required unichar_id. +// Returns -1 if the unichar_id is not found +int ShapeRating::FirstResultWithUnichar( + const GenericVector<ShapeRating>& results, + const ShapeTable& shape_table, + UNICHAR_ID unichar_id) { + for (int r = 0; r < results.size(); ++r) { + const int shape_id = results[r].shape_id; + const Shape& shape = shape_table.GetShape(shape_id); + if (shape.ContainsUnichar(unichar_id)) { + return r; + } + } + return -1; +} + +// Helper function to get the index of the first result with the required +// unichar_id. 
If the results are sorted by rating, this will also be the +// best result with the required unichar_id. +// Returns -1 if the unichar_id is not found +int UnicharRating::FirstResultWithUnichar( + const GenericVector<UnicharRating>& results, + UNICHAR_ID unichar_id) { + for (int r = 0; r < results.size(); ++r) { + if (results[r].unichar_id == unichar_id) + return r; + } + return -1; +} + +// Writes to the given file. Returns false in case of error. +bool UnicharAndFonts::Serialize(FILE* fp) const { + return tesseract::Serialize(fp, &unichar_id) && font_ids.Serialize(fp); +} +// Reads from the given file. Returns false in case of error. + +bool UnicharAndFonts::DeSerialize(TFile* fp) { + return fp->DeSerialize(&unichar_id) && font_ids.DeSerialize(fp); +} + +// Sort function to sort a pair of UnicharAndFonts by unichar_id. +int UnicharAndFonts::SortByUnicharId(const void* v1, const void* v2) { + const auto* p1 = static_cast<const UnicharAndFonts*>(v1); + const auto* p2 = static_cast<const UnicharAndFonts*>(v2); + return p1->unichar_id - p2->unichar_id; +} + +// Writes to the given file. Returns false in case of error. +bool Shape::Serialize(FILE* fp) const { + uint8_t sorted = unichars_sorted_; + return tesseract::Serialize(fp, &sorted) && unichars_.SerializeClasses(fp); +} +// Reads from the given file. Returns false in case of error. + +bool Shape::DeSerialize(TFile* fp) { + uint8_t sorted; + if (!fp->DeSerialize(&sorted)) return false; + unichars_sorted_ = sorted != 0; + return unichars_.DeSerializeClasses(fp); +} + +// Adds a font_id for the given unichar_id. If the unichar_id is not +// in the shape, it is added. +void Shape::AddToShape(int unichar_id, int font_id) { + for (int c = 0; c < unichars_.size(); ++c) { + if (unichars_[c].unichar_id == unichar_id) { + // Found the unichar in the shape table. 
+ GenericVector<int>& font_list = unichars_[c].font_ids; + for (int f = 0; f < font_list.size(); ++f) { + if (font_list[f] == font_id) + return; // Font is already there. + } + font_list.push_back(font_id); + return; + } + } + // Unichar_id is not in shape, so add it to shape. + unichars_.push_back(UnicharAndFonts(unichar_id, font_id)); + unichars_sorted_ = unichars_.size() <= 1; +} + +// Adds everything in other to this. +void Shape::AddShape(const Shape& other) { + for (int c = 0; c < other.unichars_.size(); ++c) { + for (int f = 0; f < other.unichars_[c].font_ids.size(); ++f) { + AddToShape(other.unichars_[c].unichar_id, + other.unichars_[c].font_ids[f]); + } + } + unichars_sorted_ = unichars_.size() <= 1; +} + +// Returns true if the shape contains the given unichar_id, font_id pair. +bool Shape::ContainsUnicharAndFont(int unichar_id, int font_id) const { + for (int c = 0; c < unichars_.size(); ++c) { + if (unichars_[c].unichar_id == unichar_id) { + // Found the unichar, so look for the font. + auto &font_list = unichars_[c].font_ids; + for (int f = 0; f < font_list.size(); ++f) { + if (font_list[f] == font_id) + return true; + } + return false; + } + } + return false; +} + +// Returns true if the shape contains the given unichar_id, ignoring font. +bool Shape::ContainsUnichar(int unichar_id) const { + for (int c = 0; c < unichars_.size(); ++c) { + if (unichars_[c].unichar_id == unichar_id) { + return true; + } + } + return false; +} + +// Returns true if the shape contains the given font, ignoring unichar_id. +bool Shape::ContainsFont(int font_id) const { + for (int c = 0; c < unichars_.size(); ++c) { + auto &font_list = unichars_[c].font_ids; + for (int f = 0; f < font_list.size(); ++f) { + if (font_list[f] == font_id) + return true; + } + } + return false; +} +// Returns true if the shape contains the given font properties, ignoring +// unichar_id. 
+bool Shape::ContainsFontProperties(const FontInfoTable& font_table, + uint32_t properties) const { + for (int c = 0; c < unichars_.size(); ++c) { + auto &font_list = unichars_[c].font_ids; + for (int f = 0; f < font_list.size(); ++f) { + if (font_table.get(font_list[f]).properties == properties) + return true; + } + } + return false; +} +// Returns true if the shape contains multiple different font properties, +// ignoring unichar_id. +bool Shape::ContainsMultipleFontProperties( + const FontInfoTable& font_table) const { + uint32_t properties = font_table.get(unichars_[0].font_ids[0]).properties; + for (int c = 0; c < unichars_.size(); ++c) { + auto &font_list = unichars_[c].font_ids; + for (int f = 0; f < font_list.size(); ++f) { + if (font_table.get(font_list[f]).properties != properties) + return true; + } + } + return false; +} + +// Returns true if this shape is equal to other (ignoring order of unichars +// and fonts). +bool Shape::operator==(const Shape& other) const { + return IsSubsetOf(other) && other.IsSubsetOf(*this); +} + +// Returns true if this is a subset (including equal) of other. +bool Shape::IsSubsetOf(const Shape& other) const { + for (int c = 0; c < unichars_.size(); ++c) { + int unichar_id = unichars_[c].unichar_id; + const GenericVector<int>& font_list = unichars_[c].font_ids; + for (int f = 0; f < font_list.size(); ++f) { + if (!other.ContainsUnicharAndFont(unichar_id, font_list[f])) + return false; + } + } + return true; +} + +// Returns true if the lists of unichar ids are the same in this and other, +// ignoring fonts. +// NOT const, as it will sort the unichars on demand. 
+bool Shape::IsEqualUnichars(Shape* other) { + if (unichars_.size() != other->unichars_.size()) return false; + if (!unichars_sorted_) SortUnichars(); + if (!other->unichars_sorted_) other->SortUnichars(); + for (int c = 0; c < unichars_.size(); ++c) { + if (unichars_[c].unichar_id != other->unichars_[c].unichar_id) + return false; + } + return true; +} + +// Sorts the unichars_ vector by unichar. +void Shape::SortUnichars() { + unichars_.sort(UnicharAndFonts::SortByUnicharId); + unichars_sorted_ = true; +} + +ShapeTable::ShapeTable() : unicharset_(nullptr), num_fonts_(0) { +} +ShapeTable::ShapeTable(const UNICHARSET& unicharset) + : unicharset_(&unicharset), num_fonts_(0) { +} + +// Writes to the given file. Returns false in case of error. +bool ShapeTable::Serialize(FILE* fp) const { + return shape_table_.Serialize(fp); +} +// Reads from the given file. Returns false in case of error. + +bool ShapeTable::DeSerialize(TFile* fp) { + if (!shape_table_.DeSerialize(fp)) return false; + num_fonts_ = 0; + return true; +} + +// Returns the number of fonts used in this ShapeTable, computing it if +// necessary. +int ShapeTable::NumFonts() const { + if (num_fonts_ <= 0) { + for (int shape_id = 0; shape_id < shape_table_.size(); ++shape_id) { + const Shape& shape = *shape_table_[shape_id]; + for (int c = 0; c < shape.size(); ++c) { + for (int f = 0; f < shape[c].font_ids.size(); ++f) { + if (shape[c].font_ids[f] >= num_fonts_) + num_fonts_ = shape[c].font_ids[f] + 1; + } + } + } + } + return num_fonts_; +} + +// Re-indexes the class_ids in the shapetable according to the given map. +// Useful in conjunction with set_unicharset. 
+void ShapeTable::ReMapClassIds(const GenericVector<int>& unicharset_map) { + for (int shape_id = 0; shape_id < shape_table_.size(); ++shape_id) { + Shape* shape = shape_table_[shape_id]; + for (int c = 0; c < shape->size(); ++c) { + shape->SetUnicharId(c, unicharset_map[(*shape)[c].unichar_id]); + } + } +} + +// Returns a string listing the classes/fonts in a shape. +STRING ShapeTable::DebugStr(int shape_id) const { + if (shape_id < 0 || shape_id >= shape_table_.size()) + return STRING("INVALID_UNICHAR_ID"); + const Shape& shape = GetShape(shape_id); + STRING result; + result.add_str_int("Shape", shape_id); + if (shape.size() > 100) { + result.add_str_int(" Num unichars=", shape.size()); + return result; + } + for (int c = 0; c < shape.size(); ++c) { + result.add_str_int(" c_id=", shape[c].unichar_id); + result += "="; + result += unicharset_->id_to_unichar(shape[c].unichar_id); + if (shape.size() < 10) { + result.add_str_int(", ", shape[c].font_ids.size()); + result += " fonts ="; + int num_fonts = shape[c].font_ids.size(); + if (num_fonts > 10) { + result.add_str_int(" ", shape[c].font_ids[0]); + result.add_str_int(" ... ", shape[c].font_ids[num_fonts - 1]); + } else { + for (int f = 0; f < num_fonts; ++f) { + result.add_str_int(" ", shape[c].font_ids[f]); + } + } + } + } + return result; +} + +// Returns a debug string summarizing the table. 
+STRING ShapeTable::SummaryStr() const { + int max_unichars = 0; + int num_multi_shapes = 0; + int num_master_shapes = 0; + for (int s = 0; s < shape_table_.size(); ++s) { + if (MasterDestinationIndex(s) != s) continue; + ++num_master_shapes; + int shape_size = GetShape(s).size(); + if (shape_size > 1) + ++num_multi_shapes; + if (shape_size > max_unichars) + max_unichars = shape_size; + } + STRING result; + result.add_str_int("Number of shapes = ", num_master_shapes); + result.add_str_int(" max unichars = ", max_unichars); + result.add_str_int(" number with multiple unichars = ", num_multi_shapes); + return result; +} + + +// Adds a new shape starting with the given unichar_id and font_id. +// Returns the assigned index. +int ShapeTable::AddShape(int unichar_id, int font_id) { + int index = shape_table_.size(); + auto* shape = new Shape; + shape->AddToShape(unichar_id, font_id); + shape_table_.push_back(shape); + num_fonts_ = std::max(num_fonts_, font_id + 1); + return index; +} + +// Adds a copy of the given shape unless it is already present. +// Returns the assigned index or index of existing shape if already present. +int ShapeTable::AddShape(const Shape& other) { + int index; + for (index = 0; index < shape_table_.size() && + !(other == *shape_table_[index]); ++index) + continue; + if (index == shape_table_.size()) { + auto* shape = new Shape(other); + shape_table_.push_back(shape); + } + num_fonts_ = 0; + return index; +} + +// Removes the shape given by the shape index. +void ShapeTable::DeleteShape(int shape_id) { + delete shape_table_[shape_id]; + shape_table_[shape_id] = nullptr; + shape_table_.remove(shape_id); +} + +// Adds a font_id to the given existing shape index for the given +// unichar_id. If the unichar_id is not in the shape, it is added. 
+void ShapeTable::AddToShape(int shape_id, int unichar_id, int font_id) { + Shape& shape = *shape_table_[shape_id]; + shape.AddToShape(unichar_id, font_id); + num_fonts_ = std::max(num_fonts_, font_id + 1); +} + +// Adds the given shape to the existing shape with the given index. +void ShapeTable::AddShapeToShape(int shape_id, const Shape& other) { + Shape& shape = *shape_table_[shape_id]; + shape.AddShape(other); + num_fonts_ = 0; +} + +// Returns the id of the shape that contains the given unichar and font. +// If not found, returns -1. +// If font_id < 0, the font_id is ignored and the first shape that matches +// the unichar_id is returned. +int ShapeTable::FindShape(int unichar_id, int font_id) const { + for (int s = 0; s < shape_table_.size(); ++s) { + const Shape& shape = GetShape(s); + for (int c = 0; c < shape.size(); ++c) { + if (shape[c].unichar_id == unichar_id) { + if (font_id < 0) + return s; // We don't care about the font. + for (int f = 0; f < shape[c].font_ids.size(); ++f) { + if (shape[c].font_ids[f] == font_id) + return s; + } + } + } + } + return -1; +} + +// Returns the first unichar_id and font_id in the given shape. +void ShapeTable::GetFirstUnicharAndFont(int shape_id, + int* unichar_id, int* font_id) const { + const UnicharAndFonts& unichar_and_fonts = (*shape_table_[shape_id])[0]; + *unichar_id = unichar_and_fonts.unichar_id; + *font_id = unichar_and_fonts.font_ids[0]; +} + +// Expands all the classes/fonts in the shape individually to build +// a ShapeTable. 
+int ShapeTable::BuildFromShape(const Shape& shape, + const ShapeTable& master_shapes) { + BitVector shape_map(master_shapes.NumShapes()); + for (int u_ind = 0; u_ind < shape.size(); ++u_ind) { + for (int f_ind = 0; f_ind < shape[u_ind].font_ids.size(); ++f_ind) { + int c = shape[u_ind].unichar_id; + int f = shape[u_ind].font_ids[f_ind]; + int master_id = master_shapes.FindShape(c, f); + if (master_id >= 0) { + shape_map.SetBit(master_id); + } else if (FindShape(c, f) < 0) { + AddShape(c, f); + } + } + } + int num_masters = 0; + for (int s = 0; s < master_shapes.NumShapes(); ++s) { + if (shape_map[s]) { + AddShape(master_shapes.GetShape(s)); + ++num_masters; + } + } + return num_masters; +} + +// Returns true if the shapes are already merged. +bool ShapeTable::AlreadyMerged(int shape_id1, int shape_id2) const { + return MasterDestinationIndex(shape_id1) == MasterDestinationIndex(shape_id2); +} + +// Returns true if any shape contains multiple unichars. +bool ShapeTable::AnyMultipleUnichars() const { + int num_shapes = NumShapes(); + for (int s1 = 0; s1 < num_shapes; ++s1) { + if (MasterDestinationIndex(s1) != s1) continue; + if (GetShape(s1).size() > 1) + return true; + } + return false; +} + +// Returns the maximum number of unichars over all shapes. +int ShapeTable::MaxNumUnichars() const { + int max_num_unichars = 0; + int num_shapes = NumShapes(); + for (int s = 0; s < num_shapes; ++s) { + if (GetShape(s).size() > max_num_unichars) + max_num_unichars = GetShape(s).size(); + } + return max_num_unichars; +} + + +// Merges shapes with a common unichar over the [start, end) interval. +// Assumes single unichar per shape. 
+void ShapeTable::ForceFontMerges(int start, int end) { + for (int s1 = start; s1 < end; ++s1) { + if (MasterDestinationIndex(s1) == s1 && GetShape(s1).size() == 1) { + int unichar_id = GetShape(s1)[0].unichar_id; + for (int s2 = s1 + 1; s2 < end; ++s2) { + if (MasterDestinationIndex(s2) == s2 && GetShape(s2).size() == 1 && + unichar_id == GetShape(s2)[0].unichar_id) { + MergeShapes(s1, s2); + } + } + } + } + ShapeTable compacted(*unicharset_); + compacted.AppendMasterShapes(*this, nullptr); + *this = compacted; +} + +// Returns the number of unichars in the master shape. +int ShapeTable::MasterUnicharCount(int shape_id) const { + int master_id = MasterDestinationIndex(shape_id); + return GetShape(master_id).size(); +} + +// Returns the sum of the font counts in the master shape. +int ShapeTable::MasterFontCount(int shape_id) const { + int master_id = MasterDestinationIndex(shape_id); + const Shape& shape = GetShape(master_id); + int font_count = 0; + for (int c = 0; c < shape.size(); ++c) { + font_count += shape[c].font_ids.size(); + } + return font_count; +} + +// Returns the number of unichars that would result from merging the shapes. +int ShapeTable::MergedUnicharCount(int shape_id1, int shape_id2) const { + // Do it the easy way for now. + int master_id1 = MasterDestinationIndex(shape_id1); + int master_id2 = MasterDestinationIndex(shape_id2); + Shape combined_shape(*shape_table_[master_id1]); + combined_shape.AddShape(*shape_table_[master_id2]); + return combined_shape.size(); +} + +// Merges two shape_ids, leaving shape_id2 marked as merged. +void ShapeTable::MergeShapes(int shape_id1, int shape_id2) { + int master_id1 = MasterDestinationIndex(shape_id1); + int master_id2 = MasterDestinationIndex(shape_id2); + // Point master_id2 (and all merged shapes) to master_id1. + shape_table_[master_id2]->set_destination_index(master_id1); + // Add all the shapes of master_id2 to master_id1. 
+ shape_table_[master_id1]->AddShape(*shape_table_[master_id2]); +} + +// Swaps two shape_ids. +void ShapeTable::SwapShapes(int shape_id1, int shape_id2) { + Shape* tmp = shape_table_[shape_id1]; + shape_table_[shape_id1] = shape_table_[shape_id2]; + shape_table_[shape_id2] = tmp; +} + +// Returns the destination of this shape, (if merged), taking into account +// the fact that the destination may itself have been merged. +int ShapeTable::MasterDestinationIndex(int shape_id) const { + int dest_id = shape_table_[shape_id]->destination_index(); + if (dest_id == shape_id || dest_id < 0) + return shape_id; // Is master already. + int master_id = shape_table_[dest_id]->destination_index(); + if (master_id == dest_id || master_id < 0) + return dest_id; // Dest is the master and shape_id points to it. + master_id = MasterDestinationIndex(master_id); + return master_id; +} + +// Returns false if the unichars in neither shape is a subset of the other. +bool ShapeTable::SubsetUnichar(int shape_id1, int shape_id2) const { + const Shape& shape1 = GetShape(shape_id1); + const Shape& shape2 = GetShape(shape_id2); + int c1, c2; + for (c1 = 0; c1 < shape1.size(); ++c1) { + int unichar_id1 = shape1[c1].unichar_id; + if (!shape2.ContainsUnichar(unichar_id1)) + break; + } + for (c2 = 0; c2 < shape2.size(); ++c2) { + int unichar_id2 = shape2[c2].unichar_id; + if (!shape1.ContainsUnichar(unichar_id2)) + break; + } + return c1 == shape1.size() || c2 == shape2.size(); +} + +// Returns false if the unichars in neither shape is a subset of the other. 
+bool ShapeTable::MergeSubsetUnichar(int merge_id1, int merge_id2, + int shape_id) const { + const Shape& merge1 = GetShape(merge_id1); + const Shape& merge2 = GetShape(merge_id2); + const Shape& shape = GetShape(shape_id); + int cm1, cm2, cs; + for (cs = 0; cs < shape.size(); ++cs) { + int unichar_id = shape[cs].unichar_id; + if (!merge1.ContainsUnichar(unichar_id) && + !merge2.ContainsUnichar(unichar_id)) + break; // Shape is not a subset of the merge. + } + for (cm1 = 0; cm1 < merge1.size(); ++cm1) { + int unichar_id1 = merge1[cm1].unichar_id; + if (!shape.ContainsUnichar(unichar_id1)) + break; // Merge is not a subset of shape + } + for (cm2 = 0; cm2 < merge2.size(); ++cm2) { + int unichar_id2 = merge2[cm2].unichar_id; + if (!shape.ContainsUnichar(unichar_id2)) + break; // Merge is not a subset of shape + } + return cs == shape.size() || (cm1 == merge1.size() && cm2 == merge2.size()); +} + +// Returns true if the unichar sets are equal between the shapes. +bool ShapeTable::EqualUnichars(int shape_id1, int shape_id2) const { + const Shape& shape1 = GetShape(shape_id1); + const Shape& shape2 = GetShape(shape_id2); + for (int c1 = 0; c1 < shape1.size(); ++c1) { + int unichar_id1 = shape1[c1].unichar_id; + if (!shape2.ContainsUnichar(unichar_id1)) + return false; + } + for (int c2 = 0; c2 < shape2.size(); ++c2) { + int unichar_id2 = shape2[c2].unichar_id; + if (!shape1.ContainsUnichar(unichar_id2)) + return false; + } + return true; +} + +// Returns true if the unichar sets are equal between the shapes. +bool ShapeTable::MergeEqualUnichars(int merge_id1, int merge_id2, + int shape_id) const { + const Shape& merge1 = GetShape(merge_id1); + const Shape& merge2 = GetShape(merge_id2); + const Shape& shape = GetShape(shape_id); + for (int cs = 0; cs < shape.size(); ++cs) { + int unichar_id = shape[cs].unichar_id; + if (!merge1.ContainsUnichar(unichar_id) && + !merge2.ContainsUnichar(unichar_id)) + return false; // Shape has a unichar that appears in neither merge. 
+ } + for (int cm1 = 0; cm1 < merge1.size(); ++cm1) { + int unichar_id1 = merge1[cm1].unichar_id; + if (!shape.ContainsUnichar(unichar_id1)) + return false; // Merge has a unichar that is not in shape. + } + for (int cm2 = 0; cm2 < merge2.size(); ++cm2) { + int unichar_id2 = merge2[cm2].unichar_id; + if (!shape.ContainsUnichar(unichar_id2)) + return false; // Merge has a unichar that is not in shape. + } + return true; +} + +// Returns true if there is a common unichar between the shapes. +bool ShapeTable::CommonUnichars(int shape_id1, int shape_id2) const { + const Shape& shape1 = GetShape(shape_id1); + const Shape& shape2 = GetShape(shape_id2); + for (int c1 = 0; c1 < shape1.size(); ++c1) { + int unichar_id1 = shape1[c1].unichar_id; + if (shape2.ContainsUnichar(unichar_id1)) + return true; + } + return false; +} + +// Returns true if there is a common font id between the shapes. +bool ShapeTable::CommonFont(int shape_id1, int shape_id2) const { + const Shape& shape1 = GetShape(shape_id1); + const Shape& shape2 = GetShape(shape_id2); + for (int c1 = 0; c1 < shape1.size(); ++c1) { + const GenericVector<int>& font_list1 = shape1[c1].font_ids; + for (int f = 0; f < font_list1.size(); ++f) { + if (shape2.ContainsFont(font_list1[f])) + return true; + } + } + return false; +} + +// Appends the master shapes from other to this. +// If not nullptr, shape_map is set to map other shape_ids to this's shape_ids. +void ShapeTable::AppendMasterShapes(const ShapeTable& other, + GenericVector<int>* shape_map) { + if (shape_map != nullptr) + shape_map->init_to_size(other.NumShapes(), -1); + for (int s = 0; s < other.shape_table_.size(); ++s) { + if (other.shape_table_[s]->destination_index() < 0) { + int index = AddShape(*other.shape_table_[s]); + if (shape_map != nullptr) + (*shape_map)[s] = index; + } + } +} + +// Returns the number of master shapes remaining after merging. 
+int ShapeTable::NumMasterShapes() const { + int num_shapes = 0; + for (int s = 0; s < shape_table_.size(); ++s) { + if (shape_table_[s]->destination_index() < 0) + ++num_shapes; + } + return num_shapes; +} + + +// Adds the unichars of the given shape_id to the vector of results. Any +// unichar_id that is already present just has the fonts added to the +// font set for that result without adding a new entry in the vector. +// NOTE: it is assumed that the results are given to this function in order +// of decreasing rating. +// The unichar_map vector indicates the index of the results entry containing +// each unichar, or -1 if the unichar is not yet included in results. +void ShapeTable::AddShapeToResults(const ShapeRating& shape_rating, + GenericVector<int>* unichar_map, + std::vector<UnicharRating>* results) const { + if (shape_rating.joined) { + AddUnicharToResults(UNICHAR_JOINED, shape_rating.rating, unichar_map, + results); + } + if (shape_rating.broken) { + AddUnicharToResults(UNICHAR_BROKEN, shape_rating.rating, unichar_map, + results); + } + const Shape& shape = GetShape(shape_rating.shape_id); + for (int u = 0; u < shape.size(); ++u) { + int result_index = AddUnicharToResults(shape[u].unichar_id, + shape_rating.rating, + unichar_map, results); + for (int f = 0; f < shape[u].font_ids.size(); ++f) { + (*results)[result_index].fonts.push_back( + ScoredFont(shape[u].font_ids[f], + IntCastRounded(shape_rating.rating * INT16_MAX))); + } + } +} + +// Adds the given unichar_id to the results if needed, updating unichar_map +// and returning the index of unichar in results. 
+int ShapeTable::AddUnicharToResults( + int unichar_id, float rating, GenericVector<int>* unichar_map, + std::vector<UnicharRating>* results) const { + int result_index = unichar_map->get(unichar_id); + if (result_index < 0) { + UnicharRating result(unichar_id, rating); + result_index = results->size(); + results->push_back(result); + (*unichar_map)[unichar_id] = result_index; + } + return result_index; +} + + +} // namespace tesseract diff --git a/tesseract/src/classify/shapetable.h b/tesseract/src/classify/shapetable.h new file mode 100644 index 00000000..5a551401 --- /dev/null +++ b/tesseract/src/classify/shapetable.h @@ -0,0 +1,379 @@ +// Copyright 2010 Google Inc. All Rights Reserved. +// Author: rays@google.com (Ray Smith) +/////////////////////////////////////////////////////////////////////// +// File: shapetable.h +// Description: Class to map a classifier shape index to unicharset +// indices and font indices. +// Author: Ray Smith +// +// (C) Copyright 2010, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +/////////////////////////////////////////////////////////////////////// + +#ifndef TESSERACT_CLASSIFY_SHAPETABLE_H_ +#define TESSERACT_CLASSIFY_SHAPETABLE_H_ + +#include "bitvector.h" +#include "fontinfo.h" +#include "genericheap.h" +#include "intmatcher.h" + +#include "genericvector.h" + +namespace tesseract { + +class STRING; +class UNICHARSET; +class ShapeTable; + +// Simple struct to hold a single classifier unichar selection, a corresponding +// rating, and a list of appropriate fonts. +struct UnicharRating { + UnicharRating() + : unichar_id(0), rating(0.0f), adapted(false), config(0), + feature_misses(0) {} + UnicharRating(int u, float r) + : unichar_id(u), rating(r), adapted(false), config(0), feature_misses(0) {} + + // Print debug info. + void Print() const { + tprintf("Unichar-id=%d, rating=%g, adapted=%d, config=%d, misses=%u," + " %zu fonts\n", unichar_id, rating, adapted, config, feature_misses, + fonts.size()); + } + + // Helper function to get the index of the first result with the required + // unichar_id. If the results are sorted by rating, this will also be the + // best result with the required unichar_id. + // Returns -1 if the unichar_id is not found + static int FirstResultWithUnichar(const GenericVector<UnicharRating>& results, + UNICHAR_ID unichar_id); + + // Index into some UNICHARSET table indicates the class of the answer. + UNICHAR_ID unichar_id; + // Rating from classifier with 1.0 perfect and 0.0 impossible. + // Call it a probability if you must. + float rating; + // True if this result is from the adaptive classifier. + bool adapted; + // Index of best matching font configuration of result. + uint8_t config; + // Number of features that were total misses - were liked by no classes. + uint16_t feature_misses; + // Unsorted collection of fontinfo ids and scores. Note that a raw result + // from the IntegerMatch will contain config ids, that require transforming + // to fontinfo ids via fontsets and (possibly) shapetable. 
+ std::vector<ScoredFont> fonts; +}; + +// Classifier result from a low-level classification is an index into some +// ShapeTable and a rating. +struct ShapeRating { + ShapeRating() + : shape_id(0), rating(0.0f), raw(0.0f), font(0.0f), + joined(false), broken(false) {} + ShapeRating(int s, float r) + : shape_id(s), rating(r), raw(1.0f), font(0.0f), + joined(false), broken(false) {} + + // Helper function to get the index of the first result with the required + // unichar_id. If the results are sorted by rating, this will also be the + // best result with the required unichar_id. + // Returns -1 if the unichar_id is not found + static int FirstResultWithUnichar(const GenericVector<ShapeRating>& results, + const ShapeTable& shape_table, + UNICHAR_ID unichar_id); + + // Index into some shape table indicates the class of the answer. + int shape_id; + // Rating from classifier with 1.0 perfect and 0.0 impossible. + // Call it a probability if you must. + float rating; + // Subsidiary rating that a classifier may use internally. + float raw; + // Subsidiary rating that a classifier may use internally. + float font; + // Flag indicating that the input may be joined. + bool joined; + // Flag indicating that the input may be broken (a fragment). + bool broken; +}; + +// Simple struct to hold an entry for a heap-based priority queue of +// ShapeRating. +struct ShapeQueueEntry { + ShapeQueueEntry() : result(ShapeRating(0, 0.0f)), level(0) {} + ShapeQueueEntry(const ShapeRating& rating, int level0) + : result(rating), level(level0) {} + + // Sort by decreasing rating and decreasing level for equal rating. + bool operator<(const ShapeQueueEntry& other) const { + if (result.rating > other.result.rating) return true; + if (result.rating == other.result.rating) + return level > other.level; + return false; + } + + // Output from classifier. + ShapeRating result; + // Which level in the tree did this come from? 
+ int level; +}; +using ShapeQueue = GenericHeap<ShapeQueueEntry>; + +// Simple struct to hold a set of fonts associated with a single unichar-id. +// A vector of UnicharAndFonts makes a shape. +struct UnicharAndFonts { + UnicharAndFonts() : unichar_id(0) { + } + UnicharAndFonts(int uni_id, int font_id) : unichar_id(uni_id) { + font_ids.push_back(font_id); + } + + // Writes to the given file. Returns false in case of error. + bool Serialize(FILE* fp) const; + // Reads from the given file. Returns false in case of error. + bool DeSerialize(TFile* fp); + + // Sort function to sort a pair of UnicharAndFonts by unichar_id. + static int SortByUnicharId(const void* v1, const void* v2); + + GenericVector<int32_t> font_ids; + int32_t unichar_id; +}; + +// A Shape is a collection of unichar-ids and a list of fonts associated with +// each, organized as a vector of UnicharAndFonts. Conceptually a Shape is +// a classifiable unit, and represents a group of characters or parts of +// characters that have a similar or identical shape. Shapes/ShapeTables may +// be organized hierarchically from identical shapes at the leaves to vaguely +// similar shapes near the root. +class TESS_API Shape { + public: + Shape() : destination_index_(-1) {} + + // Writes to the given file. Returns false in case of error. + bool Serialize(FILE* fp) const; + // Reads from the given file. Returns false in case of error. + bool DeSerialize(TFile* fp); + + int destination_index() const { + return destination_index_; + } + void set_destination_index(int index) { + destination_index_ = index; + } + int size() const { + return unichars_.size(); + } + // Returns a UnicharAndFonts entry for the given index, which must be + // in the range [0, size()). + const UnicharAndFonts& operator[](int index) const { + return unichars_[index]; + } + // Sets the unichar_id of the given index to the new unichar_id. 
+ void SetUnicharId(int index, int unichar_id) { + unichars_[index].unichar_id = unichar_id; + } + // Adds a font_id for the given unichar_id. If the unichar_id is not + // in the shape, it is added. + void AddToShape(int unichar_id, int font_id); + // Adds everything in other to this. + void AddShape(const Shape& other); + // Returns true if the shape contains the given unichar_id, font_id pair. + bool ContainsUnicharAndFont(int unichar_id, int font_id) const; + // Returns true if the shape contains the given unichar_id, ignoring font. + bool ContainsUnichar(int unichar_id) const; + // Returns true if the shape contains the given font, ignoring unichar_id. + bool ContainsFont(int font_id) const; + // Returns true if the shape contains the given font properties, ignoring + // unichar_id. + bool ContainsFontProperties(const FontInfoTable& font_table, + uint32_t properties) const; + // Returns true if the shape contains multiple different font properties, + // ignoring unichar_id. + bool ContainsMultipleFontProperties(const FontInfoTable& font_table) const; + // Returns true if this shape is equal to other (ignoring order of unichars + // and fonts). + bool operator==(const Shape& other) const; + // Returns true if this is a subset (including equal) of other. + bool IsSubsetOf(const Shape& other) const; + // Returns true if the lists of unichar ids are the same in this and other, + // ignoring fonts. + // NOT const, as it will sort the unichars on demand. + bool IsEqualUnichars(Shape* other); + + private: + // Sorts the unichars_ vector by unichar. + void SortUnichars(); + + // Flag indicates that the unichars are sorted, allowing faster set + // operations with another shape. + bool unichars_sorted_ = false; + // If this Shape is part of a ShapeTable the destiation_index_ is the index + // of some other shape in the ShapeTable with which this shape is merged. + int destination_index_ = 0; + // Array of unichars, each with a set of fonts. 
Each unichar has at most + // one entry in the vector. + GenericVector<UnicharAndFonts> unichars_; +}; + +// ShapeTable is a class to encapsulate the triple indirection that is +// used here. +// ShapeTable is a vector of shapes. +// Each shape is a vector of UnicharAndFonts representing the set of unichars +// that the shape represents. +// Each UnicharAndFonts also lists the fonts of the unichar_id that were +// mapped to the shape during training. +class TESS_API ShapeTable { + public: + ShapeTable(); + // The UNICHARSET reference supplied here, or in set_unicharset below must + // exist for the entire life of the ShapeTable. It is used only by DebugStr. + explicit ShapeTable(const UNICHARSET& unicharset); + + // Writes to the given file. Returns false in case of error. + bool Serialize(FILE* fp) const; + // Reads from the given file. Returns false in case of error. + bool DeSerialize(TFile* fp); + + // Accessors. + int NumShapes() const { + return shape_table_.size(); + } + const UNICHARSET& unicharset() const { + return *unicharset_; + } + // Returns the number of fonts used in this ShapeTable, computing it if + // necessary. + int NumFonts() const; + // Shapetable takes a pointer to the UNICHARSET, so it must persist for the + // entire life of the ShapeTable. + void set_unicharset(const UNICHARSET& unicharset) { + unicharset_ = &unicharset; + } + // Re-indexes the class_ids in the shapetable according to the given map. + // Useful in conjunction with set_unicharset. + void ReMapClassIds(const GenericVector<int>& unicharset_map); + // Returns a string listing the classes/fonts in a shape. + STRING DebugStr(int shape_id) const; + // Returns a debug string summarizing the table. + STRING SummaryStr() const; + + // Adds a new shape starting with the given unichar_id and font_id. + // Returns the assigned index. + int AddShape(int unichar_id, int font_id); + // Adds a copy of the given shape unless it is already present. 
+ // Returns the assigned index or index of existing shape if already present. + int AddShape(const Shape& other); + // Removes the shape given by the shape index. All indices above are changed! + void DeleteShape(int shape_id); + // Adds a font_id to the given existing shape index for the given + // unichar_id. If the unichar_id is not in the shape, it is added. + void AddToShape(int shape_id, int unichar_id, int font_id); + // Adds the given shape to the existing shape with the given index. + void AddShapeToShape(int shape_id, const Shape& other); + // Returns the id of the shape that contains the given unichar and font. + // If not found, returns -1. + // If font_id < 0, the font_id is ignored and the first shape that matches + // the unichar_id is returned. + int FindShape(int unichar_id, int font_id) const; + // Returns the first unichar_id and font_id in the given shape. + void GetFirstUnicharAndFont(int shape_id, + int* unichar_id, int* font_id) const; + + // Accessors for the Shape with the given shape_id. + const Shape& GetShape(int shape_id) const { + return *shape_table_[shape_id]; + } + Shape* MutableShape(int shape_id) { + return shape_table_[shape_id]; + } + + // Expands all the classes/fonts in the shape individually to build + // a ShapeTable. + int BuildFromShape(const Shape& shape, const ShapeTable& master_shapes); + + // Returns true if the shapes are already merged. + bool AlreadyMerged(int shape_id1, int shape_id2) const; + // Returns true if any shape contains multiple unichars. + bool AnyMultipleUnichars() const; + // Returns the maximum number of unichars over all shapes. + int MaxNumUnichars() const; + // Merges shapes with a common unichar over the [start, end) interval. + // Assumes single unichar per shape. + void ForceFontMerges(int start, int end); + // Returns the number of unichars in the master shape. + int MasterUnicharCount(int shape_id) const; + // Returns the sum of the font counts in the master shape. 
+ int MasterFontCount(int shape_id) const; + // Returns the number of unichars that would result from merging the shapes. + int MergedUnicharCount(int shape_id1, int shape_id2) const; + // Merges two shape_ids, leaving shape_id2 marked as merged. + void MergeShapes(int shape_id1, int shape_id2); + // Swaps two shape_ids. + void SwapShapes(int shape_id1, int shape_id2); + // Appends the master shapes from other to this. + // Used to create a clean ShapeTable from a merged one, or to create a + // copy of a ShapeTable. + // If not nullptr, shape_map is set to map other shape_ids to this's shape_ids. + void AppendMasterShapes(const ShapeTable& other, + GenericVector<int>* shape_map); + // Returns the number of master shapes remaining after merging. + int NumMasterShapes() const; + // Returns the destination of this shape, (if merged), taking into account + // the fact that the destination may itself have been merged. + // For a non-merged shape, returns the input shape_id. + int MasterDestinationIndex(int shape_id) const; + + // Returns false if the unichars in neither shape is a subset of the other.. + bool SubsetUnichar(int shape_id1, int shape_id2) const; + // Returns false if the unichars in neither shape is a subset of the other.. + bool MergeSubsetUnichar(int merge_id1, int merge_id2, int shape_id) const; + // Returns true if the unichar sets are equal between the shapes. + bool EqualUnichars(int shape_id1, int shape_id2) const; + bool MergeEqualUnichars(int merge_id1, int merge_id2, int shape_id) const; + // Returns true if there is a common unichar between the shapes. + bool CommonUnichars(int shape_id1, int shape_id2) const; + // Returns true if there is a common font id between the shapes. + bool CommonFont(int shape_id1, int shape_id2) const; + + // Adds the unichars of the given shape_id to the vector of results. 
Any + // unichar_id that is already present just has the fonts added to the + // font set for that result without adding a new entry in the vector. + // NOTE: it is assumed that the results are given to this function in order + // of decreasing rating. + // The unichar_map vector indicates the index of the results entry containing + // each unichar, or -1 if the unichar is not yet included in results. + void AddShapeToResults(const ShapeRating& shape_rating, + GenericVector<int>* unichar_map, + std::vector<UnicharRating>* results) const; + + private: + // Adds the given unichar_id to the results if needed, updating unichar_map + // and returning the index of unichar in results. + int AddUnicharToResults(int unichar_id, float rating, + GenericVector<int>* unichar_map, + std::vector<UnicharRating>* results) const; + + // Pointer to a provided unicharset used only by the Debugstr member. + const UNICHARSET* unicharset_; + // Vector of pointers to the Shapes in this ShapeTable. + PointerVector<Shape> shape_table_; + + // Cached data calculated on demand. + mutable int num_fonts_; +}; + +} // namespace tesseract. + +#endif // TESSERACT_CLASSIFY_SHAPETABLE_H_ diff --git a/tesseract/src/classify/tessclassifier.cpp b/tesseract/src/classify/tessclassifier.cpp new file mode 100644 index 00000000..c7819d66 --- /dev/null +++ b/tesseract/src/classify/tessclassifier.cpp @@ -0,0 +1,84 @@ +/////////////////////////////////////////////////////////////////////// +// File: tessclassifier.cpp +// Description: Tesseract implementation of a ShapeClassifier. +// Author: Ray Smith +// +// (C) Copyright 2011, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#include "tessclassifier.h" + +#include "classify.h" +#include "trainingsample.h" + +namespace tesseract { + +// Classifies the given [training] sample, writing to results. +// See ShapeClassifier for a full description. +int TessClassifier::UnicharClassifySample( + const TrainingSample& sample, Pix* page_pix, int debug, + UNICHAR_ID keep_this, std::vector<UnicharRating>* results) { + const int old_matcher_level = classify_->matcher_debug_level; + const int old_matcher_flags = classify_->matcher_debug_flags; + const int old_classify_level = classify_->classify_debug_level; + if (debug) { + // Explicitly set values of various control parameters to generate debug + // output if required, restoring the old values after classifying. + classify_->matcher_debug_level.set_value(2); + classify_->matcher_debug_flags.set_value(25); + classify_->classify_debug_level.set_value(3); + } + classify_->CharNormTrainingSample(pruner_only_, keep_this, sample, results); + if (debug) { + classify_->matcher_debug_level.set_value(old_matcher_level); + classify_->matcher_debug_flags.set_value(old_matcher_flags); + classify_->classify_debug_level.set_value(old_classify_level); + } + return results->size(); +} + +// Provides access to the ShapeTable that this classifier works with. +const ShapeTable* TessClassifier::GetShapeTable() const { + return classify_->shape_table(); +} +// Provides access to the UNICHARSET that this classifier works with. 
+// Only needs to be overridden if GetShapeTable() can return nullptr. +const UNICHARSET& TessClassifier::GetUnicharset() const { + return classify_->unicharset; +} + +// Displays classification as the given shape_id. Creates as many windows +// as it feels fit, using index as a guide for placement. Adds any created +// windows to the windows output and returns a new index that may be used +// by any subsequent classifiers. Caller waits for the user to view and +// then destroys the windows by clearing the vector. +int TessClassifier::DisplayClassifyAs( + const TrainingSample& sample, Pix* page_pix, int unichar_id, int index, + PointerVector<ScrollView>* windows) { + int shape_id = unichar_id; + // TODO(rays) Fix this so it works with both flat and real shapetables. + // if (GetShapeTable() != nullptr) + // shape_id = BestShapeForUnichar(sample, page_pix, unichar_id, nullptr); + if (shape_id < 0) return index; + if (UnusedClassIdIn(classify_->PreTrainedTemplates, shape_id)) { + tprintf("No built-in templates for class/shape %d\n", shape_id); + return index; + } +#ifndef GRAPHICS_DISABLED + classify_->ShowBestMatchFor(shape_id, sample.features(), + sample.num_features()); +#endif + return index; +} + +} // namespace tesseract diff --git a/tesseract/src/classify/tessclassifier.h b/tesseract/src/classify/tessclassifier.h new file mode 100644 index 00000000..a8b3f753 --- /dev/null +++ b/tesseract/src/classify/tessclassifier.h @@ -0,0 +1,72 @@ +// Copyright 2011 Google Inc. All Rights Reserved. +// Author: rays@google.com (Ray Smith) +/////////////////////////////////////////////////////////////////////// +// File: tessclassifier.h +// Description: Tesseract implementation of a ShapeClassifier. +// Author: Ray Smith +// Created: Tue Nov 22 14:10:45 PST 2011 +// +// (C) Copyright 2011, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#ifndef THIRD_PARTY_TESSERACT_CLASSIFY_TESSCLASSIFIER_H_ +#define THIRD_PARTY_TESSERACT_CLASSIFY_TESSCLASSIFIER_H_ + +#include "shapeclassifier.h" + +namespace tesseract { + +class Classify; +class TrainingSample; + +// Tesseract implementation of a ShapeClassifier. +// Due to limitations in the content of TrainingSample, this currently +// only works for the static classifier and only works if the ShapeTable +// in classify is not nullptr. +class TESS_API TessClassifier : public ShapeClassifier { + public: + TessClassifier(bool pruner_only, tesseract::Classify* classify) + : pruner_only_(pruner_only), classify_(classify) {} + ~TessClassifier() override = default; + + // Classifies the given [training] sample, writing to results. + // See ShapeClassifier for a full description. + int UnicharClassifySample(const TrainingSample& sample, Pix* page_pix, + int debug, UNICHAR_ID keep_this, + std::vector<UnicharRating>* results) override; + // Provides access to the ShapeTable that this classifier works with. + const ShapeTable* GetShapeTable() const override; + // Provides access to the UNICHARSET that this classifier works with. + // Only needs to be overridden if GetShapeTable() can return nullptr. + const UNICHARSET& GetUnicharset() const override; + + // Displays classification as the given shape_id. Creates as many windows + // as it feels fit, using index as a guide for placement. 
Adds any created + // windows to the windows output and returns a new index that may be used + // by any subsequent classifiers. Caller waits for the user to view and + // then destroys the windows by clearing the vector. + int DisplayClassifyAs(const TrainingSample& sample, Pix* page_pix, + int unichar_id, int index, + PointerVector<ScrollView>* windows) override; + + private: + // Indicates that this classifier is to use just the ClassPruner, or the + // full classifier if false. + bool pruner_only_; + // Borrowed pointer to the actual Tesseract classifier. + tesseract::Classify* classify_; +}; + +} // namespace tesseract + +#endif /* THIRD_PARTY_TESSERACT_CLASSIFY_TESSCLASSIFIER_H_ */ diff --git a/tesseract/src/classify/trainingsample.cpp b/tesseract/src/classify/trainingsample.cpp new file mode 100644 index 00000000..003fb97b --- /dev/null +++ b/tesseract/src/classify/trainingsample.cpp @@ -0,0 +1,339 @@ +// Copyright 2010 Google Inc. All Rights Reserved. +// Author: rays@google.com (Ray Smith) +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#define _USE_MATH_DEFINES // for M_PI +// Include automatically generated configuration file if running autoconf. 
+#ifdef HAVE_CONFIG_H +#include "config_auto.h" +#endif + +#include "trainingsample.h" + +#include "intfeaturespace.h" +#include "helpers.h" +#include "normfeat.h" +#include "shapetable.h" + +#include "allheaders.h" + +#include <cmath> // for M_PI + +namespace tesseract { + +ELISTIZE(TrainingSample) + +// Center of randomizing operations. +const int kRandomizingCenter = 128; + +// Randomizing factors. +const int TrainingSample::kYShiftValues[kSampleYShiftSize] = { + 6, 3, -3, -6, 0 +}; +const double TrainingSample::kScaleValues[kSampleScaleSize] = { + 1.0625, 0.9375, 1.0 +}; + +TrainingSample::~TrainingSample() { + delete [] features_; + delete [] micro_features_; +} + +// WARNING! Serialize/DeSerialize do not save/restore the "cache" data +// members, which is mostly the mapped features, and the weight. +// It is assumed these can all be reconstructed from what is saved. +// Writes to the given file. Returns false in case of error. +bool TrainingSample::Serialize(FILE* fp) const { + if (fwrite(&class_id_, sizeof(class_id_), 1, fp) != 1) return false; + if (fwrite(&font_id_, sizeof(font_id_), 1, fp) != 1) return false; + if (fwrite(&page_num_, sizeof(page_num_), 1, fp) != 1) return false; + if (!bounding_box_.Serialize(fp)) return false; + if (fwrite(&num_features_, sizeof(num_features_), 1, fp) != 1) return false; + if (fwrite(&num_micro_features_, sizeof(num_micro_features_), 1, fp) != 1) + return false; + if (fwrite(&outline_length_, sizeof(outline_length_), 1, fp) != 1) + return false; + if (fwrite(features_, sizeof(*features_), num_features_, fp) != num_features_) + return false; + if (fwrite(micro_features_, sizeof(*micro_features_), num_micro_features_, + fp) != num_micro_features_) + return false; + if (fwrite(cn_feature_, sizeof(*cn_feature_), kNumCNParams, fp) != + kNumCNParams) return false; + if (fwrite(geo_feature_, sizeof(*geo_feature_), GeoCount, fp) != GeoCount) + return false; + return true; +} + +// Creates from the given file. 
Returns nullptr in case of error. +// If swap is true, assumes a big/little-endian swap is needed. +TrainingSample* TrainingSample::DeSerializeCreate(bool swap, FILE* fp) { + auto* sample = new TrainingSample; + if (sample->DeSerialize(swap, fp)) return sample; + delete sample; + return nullptr; +} + +// Reads from the given file. Returns false in case of error. +// If swap is true, assumes a big/little-endian swap is needed. +bool TrainingSample::DeSerialize(bool swap, FILE* fp) { + if (fread(&class_id_, sizeof(class_id_), 1, fp) != 1) return false; + if (fread(&font_id_, sizeof(font_id_), 1, fp) != 1) return false; + if (fread(&page_num_, sizeof(page_num_), 1, fp) != 1) return false; + if (!bounding_box_.DeSerialize(swap, fp)) return false; + if (fread(&num_features_, sizeof(num_features_), 1, fp) != 1) return false; + if (fread(&num_micro_features_, sizeof(num_micro_features_), 1, fp) != 1) + return false; + if (fread(&outline_length_, sizeof(outline_length_), 1, fp) != 1) + return false; + if (swap) { + ReverseN(&class_id_, sizeof(class_id_)); + ReverseN(&num_features_, sizeof(num_features_)); + ReverseN(&num_micro_features_, sizeof(num_micro_features_)); + ReverseN(&outline_length_, sizeof(outline_length_)); + } + // Arbitrarily limit the number of elements to protect against bad data. 
+ if (num_features_ > UINT16_MAX) return false; + if (num_micro_features_ > UINT16_MAX) return false; + delete [] features_; + features_ = new INT_FEATURE_STRUCT[num_features_]; + if (fread(features_, sizeof(*features_), num_features_, fp) + != num_features_) + return false; + delete [] micro_features_; + micro_features_ = new MicroFeature[num_micro_features_]; + if (fread(micro_features_, sizeof(*micro_features_), num_micro_features_, + fp) != num_micro_features_) + return false; + if (fread(cn_feature_, sizeof(*cn_feature_), kNumCNParams, fp) != + kNumCNParams) return false; + if (fread(geo_feature_, sizeof(*geo_feature_), GeoCount, fp) != GeoCount) + return false; + return true; +} + +// Saves the given features into a TrainingSample. +TrainingSample* TrainingSample::CopyFromFeatures( + const INT_FX_RESULT_STRUCT& fx_info, + const TBOX& bounding_box, + const INT_FEATURE_STRUCT* features, + int num_features) { + auto* sample = new TrainingSample; + sample->num_features_ = num_features; + sample->features_ = new INT_FEATURE_STRUCT[num_features]; + sample->outline_length_ = fx_info.Length; + memcpy(sample->features_, features, num_features * sizeof(features[0])); + sample->geo_feature_[GeoBottom] = bounding_box.bottom(); + sample->geo_feature_[GeoTop] = bounding_box.top(); + sample->geo_feature_[GeoWidth] = bounding_box.width(); + + // Generate the cn_feature_ from the fx_info. + sample->cn_feature_[CharNormY] = + MF_SCALE_FACTOR * (fx_info.Ymean - kBlnBaselineOffset); + sample->cn_feature_[CharNormLength] = + MF_SCALE_FACTOR * fx_info.Length / LENGTH_COMPRESSION; + sample->cn_feature_[CharNormRx] = MF_SCALE_FACTOR * fx_info.Rx; + sample->cn_feature_[CharNormRy] = MF_SCALE_FACTOR * fx_info.Ry; + + sample->features_are_indexed_ = false; + sample->features_are_mapped_ = false; + return sample; +} + +// Returns the cn_feature as a FEATURE_STRUCT* needed by cntraining. 
+FEATURE_STRUCT* TrainingSample::GetCNFeature() const { + FEATURE feature = NewFeature(&CharNormDesc); + for (int i = 0; i < kNumCNParams; ++i) + feature->Params[i] = cn_feature_[i]; + return feature; +} + +// Constructs and returns a copy randomized by the method given by +// the randomizer index. If index is out of [0, kSampleRandomSize) then +// an exact copy is returned. +TrainingSample* TrainingSample::RandomizedCopy(int index) const { + TrainingSample* sample = Copy(); + if (index >= 0 && index < kSampleRandomSize) { + ++index; // Remove the first combination. + const int yshift = kYShiftValues[index / kSampleScaleSize]; + double scaling = kScaleValues[index % kSampleScaleSize]; + for (uint32_t i = 0; i < num_features_; ++i) { + double result = (features_[i].X - kRandomizingCenter) * scaling; + result += kRandomizingCenter; + sample->features_[i].X = ClipToRange<int>(result + 0.5, 0, UINT8_MAX); + result = (features_[i].Y - kRandomizingCenter) * scaling; + result += kRandomizingCenter + yshift; + sample->features_[i].Y = ClipToRange<int>(result + 0.5, 0, UINT8_MAX); + } + } + return sample; +} + +// Constructs and returns an exact copy. 
+TrainingSample* TrainingSample::Copy() const { + auto* sample = new TrainingSample; + sample->class_id_ = class_id_; + sample->font_id_ = font_id_; + sample->weight_ = weight_; + sample->sample_index_ = sample_index_; + sample->num_features_ = num_features_; + if (num_features_ > 0) { + sample->features_ = new INT_FEATURE_STRUCT[num_features_]; + memcpy(sample->features_, features_, num_features_ * sizeof(features_[0])); + } + sample->num_micro_features_ = num_micro_features_; + if (num_micro_features_ > 0) { + sample->micro_features_ = new MicroFeature[num_micro_features_]; + memcpy(sample->micro_features_, micro_features_, + num_micro_features_ * sizeof(micro_features_[0])); + } + memcpy(sample->cn_feature_, cn_feature_, sizeof(*cn_feature_) * kNumCNParams); + memcpy(sample->geo_feature_, geo_feature_, sizeof(*geo_feature_) * GeoCount); + return sample; +} + +// Extracts the needed information from the CHAR_DESC_STRUCT. +void TrainingSample::ExtractCharDesc(int int_feature_type, + int micro_type, + int cn_type, + int geo_type, + CHAR_DESC_STRUCT* char_desc) { + // Extract the INT features. + delete[] features_; + FEATURE_SET_STRUCT* char_features = char_desc->FeatureSets[int_feature_type]; + if (char_features == nullptr) { + tprintf("Error: no features to train on of type %s\n", + kIntFeatureType); + num_features_ = 0; + features_ = nullptr; + } else { + num_features_ = char_features->NumFeatures; + features_ = new INT_FEATURE_STRUCT[num_features_]; + for (uint32_t f = 0; f < num_features_; ++f) { + features_[f].X = + static_cast<uint8_t>(char_features->Features[f]->Params[IntX]); + features_[f].Y = + static_cast<uint8_t>(char_features->Features[f]->Params[IntY]); + features_[f].Theta = + static_cast<uint8_t>(char_features->Features[f]->Params[IntDir]); + features_[f].CP_misses = 0; + } + } + // Extract the Micro features. 
+ delete[] micro_features_; + char_features = char_desc->FeatureSets[micro_type]; + if (char_features == nullptr) { + tprintf("Error: no features to train on of type %s\n", + kMicroFeatureType); + num_micro_features_ = 0; + micro_features_ = nullptr; + } else { + num_micro_features_ = char_features->NumFeatures; + micro_features_ = new MicroFeature[num_micro_features_]; + for (uint32_t f = 0; f < num_micro_features_; ++f) { + for (int d = 0; d < MFCount; ++d) { + micro_features_[f][d] = char_features->Features[f]->Params[d]; + } + } + } + // Extract the CN feature. + char_features = char_desc->FeatureSets[cn_type]; + if (char_features == nullptr) { + tprintf("Error: no CN feature to train on.\n"); + } else { + ASSERT_HOST(char_features->NumFeatures == 1); + cn_feature_[CharNormY] = char_features->Features[0]->Params[CharNormY]; + cn_feature_[CharNormLength] = + char_features->Features[0]->Params[CharNormLength]; + cn_feature_[CharNormRx] = char_features->Features[0]->Params[CharNormRx]; + cn_feature_[CharNormRy] = char_features->Features[0]->Params[CharNormRy]; + } + // Extract the Geo feature. + char_features = char_desc->FeatureSets[geo_type]; + if (char_features == nullptr) { + tprintf("Error: no Geo feature to train on.\n"); + } else { + ASSERT_HOST(char_features->NumFeatures == 1); + geo_feature_[GeoBottom] = char_features->Features[0]->Params[GeoBottom]; + geo_feature_[GeoTop] = char_features->Features[0]->Params[GeoTop]; + geo_feature_[GeoWidth] = char_features->Features[0]->Params[GeoWidth]; + } + features_are_indexed_ = false; + features_are_mapped_ = false; +} + +// Sets the mapped_features_ from the features_ using the provided +// feature_space to the indexed versions of the features. 
+void TrainingSample::IndexFeatures(const IntFeatureSpace& feature_space) { + GenericVector<int> indexed_features; + feature_space.IndexAndSortFeatures(features_, num_features_, + &mapped_features_); + features_are_indexed_ = true; + features_are_mapped_ = false; +} + +// Returns a pix representing the sample. (Int features only.) +Pix* TrainingSample::RenderToPix(const UNICHARSET* unicharset) const { + Pix* pix = pixCreate(kIntFeatureExtent, kIntFeatureExtent, 1); + for (uint32_t f = 0; f < num_features_; ++f) { + int start_x = features_[f].X; + int start_y = kIntFeatureExtent - features_[f].Y; + double dx = cos((features_[f].Theta / 256.0) * 2.0 * M_PI - M_PI); + double dy = -sin((features_[f].Theta / 256.0) * 2.0 * M_PI - M_PI); + for (int i = 0; i <= 5; ++i) { + int x = static_cast<int>(start_x + dx * i); + int y = static_cast<int>(start_y + dy * i); + if (x >= 0 && x < 256 && y >= 0 && y < 256) + pixSetPixel(pix, x, y, 1); + } + } + if (unicharset != nullptr) + pixSetText(pix, unicharset->id_to_unichar(class_id_)); + return pix; +} + +#ifndef GRAPHICS_DISABLED + +// Displays the features in the given window with the given color. +void TrainingSample::DisplayFeatures(ScrollView::Color color, + ScrollView* window) const { + for (uint32_t f = 0; f < num_features_; ++f) { + RenderIntFeature(window, &features_[f], color); + } +} + +#endif // !GRAPHICS_DISABLED + +// Returns a pix of the original sample image. The pix is padded all round +// by padding wherever possible. +// The returned Pix must be pixDestroyed after use. +// If the input page_pix is nullptr, nullptr is returned. 
+Pix* TrainingSample::GetSamplePix(int padding, Pix* page_pix) const { + if (page_pix == nullptr) + return nullptr; + int page_width = pixGetWidth(page_pix); + int page_height = pixGetHeight(page_pix); + TBOX padded_box = bounding_box(); + padded_box.pad(padding, padding); + // Clip the padded_box to the limits of the page + TBOX page_box(0, 0, page_width, page_height); + padded_box &= page_box; + Box* box = boxCreate(page_box.left(), page_height - page_box.top(), + page_box.width(), page_box.height()); + Pix* sample_pix = pixClipRectangle(page_pix, box, nullptr); + boxDestroy(&box); + return sample_pix; +} + +} // namespace tesseract diff --git a/tesseract/src/classify/trainingsample.h b/tesseract/src/classify/trainingsample.h new file mode 100644 index 00000000..0ac2cc4f --- /dev/null +++ b/tesseract/src/classify/trainingsample.h @@ -0,0 +1,252 @@ +// Copyright 2010 Google Inc. All Rights Reserved. +// Author: rays@google.com (Ray Smith) +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +/////////////////////////////////////////////////////////////////////// + +#ifndef TESSERACT_TRAINING_TRAININGSAMPLE_H_ +#define TESSERACT_TRAINING_TRAININGSAMPLE_H_ + +#include "elst.h" +#include "featdefs.h" +#include "intfx.h" +#include "intmatcher.h" +#include "matrix.h" +#include "mf.h" +#include "picofeat.h" +#include "shapetable.h" +#include "unicharset.h" + +struct Pix; + +namespace tesseract { + +class IntFeatureMap; +class IntFeatureSpace; +class ShapeTable; + +// Number of elements of cn_feature_. +static const int kNumCNParams = 4; +// Number of ways to shift the features when randomizing. +static const int kSampleYShiftSize = 5; +// Number of ways to scale the features when randomizing. +static const int kSampleScaleSize = 3; +// Total number of different ways to manipulate the features when randomizing. +// The first and last combinations are removed to avoid an excessive +// top movement (first) and an identity transformation (last). +// WARNING: To avoid patterned duplication of samples, be sure to keep +// kSampleRandomSize prime! +// Eg with current values (kSampleYShiftSize = 5 and TkSampleScaleSize = 3) +// kSampleRandomSize is 13, which is prime. +static const int kSampleRandomSize = kSampleYShiftSize * kSampleScaleSize - 2; +// ASSERT_IS_PRIME(kSampleRandomSize) !! + +class TESS_API TrainingSample : public ELIST_LINK { + public: + TrainingSample() + : class_id_(INVALID_UNICHAR_ID), font_id_(0), page_num_(0), + num_features_(0), num_micro_features_(0), outline_length_(0), + features_(nullptr), micro_features_(nullptr), weight_(1.0), + max_dist_(0.0), sample_index_(0), + features_are_indexed_(false), features_are_mapped_(false), + is_error_(false) { + } + ~TrainingSample(); + + // Saves the given features into a TrainingSample. The features are copied, + // so may be deleted afterwards. Delete the return value after use. 
+ static TrainingSample* CopyFromFeatures(const INT_FX_RESULT_STRUCT& fx_info, + const TBOX& bounding_box, + const INT_FEATURE_STRUCT* features, + int num_features); + // Returns the cn_feature as a FEATURE_STRUCT* needed by cntraining. + FEATURE_STRUCT* GetCNFeature() const; + // Constructs and returns a copy "randomized" by the method given by + // the randomizer index. If index is out of [0, kSampleRandomSize) then + // an exact copy is returned. + TrainingSample* RandomizedCopy(int index) const; + // Constructs and returns an exact copy. + TrainingSample* Copy() const; + + // WARNING! Serialize/DeSerialize do not save/restore the "cache" data + // members, which is mostly the mapped features, and the weight. + // It is assumed these can all be reconstructed from what is saved. + // Writes to the given file. Returns false in case of error. + bool Serialize(FILE* fp) const; + // Creates from the given file. Returns nullptr in case of error. + // If swap is true, assumes a big/little-endian swap is needed. + static TrainingSample* DeSerializeCreate(bool swap, FILE* fp); + // Reads from the given file. Returns false in case of error. + // If swap is true, assumes a big/little-endian swap is needed. + bool DeSerialize(bool swap, FILE* fp); + + // Extracts the needed information from the CHAR_DESC_STRUCT. + void ExtractCharDesc(int feature_type, int micro_type, + int cn_type, int geo_type, + CHAR_DESC_STRUCT* char_desc); + + // Sets the mapped_features_ from the features_ using the provided + // feature_space to the indexed versions of the features. + void IndexFeatures(const IntFeatureSpace& feature_space); + + // Returns a pix representing the sample. (Int features only.) + Pix* RenderToPix(const UNICHARSET* unicharset) const; + // Displays the features in the given window with the given color. + void DisplayFeatures(ScrollView::Color color, ScrollView* window) const; + + // Returns a pix of the original sample image. 
The pix is padded all round + // by padding wherever possible. + // The returned Pix must be pixDestroyed after use. + // If the input page_pix is nullptr, nullptr is returned. + Pix* GetSamplePix(int padding, Pix* page_pix) const; + + // Accessors. + UNICHAR_ID class_id() const { + return class_id_; + } + void set_class_id(int id) { + class_id_ = id; + } + int font_id() const { + return font_id_; + } + void set_font_id(int id) { + font_id_ = id; + } + int page_num() const { + return page_num_; + } + void set_page_num(int page) { + page_num_ = page; + } + const TBOX& bounding_box() const { + return bounding_box_; + } + void set_bounding_box(const TBOX& box) { + bounding_box_ = box; + } + uint32_t num_features() const { + return num_features_; + } + const INT_FEATURE_STRUCT* features() const { + return features_; + } + uint32_t num_micro_features() const { + return num_micro_features_; + } + const MicroFeature* micro_features() const { + return micro_features_; + } + int outline_length() const { + return outline_length_; + } + float cn_feature(int index) const { + return cn_feature_[index]; + } + int geo_feature(int index) const { + return geo_feature_[index]; + } + double weight() const { + return weight_; + } + void set_weight(double value) { + weight_ = value; + } + double max_dist() const { + return max_dist_; + } + void set_max_dist(double value) { + max_dist_ = value; + } + int sample_index() const { + return sample_index_; + } + void set_sample_index(int value) { + sample_index_ = value; + } + bool features_are_mapped() const { + return features_are_mapped_; + } + const GenericVector<int>& mapped_features() const { + ASSERT_HOST(features_are_mapped_); + return mapped_features_; + } + const GenericVector<int>& indexed_features() const { + ASSERT_HOST(features_are_indexed_); + return mapped_features_; + } + bool is_error() const { + return is_error_; + } + void set_is_error(bool value) { + is_error_ = value; + } + + private: + // Unichar id that this sample 
represents. There obviously must be a + // reference UNICHARSET somewhere. Usually in TrainingSampleSet. + UNICHAR_ID class_id_; + // Font id in which this sample was printed. Refers to a fontinfo_table_ in + // MasterTrainer. + int font_id_; + // Number of page that the sample came from. + int page_num_; + // Bounding box of sample in original image. + TBOX bounding_box_; + // Number of INT_FEATURE_STRUCT in features_ array. + uint32_t num_features_; + // Number of MicroFeature in micro_features_ array. + uint32_t num_micro_features_; + // Total length of outline in the baseline normalized coordinate space. + // See comment in WERD_RES class definition for a discussion of coordinate + // spaces. + int outline_length_; + // Array of features. + INT_FEATURE_STRUCT* features_; + // Array of features. + MicroFeature* micro_features_; + // The one and only CN feature. Indexed by NORM_PARAM_NAME enum. + float cn_feature_[kNumCNParams]; + // The one and only geometric feature. (Aims at replacing cn_feature_). + // Indexed by GeoParams enum in picofeat.h + int geo_feature_[GeoCount]; + + // Non-serialized cache data. + // Weight used for boosting training. + double weight_; + // Maximum distance to other samples of same class/font used in computing + // the canonical sample. + double max_dist_; + // Global index of this sample. + int sample_index_; +public: + // both are used in training tools + // hide after refactoring + + // Indexed/mapped features, as indicated by the bools below. + GenericVector<int> mapped_features_; + bool features_are_indexed_; + bool features_are_mapped_; +private: + // True if the last classification was an error by the current definition. + bool is_error_; + + // Randomizing factors. + static const int kYShiftValues[kSampleYShiftSize]; + static const double kScaleValues[kSampleScaleSize]; +}; + +ELISTIZEH(TrainingSample) + +} // namespace tesseract + +#endif // TESSERACT_TRAINING_TRAININGSAMPLE_H_ |