diff options
Diffstat (limited to 'tesseract/src/classify')
56 files changed, 17075 insertions, 0 deletions
diff --git a/tesseract/src/classify/adaptive.cpp b/tesseract/src/classify/adaptive.cpp new file mode 100644 index 00000000..92f0d3da --- /dev/null +++ b/tesseract/src/classify/adaptive.cpp @@ -0,0 +1,498 @@ +/****************************************************************************** + ** Filename: adaptive.c + ** Purpose: Adaptive matcher. + ** Author: Dan Johnson + ** History: Fri Mar 8 10:00:21 1991, DSJ, Created. + ** + ** (c) Copyright Hewlett-Packard Company, 1988. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + ******************************************************************************/ + +#include "adaptive.h" + +#include "classify.h" + +#include <cassert> +#include <cstdio> + +namespace tesseract { + +/*---------------------------------------------------------------------------- + Public Code +----------------------------------------------------------------------------*/ +/*---------------------------------------------------------------------------*/ +/** + * This routine adds a new adapted class to an existing + * set of adapted templates. 
+ * + * @param Templates set of templates to add new class to + * @param Class new class to add to templates + * @param ClassId class id to associate with new class + * + * @note Globals: none + */ +void AddAdaptedClass(ADAPT_TEMPLATES Templates, + ADAPT_CLASS Class, + CLASS_ID ClassId) { + INT_CLASS IntClass; + + assert (Templates != nullptr); + assert (Class != nullptr); + assert (LegalClassId (ClassId)); + assert (UnusedClassIdIn (Templates->Templates, ClassId)); + assert (Class->NumPermConfigs == 0); + + IntClass = NewIntClass (1, 1); + AddIntClass (Templates->Templates, ClassId, IntClass); + + assert (Templates->Class[ClassId] == nullptr); + Templates->Class[ClassId] = Class; + +} /* AddAdaptedClass */ + + +/*---------------------------------------------------------------------------*/ +/** + * This routine frees all memory consumed by a temporary + * configuration. + * + * @param Config config to be freed + * + * @note Globals: none + */ +void FreeTempConfig(TEMP_CONFIG Config) { + assert (Config != nullptr); + FreeBitVector (Config->Protos); + free(Config); +} /* FreeTempConfig */ + +/*---------------------------------------------------------------------------*/ +void FreeTempProto(void *arg) { + auto proto = static_cast<PROTO>(arg); + + free(proto); +} + +static void FreePermConfig(PERM_CONFIG Config) { + assert(Config != nullptr); + delete [] Config->Ambigs; + free(Config); +} + +/*---------------------------------------------------------------------------*/ +/** + * This operation allocates and initializes a new adapted + * class data structure and returns a ptr to it. + * + * @return Ptr to new class data structure. 
+ * + * @note Globals: none + */ +ADAPT_CLASS NewAdaptedClass() { + ADAPT_CLASS Class; + + Class = static_cast<ADAPT_CLASS>(malloc (sizeof (ADAPT_CLASS_STRUCT))); + Class->NumPermConfigs = 0; + Class->MaxNumTimesSeen = 0; + Class->TempProtos = NIL_LIST; + + Class->PermProtos = NewBitVector (MAX_NUM_PROTOS); + Class->PermConfigs = NewBitVector (MAX_NUM_CONFIGS); + zero_all_bits (Class->PermProtos, WordsInVectorOfSize (MAX_NUM_PROTOS)); + zero_all_bits (Class->PermConfigs, WordsInVectorOfSize (MAX_NUM_CONFIGS)); + + for (int i = 0; i < MAX_NUM_CONFIGS; i++) + TempConfigFor (Class, i) = nullptr; + + return (Class); + +} /* NewAdaptedClass */ + + +/*-------------------------------------------------------------------------*/ +void free_adapted_class(ADAPT_CLASS adapt_class) { + for (int i = 0; i < MAX_NUM_CONFIGS; i++) { + if (ConfigIsPermanent (adapt_class, i) + && PermConfigFor (adapt_class, i) != nullptr) + FreePermConfig (PermConfigFor (adapt_class, i)); + else if (!ConfigIsPermanent (adapt_class, i) + && TempConfigFor (adapt_class, i) != nullptr) + FreeTempConfig (TempConfigFor (adapt_class, i)); + } + FreeBitVector (adapt_class->PermProtos); + FreeBitVector (adapt_class->PermConfigs); + destroy_nodes (adapt_class->TempProtos, FreeTempProto); + free(adapt_class); +} + + +/*---------------------------------------------------------------------------*/ +/** + * Allocates memory for adapted templates. + * each char in unicharset to the newly created templates + * + * @param InitFromUnicharset if true, add an empty class for + * @return Ptr to new adapted templates. 
+ * + * @note Globals: none + */ +ADAPT_TEMPLATES Classify::NewAdaptedTemplates(bool InitFromUnicharset) { + ADAPT_TEMPLATES Templates; + + Templates = static_cast<ADAPT_TEMPLATES>(malloc (sizeof (ADAPT_TEMPLATES_STRUCT))); + + Templates->Templates = NewIntTemplates (); + Templates->NumPermClasses = 0; + Templates->NumNonEmptyClasses = 0; + + /* Insert an empty class for each unichar id in unicharset */ + for (int i = 0; i < MAX_NUM_CLASSES; i++) { + Templates->Class[i] = nullptr; + if (InitFromUnicharset && i < unicharset.size()) { + AddAdaptedClass(Templates, NewAdaptedClass(), i); + } + } + + return (Templates); + +} /* NewAdaptedTemplates */ + +// Returns FontinfoId of the given config of the given adapted class. +int Classify::GetFontinfoId(ADAPT_CLASS Class, uint8_t ConfigId) { + return (ConfigIsPermanent(Class, ConfigId) ? + PermConfigFor(Class, ConfigId)->FontinfoId : + TempConfigFor(Class, ConfigId)->FontinfoId); +} + +/*----------------------------------------------------------------------------*/ +void free_adapted_templates(ADAPT_TEMPLATES templates) { + + if (templates != nullptr) { + for (int i = 0; i < (templates->Templates)->NumClasses; i++) + free_adapted_class (templates->Class[i]); + free_int_templates (templates->Templates); + free(templates); + } +} + + +/*---------------------------------------------------------------------------*/ +/** + * This routine allocates and returns a new temporary config. + * + * @param MaxProtoId max id of any proto in new config + * @param FontinfoId font information from pre-trained templates + * @return Ptr to new temp config. 
+ * + * @note Globals: none + */ +TEMP_CONFIG NewTempConfig(int MaxProtoId, int FontinfoId) { + int NumProtos = MaxProtoId + 1; + + auto Config = static_cast<TEMP_CONFIG>(malloc(sizeof(TEMP_CONFIG_STRUCT))); + Config->Protos = NewBitVector (NumProtos); + + Config->NumTimesSeen = 1; + Config->MaxProtoId = MaxProtoId; + Config->ProtoVectorSize = WordsInVectorOfSize (NumProtos); + zero_all_bits (Config->Protos, Config->ProtoVectorSize); + Config->FontinfoId = FontinfoId; + + return (Config); + +} /* NewTempConfig */ + + +/*---------------------------------------------------------------------------*/ +/** + * This routine allocates and returns a new temporary proto. + * + * @return Ptr to new temporary proto. + * + * @note Globals: none + */ +TEMP_PROTO NewTempProto() { + return static_cast<TEMP_PROTO>(malloc(sizeof(TEMP_PROTO_STRUCT))); +} /* NewTempProto */ + + +/*---------------------------------------------------------------------------*/ +/** + * This routine prints a summary of the adapted templates + * in Templates to File. 
+ * + * @param File open text file to print Templates to + * @param Templates adapted templates to print to File + * + * @note Globals: none + */ +void Classify::PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates) { + INT_CLASS IClass; + ADAPT_CLASS AClass; + + fprintf (File, "\n\nSUMMARY OF ADAPTED TEMPLATES:\n\n"); + fprintf (File, "Num classes = %d; Num permanent classes = %d\n\n", + Templates->NumNonEmptyClasses, Templates->NumPermClasses); + fprintf (File, " Id NC NPC NP NPP\n"); + fprintf (File, "------------------------\n"); + + for (int i = 0; i < (Templates->Templates)->NumClasses; i++) { + IClass = Templates->Templates->Class[i]; + AClass = Templates->Class[i]; + if (!IsEmptyAdaptedClass (AClass)) { + fprintf (File, "%5d %s %3d %3d %3d %3d\n", + i, unicharset.id_to_unichar(i), + IClass->NumConfigs, AClass->NumPermConfigs, + IClass->NumProtos, + IClass->NumProtos - count (AClass->TempProtos)); + } + } + fprintf (File, "\n"); + +} /* PrintAdaptedTemplates */ + + +/*---------------------------------------------------------------------------*/ +/** + * Read an adapted class description from file and return + * a ptr to the adapted class. + * + * @param fp open file to read adapted class from + * @return Ptr to new adapted class. 
+ * + * @note Globals: none + */ +ADAPT_CLASS ReadAdaptedClass(TFile *fp) { + int NumTempProtos; + int NumConfigs; + int i; + ADAPT_CLASS Class; + + /* first read high level adapted class structure */ + Class = static_cast<ADAPT_CLASS>(malloc (sizeof (ADAPT_CLASS_STRUCT))); + fp->FRead(Class, sizeof(ADAPT_CLASS_STRUCT), 1); + + /* then read in the definitions of the permanent protos and configs */ + Class->PermProtos = NewBitVector (MAX_NUM_PROTOS); + Class->PermConfigs = NewBitVector (MAX_NUM_CONFIGS); + fp->FRead(Class->PermProtos, sizeof(uint32_t), + WordsInVectorOfSize(MAX_NUM_PROTOS)); + fp->FRead(Class->PermConfigs, sizeof(uint32_t), + WordsInVectorOfSize(MAX_NUM_CONFIGS)); + + /* then read in the list of temporary protos */ + fp->FRead(&NumTempProtos, sizeof(int), 1); + Class->TempProtos = NIL_LIST; + for (i = 0; i < NumTempProtos; i++) { + auto TempProto = static_cast<TEMP_PROTO>(malloc(sizeof(TEMP_PROTO_STRUCT))); + fp->FRead(TempProto, sizeof(TEMP_PROTO_STRUCT), 1); + Class->TempProtos = push_last (Class->TempProtos, TempProto); + } + + /* then read in the adapted configs */ + fp->FRead(&NumConfigs, sizeof(int), 1); + for (i = 0; i < NumConfigs; i++) + if (test_bit (Class->PermConfigs, i)) + Class->Config[i].Perm = ReadPermConfig(fp); + else + Class->Config[i].Temp = ReadTempConfig(fp); + + return (Class); + +} /* ReadAdaptedClass */ + + +/*---------------------------------------------------------------------------*/ +/** + * Read a set of adapted templates from file and return + * a ptr to the templates. + * + * @param fp open text file to read adapted templates from + * @return Ptr to adapted templates read from file. 
+ * + * @note Globals: none + */ +ADAPT_TEMPLATES Classify::ReadAdaptedTemplates(TFile *fp) { + ADAPT_TEMPLATES Templates; + + /* first read the high level adaptive template struct */ + Templates = static_cast<ADAPT_TEMPLATES>(malloc (sizeof (ADAPT_TEMPLATES_STRUCT))); + fp->FRead(Templates, sizeof(ADAPT_TEMPLATES_STRUCT), 1); + + /* then read in the basic integer templates */ + Templates->Templates = ReadIntTemplates(fp); + + /* then read in the adaptive info for each class */ + for (int i = 0; i < (Templates->Templates)->NumClasses; i++) { + Templates->Class[i] = ReadAdaptedClass(fp); + } + return (Templates); + +} /* ReadAdaptedTemplates */ + +/*---------------------------------------------------------------------------*/ +/** + * Read a permanent configuration description from file + * and return a ptr to it. + * + * @param fp open file to read permanent config from + * @return Ptr to new permanent configuration description. + * + * @note Globals: none + */ +PERM_CONFIG ReadPermConfig(TFile *fp) { + auto Config = static_cast<PERM_CONFIG>(malloc(sizeof(PERM_CONFIG_STRUCT))); + uint8_t NumAmbigs; + fp->FRead(&NumAmbigs, sizeof(NumAmbigs), 1); + Config->Ambigs = new UNICHAR_ID[NumAmbigs + 1]; + fp->FRead(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs); + Config->Ambigs[NumAmbigs] = -1; + fp->FRead(&(Config->FontinfoId), sizeof(int), 1); + + return (Config); + +} /* ReadPermConfig */ + + +/*---------------------------------------------------------------------------*/ +/** + * Read a temporary configuration description from file + * and return a ptr to it. + * + * @param fp open file to read temporary config from + * @return Ptr to new temporary configuration description. 
+ * + * @note Globals: none + */ +TEMP_CONFIG ReadTempConfig(TFile *fp) { + auto Config = static_cast<TEMP_CONFIG>(malloc(sizeof(TEMP_CONFIG_STRUCT))); + fp->FRead(Config, sizeof(TEMP_CONFIG_STRUCT), 1); + + Config->Protos = NewBitVector (Config->ProtoVectorSize * BITSINLONG); + fp->FRead(Config->Protos, sizeof(uint32_t), Config->ProtoVectorSize); + + return (Config); + +} /* ReadTempConfig */ + + +/*---------------------------------------------------------------------------*/ +/** + * This routine writes a binary representation of Class + * to File. + * + * @param File open file to write Class to + * @param Class adapted class to write to File + * @param NumConfigs number of configs in Class + * + * @note Globals: none + */ +void WriteAdaptedClass(FILE *File, ADAPT_CLASS Class, int NumConfigs) { + int NumTempProtos; + LIST TempProtos; + int i; + + /* first write high level adapted class structure */ + fwrite(Class, sizeof(ADAPT_CLASS_STRUCT), 1, File); + + /* then write out the definitions of the permanent protos and configs */ + fwrite(Class->PermProtos, sizeof(uint32_t), + WordsInVectorOfSize(MAX_NUM_PROTOS), File); + fwrite(Class->PermConfigs, sizeof(uint32_t), + WordsInVectorOfSize(MAX_NUM_CONFIGS), File); + + /* then write out the list of temporary protos */ + NumTempProtos = count (Class->TempProtos); + fwrite(&NumTempProtos, sizeof(int), 1, File); + TempProtos = Class->TempProtos; + iterate (TempProtos) { + void* proto = first_node(TempProtos); + fwrite(proto, sizeof(TEMP_PROTO_STRUCT), 1, File); + } + + /* then write out the adapted configs */ + fwrite(&NumConfigs, sizeof(int), 1, File); + for (i = 0; i < NumConfigs; i++) + if (test_bit (Class->PermConfigs, i)) + WritePermConfig (File, Class->Config[i].Perm); + else + WriteTempConfig (File, Class->Config[i].Temp); + +} /* WriteAdaptedClass */ + + +/*---------------------------------------------------------------------------*/ +/** + * This routine saves Templates to File in a binary format. 
+ * + * @param File open text file to write Templates to + * @param Templates set of adapted templates to write to File + * + * @note Globals: none + */ +void Classify::WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates) { + int i; + + /* first write the high level adaptive template struct */ + fwrite(Templates, sizeof(ADAPT_TEMPLATES_STRUCT), 1, File); + + /* then write out the basic integer templates */ + WriteIntTemplates (File, Templates->Templates, unicharset); + + /* then write out the adaptive info for each class */ + for (i = 0; i < (Templates->Templates)->NumClasses; i++) { + WriteAdaptedClass (File, Templates->Class[i], + Templates->Templates->Class[i]->NumConfigs); + } +} /* WriteAdaptedTemplates */ + + +/*---------------------------------------------------------------------------*/ +/** + * This routine writes a binary representation of a + * permanent configuration to File. + * + * @param File open file to write Config to + * @param Config permanent config to write to File + * + * @note Globals: none + */ +void WritePermConfig(FILE *File, PERM_CONFIG Config) { + uint8_t NumAmbigs = 0; + + assert (Config != nullptr); + while (Config->Ambigs[NumAmbigs] > 0) ++NumAmbigs; + + fwrite(&NumAmbigs, sizeof(uint8_t), 1, File); + fwrite(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs, File); + fwrite(&(Config->FontinfoId), sizeof(int), 1, File); +} /* WritePermConfig */ + + +/*---------------------------------------------------------------------------*/ +/** + * This routine writes a binary representation of a + * temporary configuration to File. 
+ * + * @param File open file to write Config to + * @param Config temporary config to write to File + * + * @note Globals: none + */ +void WriteTempConfig(FILE *File, TEMP_CONFIG Config) { + assert (Config != nullptr); + + fwrite(Config, sizeof (TEMP_CONFIG_STRUCT), 1, File); + fwrite(Config->Protos, sizeof (uint32_t), Config->ProtoVectorSize, File); + +} /* WriteTempConfig */ + +} // namespace tesseract diff --git a/tesseract/src/classify/adaptive.h b/tesseract/src/classify/adaptive.h new file mode 100644 index 00000000..b1bf6a2e --- /dev/null +++ b/tesseract/src/classify/adaptive.h @@ -0,0 +1,128 @@ +/****************************************************************************** + ** Filename: adaptive.h + ** Purpose: Interface to adaptive matcher. + ** Author: Dan Johnson + ** + ** (c) Copyright Hewlett-Packard Company, 1988. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ ******************************************************************************/ +#ifndef ADAPTIVE_H +#define ADAPTIVE_H + +#include "intproto.h" +#include "oldlist.h" + +#include <cstdio> + +namespace tesseract { + +typedef struct { + uint16_t ProtoId; + PROTO_STRUCT Proto; +} + +TEMP_PROTO_STRUCT; +using TEMP_PROTO = TEMP_PROTO_STRUCT*; + +typedef struct { + uint8_t NumTimesSeen; + uint8_t ProtoVectorSize; + PROTO_ID MaxProtoId; + BIT_VECTOR Protos; + int FontinfoId; // font information inferred from pre-trained templates +} TEMP_CONFIG_STRUCT; +using TEMP_CONFIG = TEMP_CONFIG_STRUCT*; + +typedef struct { + UNICHAR_ID* Ambigs; + int FontinfoId; // font information inferred from pre-trained templates +} PERM_CONFIG_STRUCT; +using PERM_CONFIG = PERM_CONFIG_STRUCT*; + +typedef union { + TEMP_CONFIG Temp; + PERM_CONFIG Perm; +} ADAPTED_CONFIG; + +typedef struct { + uint8_t NumPermConfigs; + uint8_t MaxNumTimesSeen; // maximum number of times any TEMP_CONFIG was seen + // (cut at matcher_min_examples_for_prototyping) + BIT_VECTOR PermProtos; + BIT_VECTOR PermConfigs; + LIST TempProtos; + ADAPTED_CONFIG Config[MAX_NUM_CONFIGS]; +} ADAPT_CLASS_STRUCT; +using ADAPT_CLASS = ADAPT_CLASS_STRUCT*; + +typedef struct { + INT_TEMPLATES Templates; + int NumNonEmptyClasses; + uint8_t NumPermClasses; + ADAPT_CLASS Class[MAX_NUM_CLASSES]; +} ADAPT_TEMPLATES_STRUCT; +using ADAPT_TEMPLATES = ADAPT_TEMPLATES_STRUCT*; + +/*---------------------------------------------------------------------------- + Public Function Prototypes +----------------------------------------------------------------------------*/ +#define NumNonEmptyClassesIn(Template) ((Template)->NumNonEmptyClasses) + +#define IsEmptyAdaptedClass(Class) \ + ((Class)->NumPermConfigs == 0 && (Class)->TempProtos == NIL_LIST) + +#define ConfigIsPermanent(Class, ConfigId) \ + (test_bit((Class)->PermConfigs, ConfigId)) + +#define MakeConfigPermanent(Class, ConfigId) \ + (SET_BIT((Class)->PermConfigs, ConfigId)) + +#define 
MakeProtoPermanent(Class, ProtoId) \ + (SET_BIT((Class)->PermProtos, ProtoId)) + +#define TempConfigFor(Class, ConfigId) ((Class)->Config[ConfigId].Temp) + +#define PermConfigFor(Class, ConfigId) ((Class)->Config[ConfigId].Perm) + +#define IncreaseConfidence(TempConfig) ((TempConfig)->NumTimesSeen++) + +void AddAdaptedClass(ADAPT_TEMPLATES Templates, ADAPT_CLASS Class, + CLASS_ID ClassId); + +void FreeTempProto(void* arg); + +void FreeTempConfig(TEMP_CONFIG Config); + +ADAPT_CLASS NewAdaptedClass(); + +void free_adapted_class(ADAPT_CLASS adapt_class); + +void free_adapted_templates(ADAPT_TEMPLATES templates); + +TEMP_CONFIG NewTempConfig(int MaxProtoId, int FontinfoId); + +TEMP_PROTO NewTempProto(); + +ADAPT_CLASS ReadAdaptedClass(tesseract::TFile* File); + +PERM_CONFIG ReadPermConfig(tesseract::TFile* File); + +TEMP_CONFIG ReadTempConfig(tesseract::TFile* File); + +void WriteAdaptedClass(FILE* File, ADAPT_CLASS Class, int NumConfigs); + +void WritePermConfig(FILE* File, PERM_CONFIG Config); + +void WriteTempConfig(FILE* File, TEMP_CONFIG Config); + +} // namespace tesseract + +#endif diff --git a/tesseract/src/classify/adaptmatch.cpp b/tesseract/src/classify/adaptmatch.cpp new file mode 100644 index 00000000..65254b8a --- /dev/null +++ b/tesseract/src/classify/adaptmatch.cpp @@ -0,0 +1,2317 @@ +/****************************************************************************** + ** Filename: adaptmatch.cpp + ** Purpose: High level adaptive matcher. + ** Author: Dan Johnson + ** + ** (c) Copyright Hewlett-Packard Company, 1988. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. 
+ ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + ******************************************************************************/ + +/*----------------------------------------------------------------------------- + Include Files and Type Defines +-----------------------------------------------------------------------------*/ +#ifdef HAVE_CONFIG_H +#include "config_auto.h" +#endif + +#include "adaptive.h" // for ADAPT_CLASS, free_adapted_templates +#include "ambigs.h" // for UnicharIdVector, UnicharAmbigs +#include "bitvec.h" // for FreeBitVector, NewBitVector, BIT_VECTOR +#include "blobs.h" // for TBLOB, TWERD +#include "classify.h" // for Classify, CST_FRAGMENT, CST_WHOLE +#include "dict.h" // for Dict +#include "errcode.h" // for ASSERT_HOST +#include "featdefs.h" // for CharNormDesc +#include "float2int.h" // for BASELINE_Y_SHIFT +#include "fontinfo.h" // for ScoredFont, FontSet +#include "intfx.h" // for BlobToTrainingSample, INT_FX_RESULT_S... +#include "intmatcher.h" // for CP_RESULT_STRUCT, IntegerMatcher +#include "intproto.h" // for INT_FEATURE_STRUCT, (anonymous), Clas... 
+#include "matchdefs.h" // for CLASS_ID, FEATURE_ID, PROTO_ID, NO_PROTO +#include "mfoutline.h" // for baseline, character, MF_SCALE_FACTOR +#include "normalis.h" // for DENORM, kBlnBaselineOffset, kBlnXHeight +#include "normfeat.h" // for ActualOutlineLength, CharNormLength +#include "ocrfeatures.h" // for FEATURE_STRUCT, FreeFeatureSet, FEATURE +#include "oldlist.h" // for push, delete_d +#include "outfeat.h" // for OutlineFeatDir, OutlineFeatLength +#include "pageres.h" // for WERD_RES +#include "params.h" // for IntParam, BoolParam, DoubleParam, Str... +#include "picofeat.h" // for PicoFeatDir, PicoFeatX, PicoFeatY +#include "protos.h" // for PROTO_STRUCT, FillABC, PROTO +#include "ratngs.h" // for BLOB_CHOICE_IT, BLOB_CHOICE_LIST, BLO... +#include "rect.h" // for TBOX +#include "scrollview.h" // for ScrollView, ScrollView::BROWN, Scroll... +#include "seam.h" // for SEAM +#include "shapeclassifier.h" // for ShapeClassifier +#include "shapetable.h" // for UnicharRating, ShapeTable, Shape, Uni... 
+#include "tessclassifier.h" // for TessClassifier +#include "tessdatamanager.h" // for TessdataManager, TESSDATA_INTTEMP +#include "tprintf.h" // for tprintf +#include "trainingsample.h" // for TrainingSample +#include "unicharset.h" // for UNICHARSET, CHAR_FRAGMENT, UNICHAR_SPACE +#include "unicity_table.h" // for UnicityTable + +#include "genericvector.h" // for GenericVector +#include "serialis.h" // for TFile +#include "strngs.h" // for STRING +#include "helpers.h" // for IntCastRounded, ClipToRange +#include <tesseract/unichar.h> // for UNICHAR_ID, INVALID_UNICHAR_ID + +#include <algorithm> // for max, min +#include <cassert> // for assert +#include <cmath> // for fabs +#include <cstdint> // for INT32_MAX, UINT8_MAX +#include <cstdio> // for fflush, fclose, fopen, stdout, FILE +#include <cstdlib> // for malloc +#include <cstring> // for strstr, memset, strcmp + +namespace tesseract { + +#define ADAPT_TEMPLATE_SUFFIX ".a" + +#define MAX_MATCHES 10 +#define UNLIKELY_NUM_FEAT 200 +#define NO_DEBUG 0 +#define MAX_ADAPTABLE_WERD_SIZE 40 + +#define ADAPTABLE_WERD_ADJUSTMENT (0.05) + +#define Y_DIM_OFFSET (Y_SHIFT - BASELINE_Y_SHIFT) + +#define WORST_POSSIBLE_RATING (0.0f) + +struct ADAPT_RESULTS { + int32_t BlobLength; + bool HasNonfragment; + UNICHAR_ID best_unichar_id; + int best_match_index; + float best_rating; + std::vector<UnicharRating> match; + std::vector<CP_RESULT_STRUCT> CPResults; + + /// Initializes data members to the default values. Sets the initial + /// rating of each class to be the worst possible rating (1.0). + inline void Initialize() { + BlobLength = INT32_MAX; + HasNonfragment = false; + ComputeBest(); + } + // Computes best_unichar_id, best_match_index and best_rating. 
+ void ComputeBest() { + best_unichar_id = INVALID_UNICHAR_ID; + best_match_index = -1; + best_rating = WORST_POSSIBLE_RATING; + for (int i = 0; i < match.size(); ++i) { + if (match[i].rating > best_rating) { + best_rating = match[i].rating; + best_unichar_id = match[i].unichar_id; + best_match_index = i; + } + } + } +}; + +struct PROTO_KEY { + ADAPT_TEMPLATES Templates; + CLASS_ID ClassId; + int ConfigId; +}; + +// Sort function to sort ratings appropriately by descending rating. +static bool SortDescendingRating(const UnicharRating &a, const UnicharRating &b) { + if (a.rating != b.rating) { + return a.rating > b.rating; + } else { + return a.unichar_id < b.unichar_id; + } +} + +/*----------------------------------------------------------------------------- + Private Macros +-----------------------------------------------------------------------------*/ +inline bool MarginalMatch(float confidence, float matcher_great_threshold) { + return (1.0f - confidence) > matcher_great_threshold; +} + +/*----------------------------------------------------------------------------- + Private Function Prototypes +-----------------------------------------------------------------------------*/ +// Returns the index of the given id in results, if present, or the size of the +// vector (index it will go at) if not present. +static int FindScoredUnichar(UNICHAR_ID id, const ADAPT_RESULTS& results) { + for (int i = 0; i < results.match.size(); i++) { + if (results.match[i].unichar_id == id) + return i; + } + return results.match.size(); +} + +// Returns the current rating for a unichar id if we have rated it, defaulting +// to WORST_POSSIBLE_RATING. 
+static float ScoredUnichar(UNICHAR_ID id, const ADAPT_RESULTS& results) { + int index = FindScoredUnichar(id, results); + if (index >= results.match.size()) return WORST_POSSIBLE_RATING; + return results.match[index].rating; +} + +void InitMatcherRatings(float *Rating); + +int MakeTempProtoPerm(void *item1, void *item2); + +void SetAdaptiveThreshold(float Threshold); + + +/*----------------------------------------------------------------------------- + Public Code +-----------------------------------------------------------------------------*/ +/** + * This routine calls the adaptive matcher + * which returns (in an array) the class id of each + * class matched. + * + * It also returns the number of classes matched. + * For each class matched it places the best rating + * found for that class into the Ratings array. + * + * Bad matches are then removed so that they don't + * need to be sorted. The remaining good matches are + * then sorted and converted to choices. + * + * This routine also performs some simple speckle + * filtering. + * + * @param Blob blob to be classified + * @param[out] Choices List of choices found by adaptive matcher. + * filled on return with the choices found by the + * class pruner and the ratings therefrom. Also + * contains the detailed results of the integer matcher. + * + */ +void Classify::AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices) { + assert(Choices != nullptr); + auto *Results = new ADAPT_RESULTS; + Results->Initialize(); + + ASSERT_HOST(AdaptedTemplates != nullptr); + + DoAdaptiveMatch(Blob, Results); + + RemoveBadMatches(Results); + std::sort(Results->match.begin(), Results->match.end(), SortDescendingRating); + RemoveExtraPuncs(Results); + Results->ComputeBest(); + ConvertMatchesToChoices(Blob->denorm(), Blob->bounding_box(), Results, + Choices); + + // TODO(rays) Move to before ConvertMatchesToChoices! 
  // Add a speckle result when the blob is speckle-sized, or when there are
  // no other choices at all (guarantees Choices is never left empty).
  if (LargeSpeckle(*Blob) || Choices->length() == 0)
    AddLargeSpeckleTo(Results->BlobLength, Choices);

  if (matcher_debug_level >= 1) {
    tprintf("AD Matches = ");
    PrintAdaptiveMatchResults(*Results);
  }

#ifndef GRAPHICS_DISABLED
  if (classify_enable_adaptive_debugger)
    DebugAdaptiveClassifier(Blob, Results);
#endif

  delete Results;
} /* AdaptiveClassifier */

#ifndef GRAPHICS_DISABLED

// If *win is nullptr, sets it to a new ScrollView() object with title msg.
// Clears the window and draws baselines.
void Classify::RefreshDebugWindow(ScrollView **win, const char *msg,
                                  int y_offset, const TBOX &wbox) {
  const int kSampleSpaceWidth = 500;
  if (*win == nullptr) {
    *win = new ScrollView(msg, 100, y_offset, kSampleSpaceWidth * 2, 200,
                          kSampleSpaceWidth * 2, 200, true);
  }
  (*win)->Clear();
  (*win)->Pen(64, 64, 64);
  // Draw the baseline and the x-height line as horizontal reference lines.
  (*win)->Line(-kSampleSpaceWidth, kBlnBaselineOffset,
               kSampleSpaceWidth, kBlnBaselineOffset);
  (*win)->Line(-kSampleSpaceWidth, kBlnXHeight + kBlnBaselineOffset,
               kSampleSpaceWidth, kBlnXHeight + kBlnBaselineOffset);
  (*win)->ZoomToRectangle(wbox.left(), wbox.top(),
                          wbox.right(), wbox.bottom());
}

#endif  // !GRAPHICS_DISABLED

// Learns the given word using its chopped_word, seam_array, denorm,
// box_word, best_state, and correct_text to learn both correctly and
// incorrectly segmented blobs. If fontname is not nullptr, then LearnBlob
// is called and the data will be saved in an internal buffer.
// Otherwise AdaptToBlob is called for adaption within a document.
void Classify::LearnWord(const char* fontname, WERD_RES* word) {
  int word_len = word->correct_text.size();
  if (word_len == 0) return;

  // Per-character adaption thresholds; only allocated in adaption mode
  // (fontname == nullptr) and freed at the end of this function.
  float* thresholds = nullptr;
  if (fontname == nullptr) {
    // Adaption mode.
    if (!EnableLearning || word->best_choice == nullptr)
      return;  // Can't or won't adapt.

    if (classify_learning_debug_level >= 1)
      tprintf("\n\nAdapting to word = %s\n",
              word->best_choice->debug_string().c_str());
    thresholds = new float[word_len];
    word->ComputeAdaptionThresholds(certainty_scale,
                                    matcher_perfect_threshold,
                                    matcher_good_threshold,
                                    matcher_rating_margin, thresholds);
  }
  int start_blob = 0;

  #ifndef GRAPHICS_DISABLED
  if (classify_debug_character_fragments) {
    if (learn_fragmented_word_debug_win_ != nullptr) {
      learn_fragmented_word_debug_win_->Wait();
    }
    RefreshDebugWindow(&learn_fragments_debug_win_, "LearnPieces", 400,
                       word->chopped_word->bounding_box());
    RefreshDebugWindow(&learn_fragmented_word_debug_win_, "LearnWord", 200,
                       word->chopped_word->bounding_box());
    word->chopped_word->plot(learn_fragmented_word_debug_win_);
    ScrollView::Update();
  }
  #endif  // !GRAPHICS_DISABLED

  // Walk the characters of the word; best_state[ch] is the number of
  // chopped blobs spanned by correct_text[ch] (see start_blob advance below).
  for (int ch = 0; ch < word_len; ++ch) {
    if (classify_debug_character_fragments) {
      tprintf("\nLearning %s\n", word->correct_text[ch].c_str());
    }
    if (word->correct_text[ch].length() > 0) {
      float threshold = thresholds != nullptr ? thresholds[ch] : 0.0f;

      // Learn the whole character first.
      LearnPieces(fontname, start_blob, word->best_state[ch], threshold,
                  CST_WHOLE, word->correct_text[ch].c_str(), word);

      if (word->best_state[ch] > 1 && !disable_character_fragments) {
        // Check that the character breaks into meaningful fragments
        // that each match a whole character with at least
        // classify_character_fragments_garbage_certainty_threshold
        bool garbage = false;
        int frag;
        for (frag = 0; frag < word->best_state[ch]; ++frag) {
          TBLOB* frag_blob = word->chopped_word->blobs[start_blob + frag];
          // NOTE(review): the garbage check only runs when the threshold
          // parameter is negative — presumably a negative value selects the
          // default behavior inside LooksLikeGarbage; confirm.
          if (classify_character_fragments_garbage_certainty_threshold < 0) {
            garbage |= LooksLikeGarbage(frag_blob);
          }
        }
        // Learn the fragments.
        if (!garbage) {
          bool pieces_all_natural = word->PiecesAllNatural(start_blob,
              word->best_state[ch]);
          if (pieces_all_natural || !prioritize_division) {
            for (frag = 0; frag < word->best_state[ch]; ++frag) {
              std::vector<STRING> tokens;
              word->correct_text[ch].split(' ', &tokens);

              // Replace the first token with its fragment representation,
              // e.g. marking this as fragment frag of best_state[ch].
              tokens[0] = CHAR_FRAGMENT::to_string(
                  tokens[0].c_str(), frag, word->best_state[ch],
                  pieces_all_natural);

              // Re-join the tokens into a single space-separated string.
              STRING full_string;
              for (int i = 0; i < tokens.size(); i++) {
                full_string += tokens[i];
                if (i != tokens.size() - 1)
                  full_string += ' ';
              }
              LearnPieces(fontname, start_blob + frag, 1, threshold,
                          CST_FRAGMENT, full_string.c_str(), word);
            }
          }
        }
      }

      // TODO(rays): re-enable this part of the code when we switch to the
      // new classifier that needs to see examples of garbage.
      /*
      if (word->best_state[ch] > 1) {
        // If the next blob is good, make junk with the rightmost fragment.
        if (ch + 1 < word_len && word->correct_text[ch + 1].length() > 0) {
          LearnPieces(fontname, start_blob + word->best_state[ch] - 1,
                      word->best_state[ch + 1] + 1,
                      threshold, CST_IMPROPER, INVALID_UNICHAR, word);
        }
        // If the previous blob is good, make junk with the leftmost fragment.
        if (ch > 0 && word->correct_text[ch - 1].length() > 0) {
          LearnPieces(fontname, start_blob - word->best_state[ch - 1],
                      word->best_state[ch - 1] + 1,
                      threshold, CST_IMPROPER, INVALID_UNICHAR, word);
        }
      }
      // If the next blob is good, make a join with it.
      if (ch + 1 < word_len && word->correct_text[ch + 1].length() > 0) {
        STRING joined_text = word->correct_text[ch];
        joined_text += word->correct_text[ch + 1];
        LearnPieces(fontname, start_blob,
                    word->best_state[ch] + word->best_state[ch + 1],
                    threshold, CST_NGRAM, joined_text.c_str(), word);
      }
      */
    }
    start_blob += word->best_state[ch];
  }
  delete [] thresholds;
}  // LearnWord.

// Builds a blob of length fragments, from the word, starting at start,
// and then learns it, as having the given correct_text.
// If fontname is not nullptr, then LearnBlob is called and the data will be
// saved in an internal buffer for static training.
// Otherwise AdaptToBlob is called for adaption within a document.
// threshold is a magic number required by AdaptToChar and generated by
// ComputeAdaptionThresholds.
// Although it can be partly inferred from the string, segmentation is
// provided to explicitly clarify the character segmentation.
void Classify::LearnPieces(const char* fontname, int start, int length,
                           float threshold, CharSegmentationType segmentation,
                           const char* correct_text, WERD_RES* word) {
  // TODO(daria) Remove/modify this if/when we want
  // to train and/or adapt to n-grams.
  if (segmentation != CST_WHOLE &&
      (segmentation != CST_FRAGMENT || disable_character_fragments))
    return;

  // Temporarily merge blobs [start, start + length - 1] into a single blob;
  // the merge is undone by SEAM::BreakPieces before this function returns.
  if (length > 1) {
    SEAM::JoinPieces(word->seam_array, word->chopped_word->blobs, start,
                     start + length - 1);
  }
  TBLOB* blob = word->chopped_word->blobs[start];
  // Rotate the blob if needed for classification.
  // rotated_blob aliases blob when no rotation is needed; it is only
  // deleted later if it is a distinct object.
  TBLOB* rotated_blob = blob->ClassifyNormalizeIfNeeded();
  if (rotated_blob == nullptr)
    rotated_blob = blob;

  #ifndef GRAPHICS_DISABLED
  // Draw debug windows showing the blob that is being learned if needed.
  if (strcmp(classify_learn_debug_str.c_str(), correct_text) == 0) {
    RefreshDebugWindow(&learn_debug_win_, "LearnPieces", 600,
                       word->chopped_word->bounding_box());
    rotated_blob->plot(learn_debug_win_, ScrollView::GREEN, ScrollView::BROWN);
    learn_debug_win_->Update();
    learn_debug_win_->Wait();
  }
  if (classify_debug_character_fragments && segmentation == CST_FRAGMENT) {
    ASSERT_HOST(learn_fragments_debug_win_ != nullptr);  // set up in LearnWord
    blob->plot(learn_fragments_debug_win_,
               ScrollView::BLUE, ScrollView::BROWN);
    learn_fragments_debug_win_->Update();
  }
  #endif  // !GRAPHICS_DISABLED

  if (fontname != nullptr) {
    // Static training path: extract features and buffer them via LearnBlob.
    classify_norm_method.set_value(character);  // force char norm spc 30/11/93
    tess_bn_matching.set_value(false);  // turn it off
    tess_cn_matching.set_value(false);
    DENORM bl_denorm, cn_denorm;
    INT_FX_RESULT_STRUCT fx_info;
    SetupBLCNDenorms(*rotated_blob, classify_nonlinear_norm,
                     &bl_denorm, &cn_denorm, &fx_info);
    LearnBlob(fontname, rotated_blob, cn_denorm, fx_info, correct_text);
  } else if (unicharset.contains_unichar(correct_text)) {
    // Adaption path: adapt the templates for this unichar within the document.
    UNICHAR_ID class_id = unicharset.unichar_to_id(correct_text);
    int font_id = word->fontinfo != nullptr
                ? fontinfo_table_.get_id(*word->fontinfo)
                : 0;
    if (classify_learning_debug_level >= 1)
      tprintf("Adapting to char = %s, thr= %g font_id= %d\n",
              unicharset.id_to_unichar(class_id), threshold, font_id);
    // If filename is not nullptr we are doing recognition
    // (as opposed to training), so we must have already set word fonts.
    AdaptToChar(rotated_blob, class_id, font_id, threshold, AdaptedTemplates);
    if (BackupAdaptedTemplates != nullptr) {
      // Adapt the backup templates too. They will be used if the primary gets
      // too full.
      AdaptToChar(rotated_blob, class_id, font_id, threshold,
                  BackupAdaptedTemplates);
    }
  } else if (classify_debug_level >= 1) {
    tprintf("Can't adapt to %s not in unicharset\n", correct_text);
  }
  // Only delete the rotated blob if ClassifyNormalizeIfNeeded made a copy.
  if (rotated_blob != blob) {
    delete rotated_blob;
  }

  // Undo the JoinPieces merge done at the top of this function.
  SEAM::BreakPieces(word->seam_array, word->chopped_word->blobs, start,
                    start + length - 1);
}  // LearnPieces.

/*---------------------------------------------------------------------------*/
/**
 * This routine performs cleanup operations
 * on the adaptive classifier.  It should be called
 * before the program is terminated.  Its main function
 * is to save the adapted templates to a file.
 *
 * Globals:
 * - #AdaptedTemplates current set of adapted templates
 * - #classify_save_adapted_templates true if templates should be saved
 * - #classify_enable_adaptive_matcher true if adaptive matcher is enabled
 */
void Classify::EndAdaptiveClassifier() {
  STRING Filename;
  FILE *File;

  // Save adapted templates to <imagefile><ADAPT_TEMPLATE_SUFFIX> if enabled,
  // before tearing down any state. A failed fopen is reported, not fatal.
  if (AdaptedTemplates != nullptr &&
      classify_enable_adaptive_matcher && classify_save_adapted_templates) {
    Filename = imagefile + ADAPT_TEMPLATE_SUFFIX;
    File = fopen (Filename.c_str(), "wb");
    if (File == nullptr)
      tprintf ("Unable to save adapted templates to %s!\n", Filename.c_str());
    else {
      tprintf ("\nSaving adapted templates to %s ...", Filename.c_str());
      fflush(stdout);
      WriteAdaptedTemplates(File, AdaptedTemplates);
      tprintf ("\n");
      fclose(File);
    }
  }

  // Release all classifier state; members are nulled so a later
  // InitAdaptiveClassifier can detect a fresh start.
  if (AdaptedTemplates != nullptr) {
    free_adapted_templates(AdaptedTemplates);
    AdaptedTemplates = nullptr;
  }
  if (BackupAdaptedTemplates != nullptr) {
    free_adapted_templates(BackupAdaptedTemplates);
    BackupAdaptedTemplates = nullptr;
  }

  if (PreTrainedTemplates != nullptr) {
    free_int_templates(PreTrainedTemplates);
    PreTrainedTemplates = nullptr;
  }
  getDict().EndDangerousAmbigs();
  FreeNormProtos();
  // AllProtosOn doubles as the "bit vectors are allocated" flag; all four
  // vectors are allocated together in InitAdaptiveClassifier.
  if (AllProtosOn != nullptr) {
    FreeBitVector(AllProtosOn);
    FreeBitVector(AllConfigsOn);
    FreeBitVector(AllConfigsOff);
    FreeBitVector(TempProtoMask);
    AllProtosOn = nullptr;
    AllConfigsOn = nullptr;
    AllConfigsOff = nullptr;
    TempProtoMask = nullptr;
  }
  delete shape_table_;
  shape_table_ = nullptr;
  delete static_classifier_;
  static_classifier_ = nullptr;
}  /* EndAdaptiveClassifier */


/*---------------------------------------------------------------------------*/
/**
 * This routine reads in the training
 * information needed by the adaptive classifier
 * and saves it into global variables.
 *  Parameters:
 *      load_pre_trained_templates  Indicates whether the pre-trained
 *                     templates (inttemp, normproto and pffmtable components)
 *                     should be loaded. Should only be set to true if the
 *                     necessary classifier components are present in the
 *                     [lang].traineddata file.
 *  Globals:
 *      BuiltInTemplatesFile  file to get built-in temps from
 *      BuiltInCutoffsFile    file to get avg. feat per class from
 *      classify_use_pre_adapted_templates
 *                            enables use of pre-adapted templates
 */
void Classify::InitAdaptiveClassifier(TessdataManager* mgr) {
  if (!classify_enable_adaptive_matcher)
    return;
  if (AllProtosOn != nullptr)
    EndAdaptiveClassifier();  // Don't leak with multiple inits.

  // If there is no language_data_path_prefix, the classifier will be
  // adaptive only.
  if (language_data_path_prefix.length() > 0 && mgr != nullptr) {
    TFile fp;
    // inttemp, pffmtable and normproto are required components; the shape
    // table is optional and a failed deserialize is non-fatal.
    ASSERT_HOST(mgr->GetComponent(TESSDATA_INTTEMP, &fp));
    PreTrainedTemplates = ReadIntTemplates(&fp);

    if (mgr->GetComponent(TESSDATA_SHAPE_TABLE, &fp)) {
      shape_table_ = new ShapeTable(unicharset);
      if (!shape_table_->DeSerialize(&fp)) {
        tprintf("Error loading shape table!\n");
        delete shape_table_;
        shape_table_ = nullptr;
      }
    }

    ASSERT_HOST(mgr->GetComponent(TESSDATA_PFFMTABLE, &fp));
    ReadNewCutoffs(&fp, CharNormCutoffs);

    ASSERT_HOST(mgr->GetComponent(TESSDATA_NORMPROTO, &fp));
    NormProtos = ReadNormProtos(&fp);
    static_classifier_ = new TessClassifier(false, this);
  }

  InitIntegerFX();

  // Allocate the shared proto/config masks; freed in EndAdaptiveClassifier.
  AllProtosOn = NewBitVector(MAX_NUM_PROTOS);
  AllConfigsOn = NewBitVector(MAX_NUM_CONFIGS);
  AllConfigsOff = NewBitVector(MAX_NUM_CONFIGS);
  TempProtoMask = NewBitVector(MAX_NUM_PROTOS);
  set_all_bits(AllProtosOn, WordsInVectorOfSize(MAX_NUM_PROTOS));
  set_all_bits(AllConfigsOn, WordsInVectorOfSize(MAX_NUM_CONFIGS));
  zero_all_bits(AllConfigsOff, WordsInVectorOfSize(MAX_NUM_CONFIGS));

  for (uint16_t& BaselineCutoff : BaselineCutoffs) {
    BaselineCutoff = 0;
  }

  if (classify_use_pre_adapted_templates) {
    TFile fp;
    STRING Filename;

    // Try to resume from templates saved by a previous run; fall back to a
    // fresh set if the file cannot be opened.
    Filename = imagefile;
    Filename += ADAPT_TEMPLATE_SUFFIX;
    if (!fp.Open(Filename.c_str(), nullptr)) {
      AdaptedTemplates = NewAdaptedTemplates(true);
    } else {
      tprintf("\nReading pre-adapted templates from %s ...\n",
              Filename.c_str());
      fflush(stdout);
      AdaptedTemplates = ReadAdaptedTemplates(&fp);
      tprintf("\n");
      PrintAdaptedTemplates(stdout, AdaptedTemplates);

      for (int i = 0; i < AdaptedTemplates->Templates->NumClasses; i++) {
        BaselineCutoffs[i] = CharNormCutoffs[i];
      }
    }
  } else {
    if (AdaptedTemplates != nullptr)
      free_adapted_templates(AdaptedTemplates);
    AdaptedTemplates = NewAdaptedTemplates(true);
  }
}  /* InitAdaptiveClassifier */

// Resets the main adaptive classifier to an empty set of templates.
void Classify::ResetAdaptiveClassifierInternal() {
  if (classify_learning_debug_level > 0) {
    tprintf("Resetting adaptive classifier (NumAdaptationsFailed=%d)\n",
            NumAdaptationsFailed);
  }
  free_adapted_templates(AdaptedTemplates);
  AdaptedTemplates = NewAdaptedTemplates(true);
  if (BackupAdaptedTemplates != nullptr)
    free_adapted_templates(BackupAdaptedTemplates);
  BackupAdaptedTemplates = nullptr;
  NumAdaptationsFailed = 0;
}

// If there are backup adapted templates, switches to those, otherwise resets
// the main adaptive classifier (because it is full.)
void Classify::SwitchAdaptiveClassifier() {
  if (BackupAdaptedTemplates == nullptr) {
    ResetAdaptiveClassifierInternal();
    return;
  }
  if (classify_learning_debug_level > 0) {
    tprintf("Switch to backup adaptive classifier (NumAdaptationsFailed=%d)\n",
            NumAdaptationsFailed);
  }
  // Ownership of the backup templates transfers to AdaptedTemplates.
  free_adapted_templates(AdaptedTemplates);
  AdaptedTemplates = BackupAdaptedTemplates;
  BackupAdaptedTemplates = nullptr;
  NumAdaptationsFailed = 0;
}

// Resets the backup adaptive classifier to empty.
void Classify::StartBackupAdaptiveClassifier() {
  if (BackupAdaptedTemplates != nullptr)
    free_adapted_templates(BackupAdaptedTemplates);
  BackupAdaptedTemplates = NewAdaptedTemplates(true);
}

/*---------------------------------------------------------------------------*/
/**
 * This routine prepares the adaptive
 * matcher for the start
 * of the first pass.  Learning is enabled (unless it
 * is disabled for the whole program).
 *
 * @note this is somewhat redundant, it simply says that if learning is
 * enabled then it will remain enabled on the first pass.  If it is
 * disabled, then it will remain disabled.  This is only put here to
 * make it very clear that learning is controlled directly by the global
 * setting of EnableLearning.
 *
 * Globals:
 * - #EnableLearning
 * set to true by this routine
 */
void Classify::SettupPass1() {
  EnableLearning = classify_enable_learning;

  getDict().SettupStopperPass1();

}  /* SettupPass1 */


/*---------------------------------------------------------------------------*/
/**
 * This routine prepares the adaptive
 * matcher for the start of the second pass.  Further
 * learning is disabled.
 *
 * Globals:
 * - #EnableLearning set to false by this routine
 */
void Classify::SettupPass2() {
  EnableLearning = false;
  getDict().SettupStopperPass2();

}  /* SettupPass2 */


/*---------------------------------------------------------------------------*/
/**
 * This routine creates a new adapted
 * class and uses Blob as the model for the first
 * config in that class.
 *
 * @param Blob blob to model new class after
 * @param ClassId id of the class to be initialized
 * @param FontinfoId font information inferred from pre-trained templates
 * @param Class adapted class to be initialized
 * @param Templates adapted templates to add new class to
 *
 * Globals:
 * - #AllProtosOn dummy mask with all 1's
 * - BaselineCutoffs kludge needed to get cutoffs
 * - #PreTrainedTemplates kludge needed to get cutoffs
 */
void Classify::InitAdaptedClass(TBLOB *Blob,
                                CLASS_ID ClassId,
                                int FontinfoId,
                                ADAPT_CLASS Class,
                                ADAPT_TEMPLATES Templates) {
  FEATURE_SET Features;
  int Fid, Pid;
  FEATURE Feature;
  int NumFeatures;
  TEMP_PROTO TempProto;
  PROTO Proto;
  INT_CLASS IClass;
  TEMP_CONFIG Config;

  classify_norm_method.set_value(baseline);
  Features = ExtractOutlineFeatures(Blob);
  NumFeatures = Features->NumFeatures;
  // Reject degenerate or implausibly complex blobs outright.
  if (NumFeatures > UNLIKELY_NUM_FEAT || NumFeatures <= 0) {
    FreeFeatureSet(Features);
    return;
  }

  Config = NewTempConfig(NumFeatures - 1, FontinfoId);
  TempConfigFor(Class, 0) = Config;

  /* this is a kludge to construct cutoffs for adapted templates */
  if (Templates == AdaptedTemplates)
    BaselineCutoffs[ClassId] = CharNormCutoffs[ClassId];

  IClass = ClassForClassId (Templates->Templates, ClassId);

  // Turn each outline feature into one proto of the new class.
  for (Fid = 0; Fid < Features->NumFeatures; Fid++) {
    Pid = AddIntProto (IClass);
    assert (Pid != NO_PROTO);

    Feature = Features->Features[Fid];
    TempProto = NewTempProto ();
    Proto = &(TempProto->Proto);

    /* compute proto params - NOTE that Y_DIM_OFFSET must be used because
       ConvertProto assumes that the Y dimension varies from -0.5 to 0.5
       instead of the -0.25 to 0.75 used in baseline normalization */
    Proto->Angle = Feature->Params[OutlineFeatDir];
    Proto->X = Feature->Params[OutlineFeatX];
    Proto->Y = Feature->Params[OutlineFeatY] - Y_DIM_OFFSET;
    Proto->Length = Feature->Params[OutlineFeatLength];
    FillABC(Proto);

    TempProto->ProtoId = Pid;
    SET_BIT (Config->Protos, Pid);

    ConvertProto(Proto, Pid, IClass);
    AddProtoToProtoPruner(Proto, Pid, IClass,
                          classify_learning_debug_level >= 2);

    Class->TempProtos = push (Class->TempProtos, TempProto);
  }
  FreeFeatureSet(Features);

  AddIntConfig(IClass);
  ConvertConfig (AllProtosOn, 0, IClass);

  if (classify_learning_debug_level >= 1) {
    tprintf("Added new class '%s' with class id %d and %d protos.\n",
            unicharset.id_to_unichar(ClassId), ClassId, NumFeatures);
#ifndef GRAPHICS_DISABLED
    if (classify_learning_debug_level > 1)
      DisplayAdaptedChar(Blob, IClass);
#endif
  }

  if (IsEmptyAdaptedClass(Class))
    (Templates->NumNonEmptyClasses)++;
}  /* InitAdaptedClass */


/*---------------------------------------------------------------------------*/
/**
 * This routine sets up the feature
 * extractor to extract baseline normalized
 * pico-features.
 *
 * The extracted pico-features are converted
 * to integer form and placed in IntFeatures.  The
 * original floating-pt. features are returned in
 * FloatFeatures.
 *
 * Globals: none
 * @param Blob blob to extract features from
 * @param[out] IntFeatures array to fill with integer features
 * @param[out] FloatFeatures place to return actual floating-pt features
 *
 * @return Number of pico-features returned (0 if
 * an error occurred)
 */
int Classify::GetAdaptiveFeatures(TBLOB *Blob,
                                  INT_FEATURE_ARRAY IntFeatures,
                                  FEATURE_SET *FloatFeatures) {
  FEATURE_SET Features;
  int NumFeatures;

  classify_norm_method.set_value(baseline);
  Features = ExtractPicoFeatures(Blob);

  NumFeatures = Features->NumFeatures;
  // On failure the feature set is freed here; on success the caller takes
  // ownership of *FloatFeatures and must call FreeFeatureSet on it.
  if (NumFeatures == 0 || NumFeatures > UNLIKELY_NUM_FEAT) {
    FreeFeatureSet(Features);
    return 0;
  }

  ComputeIntFeatures(Features, IntFeatures);
  *FloatFeatures = Features;

  return NumFeatures;
}  /* GetAdaptiveFeatures */


/*-----------------------------------------------------------------------------
              Private Code
-----------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/
/**
 * Return true if the specified word is acceptable for adaptation.
 *
 * Globals: none
 *
 * @param word current word
 *
 * @return true or false
 */
bool Classify::AdaptableWord(WERD_RES* word) {
  if (word->best_choice == nullptr) return false;
  int BestChoiceLength = word->best_choice->length();
  float adaptable_score =
    getDict().segment_penalty_dict_case_ok + ADAPTABLE_WERD_ADJUSTMENT;
  return   // rules that apply in general - simplest to compute first
      BestChoiceLength > 0 &&
      BestChoiceLength == word->rebuild_word->NumBlobs() &&
      BestChoiceLength <= MAX_ADAPTABLE_WERD_SIZE &&
      // This basically ensures that the word is at least a dictionary match
      // (freq word, user word, system dawg word, etc).
      // Since all the other adjustments will make adjust factor higher
      // than higher than adaptable_score=1.1+0.05=1.15
      // Since these are other flags that ensure that the word is dict word,
      // this check could be at times redundant.
      word->best_choice->adjust_factor() <= adaptable_score &&
      // Make sure that alternative choices are not dictionary words.
      word->AlternativeChoiceAdjustmentsWorseThan(adaptable_score);
}

/*---------------------------------------------------------------------------*/
/**
 * Adds Blob to the adapted templates for ClassId, either by creating the
 * class from scratch (if empty) or by matching against and reinforcing
 * the existing configs.
 *
 * @param Blob blob to add to templates for ClassId
 * @param ClassId class to add blob to
 * @param FontinfoId font information from pre-trained templates
 * @param Threshold minimum match rating to existing template
 * @param adaptive_templates current set of adapted templates
 *
 * Globals:
 * - AllProtosOn dummy mask to match against all protos
 * - AllConfigsOn dummy mask to match against all configs
 */
void Classify::AdaptToChar(TBLOB* Blob, CLASS_ID ClassId, int FontinfoId,
                           float Threshold,
                           ADAPT_TEMPLATES adaptive_templates) {
  int NumFeatures;
  INT_FEATURE_ARRAY IntFeatures;
  UnicharRating int_result;
  INT_CLASS IClass;
  ADAPT_CLASS Class;
  TEMP_CONFIG TempConfig;
  FEATURE_SET FloatFeatures;
  int NewTempConfigId;

  if (!LegalClassId (ClassId))
    return;

  int_result.unichar_id = ClassId;
  Class = adaptive_templates->Class[ClassId];
  assert(Class != nullptr);
  if (IsEmptyAdaptedClass(Class)) {
    // First example of this class: seed the class from this blob.
    InitAdaptedClass(Blob, ClassId, FontinfoId, Class, adaptive_templates);
  } else {
    IClass = ClassForClassId(adaptive_templates->Templates, ClassId);

    NumFeatures = GetAdaptiveFeatures(Blob, IntFeatures, &FloatFeatures);
    if (NumFeatures <= 0) {
      return;  // Features already freed by GetAdaptiveFeatures.
    }

    // Only match configs with the matching font.
    // Build a config mask containing only configs whose font matches.
    BIT_VECTOR MatchingFontConfigs = NewBitVector(MAX_NUM_PROTOS);
    for (int cfg = 0; cfg < IClass->NumConfigs; ++cfg) {
      if (GetFontinfoId(Class, cfg) == FontinfoId) {
        SET_BIT(MatchingFontConfigs, cfg);
      } else {
        reset_bit(MatchingFontConfigs, cfg);
      }
    }
    im_.Match(IClass, AllProtosOn, MatchingFontConfigs,
              NumFeatures, IntFeatures,
              &int_result, classify_adapt_feature_threshold,
              NO_DEBUG, matcher_debug_separate_windows);
    FreeBitVector(MatchingFontConfigs);

    SetAdaptiveThreshold(Threshold);

    // rating is a similarity; 1 - rating is compared against the adaption
    // threshold as a distance, so <= means "good enough match".
    if (1.0f - int_result.rating <= Threshold) {
      if (ConfigIsPermanent(Class, int_result.config)) {
        // Already permanent: nothing to reinforce.
        if (classify_learning_debug_level >= 1)
          tprintf("Found good match to perm config %d = %4.1f%%.\n",
                  int_result.config, int_result.rating * 100.0);
        FreeFeatureSet(FloatFeatures);
        return;
      }

      // Reinforce the matching temporary config; promote it to permanent
      // once it has been seen reliably often.
      TempConfig = TempConfigFor(Class, int_result.config);
      IncreaseConfidence(TempConfig);
      if (TempConfig->NumTimesSeen > Class->MaxNumTimesSeen) {
        Class->MaxNumTimesSeen = TempConfig->NumTimesSeen;
      }
      if (classify_learning_debug_level >= 1)
        tprintf("Increasing reliability of temp config %d to %d.\n",
                int_result.config, TempConfig->NumTimesSeen);

      if (TempConfigReliable(ClassId, TempConfig)) {
        MakePermanent(adaptive_templates, ClassId, int_result.config, Blob);
        UpdateAmbigsGroup(ClassId, Blob);
      }
    } else {
      // Poor match: start a new temporary config from this blob.
      if (classify_learning_debug_level >= 1) {
        tprintf("Found poor match to temp config %d = %4.1f%%.\n",
                int_result.config, int_result.rating * 100.0);
#ifndef GRAPHICS_DISABLED
        if (classify_learning_debug_level > 2)
          DisplayAdaptedChar(Blob, IClass);
#endif
      }
      NewTempConfigId =
          MakeNewTemporaryConfig(adaptive_templates, ClassId, FontinfoId,
                                 NumFeatures, IntFeatures, FloatFeatures);
      if (NewTempConfigId >= 0 &&
          TempConfigReliable(ClassId, TempConfigFor(Class, NewTempConfigId))) {
        MakePermanent(adaptive_templates, ClassId, NewTempConfigId, Blob);
        UpdateAmbigsGroup(ClassId, Blob);
      }

#ifndef GRAPHICS_DISABLED
      if (classify_learning_debug_level > 1) {
        DisplayAdaptedChar(Blob, IClass);
      }
#endif
    }
    FreeFeatureSet(FloatFeatures);
  }
}  /* AdaptToChar */

#ifndef GRAPHICS_DISABLED

// Debug helper: matches blob against int_class with all protos/configs
// enabled and prints/displays the best matching config.
void Classify::DisplayAdaptedChar(TBLOB* blob, INT_CLASS_STRUCT* int_class) {
  INT_FX_RESULT_STRUCT fx_info;
  std::vector<INT_FEATURE_STRUCT> bl_features;
  TrainingSample* sample =
      BlobToTrainingSample(*blob, classify_nonlinear_norm, &fx_info,
                           &bl_features);
  if (sample == nullptr) return;

  UnicharRating int_result;
  im_.Match(int_class, AllProtosOn, AllConfigsOn,
            bl_features.size(), &bl_features[0],
            &int_result, classify_adapt_feature_threshold,
            NO_DEBUG, matcher_debug_separate_windows);
  tprintf("Best match to temp config %d = %4.1f%%.\n",
          int_result.config, int_result.rating * 100.0);
  if (classify_learning_debug_level >= 2) {
    // Re-match with only the best config enabled, with display debugging on.
    // NOTE(review): the single-config mask is faked by casting a local
    // uint32_t to BIT_VECTOR — relies on BIT_VECTOR's word layout; confirm.
    uint32_t ConfigMask;
    ConfigMask = 1 << int_result.config;
    ShowMatchDisplay();
    im_.Match(int_class, AllProtosOn, static_cast<BIT_VECTOR>(&ConfigMask),
              bl_features.size(), &bl_features[0],
              &int_result, classify_adapt_feature_threshold,
              6 | 0x19, matcher_debug_separate_windows);
    UpdateMatchDisplay();
  }

  delete sample;
}

#endif

/**
 * This routine adds the result of a classification into
 * Results.  If the new rating is much worse than the current
 * best rating, it is not entered into results because it
 * would end up being stripped later anyway.  If the new rating
 * is better than the old rating for the class, it replaces the
 * old rating.  If this is the first rating for the class, the
 * class is added to the list of matched classes in Results.
 * If the new rating is better than the best so far, it
 * becomes the best so far.
 *
 * Globals:
 * - #matcher_bad_match_pad defines limits of an acceptable match
 *
 * @param new_result new result to add
 * @param[out] results results to add new result to
 */
void Classify::AddNewResult(const UnicharRating& new_result,
                            ADAPT_RESULTS *results) {
  // old_match == results->match.size() means the unichar is not yet present.
  int old_match = FindScoredUnichar(new_result.unichar_id, *results);

  if (new_result.rating + matcher_bad_match_pad < results->best_rating ||
      (old_match < results->match.size() &&
       new_result.rating <= results->match[old_match].rating))
    return;  // New one not good enough.

  if (!unicharset.get_fragment(new_result.unichar_id))
    results->HasNonfragment = true;

  if (old_match < results->match.size()) {
    results->match[old_match].rating = new_result.rating;
  } else {
    results->match.push_back(new_result);
  }

  if (new_result.rating > results->best_rating &&
      // Ensure that fragments do not affect best rating, class and config.
      // This is needed so that at least one non-fragmented character is
      // always present in the results.
      // TODO(daria): verify that this helps accuracy and does not
      // hurt performance.
      !unicharset.get_fragment(new_result.unichar_id)) {
    results->best_match_index = old_match;
    results->best_rating = new_result.rating;
    results->best_unichar_id = new_result.unichar_id;
  }
}  /* AddNewResult */


/*---------------------------------------------------------------------------*/
/**
 * This routine is identical to CharNormClassifier()
 * except that it does no class pruning.  It simply matches
 * the unknown blob against the classes listed in
 * Ambiguities.
 *
 * Globals:
 * - #AllProtosOn mask that enables all protos
 * - #AllConfigsOn mask that enables all configs
 *
 * @param blob blob to be classified
 * @param templates built-in templates to classify against
 * @param classes adapted class templates
 * @param ambiguities array of unichar id's to match against
 * @param[out] results place to put match results
 * @param int_features
 * @param fx_info
 */
void Classify::AmbigClassifier(
    const std::vector<INT_FEATURE_STRUCT>& int_features,
    const INT_FX_RESULT_STRUCT& fx_info,
    const TBLOB *blob,
    INT_TEMPLATES templates,
    ADAPT_CLASS *classes,
    UNICHAR_ID *ambiguities,
    ADAPT_RESULTS *results) {
  if (int_features.empty()) return;
  auto* CharNormArray = new uint8_t[unicharset.size()];
  UnicharRating int_result;

  results->BlobLength = GetCharNormFeature(fx_info, templates, nullptr,
                                           CharNormArray);
  bool debug = matcher_debug_level >= 2 || classify_debug_level > 1;
  if (debug)
    tprintf("AM Matches = ");

  int top = blob->bounding_box().top();
  int bottom = blob->bounding_box().bottom();
  // The ambiguities array is terminated by a negative unichar id.
  while (*ambiguities >= 0) {
    CLASS_ID class_id = *ambiguities;

    int_result.unichar_id = class_id;
    im_.Match(ClassForClassId(templates, class_id),
              AllProtosOn, AllConfigsOn,
              int_features.size(), &int_features[0],
              &int_result,
              classify_adapt_feature_threshold, NO_DEBUG,
              matcher_debug_separate_windows);

    ExpandShapesAndApplyCorrections(nullptr, debug, class_id, bottom, top, 0,
                                    results->BlobLength,
                                    classify_integer_matcher_multiplier,
                                    CharNormArray, &int_result, results);
    ambiguities++;
  }
  delete [] CharNormArray;
}  /* AmbigClassifier */

/*---------------------------------------------------------------------------*/
/// Factored-out calls to IntegerMatcher based on class pruner results.
/// Returns integer matcher results inside CLASS_PRUNER_RESULTS structure.
void Classify::MasterMatcher(INT_TEMPLATES templates,
                             int16_t num_features,
                             const INT_FEATURE_STRUCT* features,
                             const uint8_t* norm_factors,
                             ADAPT_CLASS* classes,
                             int debug,
                             int matcher_multiplier,
                             const TBOX& blob_box,
                             const std::vector<CP_RESULT_STRUCT>& results,
                             ADAPT_RESULTS* final_results) {
  int top = blob_box.top();
  int bottom = blob_box.bottom();
  UnicharRating int_result;
  // Run the integer matcher on each class the pruner kept.
  for (int c = 0; c < results.size(); c++) {
    CLASS_ID class_id = results[c].Class;
    // Adapted templates restrict matching to permanent protos/configs;
    // pre-trained templates match against everything.
    BIT_VECTOR protos = classes != nullptr ? classes[class_id]->PermProtos
                                           : AllProtosOn;
    BIT_VECTOR configs = classes != nullptr ? classes[class_id]->PermConfigs
                                            : AllConfigsOn;

    int_result.unichar_id = class_id;
    im_.Match(ClassForClassId(templates, class_id),
              protos, configs,
              num_features, features,
              &int_result, classify_adapt_feature_threshold, debug,
              matcher_debug_separate_windows);
    bool is_debug = matcher_debug_level >= 2 || classify_debug_level > 1;
    ExpandShapesAndApplyCorrections(classes, is_debug, class_id, bottom, top,
                                    results[c].Rating,
                                    final_results->BlobLength,
                                    matcher_multiplier, norm_factors,
                                    &int_result, final_results);
  }
}

// Converts configs to fonts, and if the result is not adapted, and a
// shape_table_ is present, the shape is expanded to include all
// unichar_ids represented, before applying a set of corrections to the
// distance rating in int_result, (see ComputeCorrectedRating.)
// The results are added to the final_results output.
void Classify::ExpandShapesAndApplyCorrections(
    ADAPT_CLASS* classes, bool debug, int class_id, int bottom, int top,
    float cp_rating, int blob_length, int matcher_multiplier,
    const uint8_t* cn_factors,
    UnicharRating* int_result, ADAPT_RESULTS* final_results) {
  if (classes != nullptr) {
    // Adapted result. Convert configs to fontinfo_ids.
    int_result->adapted = true;
    for (int f = 0; f < int_result->fonts.size(); ++f) {
      int_result->fonts[f].fontinfo_id =
          GetFontinfoId(classes[class_id], int_result->fonts[f].fontinfo_id);
    }
  } else {
    // Pre-trained result. Map fonts using font_sets_.
    int_result->adapted = false;
    for (int f = 0; f < int_result->fonts.size(); ++f) {
      int_result->fonts[f].fontinfo_id =
          ClassAndConfigIDToFontOrShapeID(class_id,
                                          int_result->fonts[f].fontinfo_id);
    }
    if (shape_table_ != nullptr) {
      // Two possible cases:
      // 1. Flat shapetable. All unichar-ids of the shapes referenced by
      // int_result->fonts are the same. In this case build a new vector of
      // mapped fonts and replace the fonts in int_result.
      // 2. Multi-unichar shapetable. Variable unichars in the shapes referenced
      // by int_result. In this case, build a vector of UnicharRating to
      // gather together different font-ids for each unichar. Also covers case1.
      GenericVector<UnicharRating> mapped_results;
      for (int f = 0; f < int_result->fonts.size(); ++f) {
        int shape_id = int_result->fonts[f].fontinfo_id;
        const Shape& shape = shape_table_->GetShape(shape_id);
        for (int c = 0; c < shape.size(); ++c) {
          int unichar_id = shape[c].unichar_id;
          if (!unicharset.get_enabled(unichar_id)) continue;
          // Find the mapped_result for unichar_id.
          // Linear search for an existing entry; append a fresh one (cloned
          // from int_result, with fonts cleared) if none exists.
          int r = 0;
          for (r = 0; r < mapped_results.size() &&
               mapped_results[r].unichar_id != unichar_id; ++r) {}
          if (r == mapped_results.size()) {
            mapped_results.push_back(*int_result);
            mapped_results[r].unichar_id = unichar_id;
            mapped_results[r].fonts.clear();
          }
          for (int i = 0; i < shape[c].font_ids.size(); ++i) {
            mapped_results[r].fonts.push_back(
                ScoredFont(shape[c].font_ids[i], int_result->fonts[f].score));
          }
        }
      }
      // Correct and emit one result per distinct unichar in the shapes.
      for (int m = 0; m < mapped_results.size(); ++m) {
        mapped_results[m].rating =
            ComputeCorrectedRating(debug, mapped_results[m].unichar_id,
                                   cp_rating, int_result->rating,
                                   int_result->feature_misses, bottom, top,
                                   blob_length, matcher_multiplier, cn_factors);
        AddNewResult(mapped_results[m], final_results);
      }
      return;
    }
  }
  if (unicharset.get_enabled(class_id)) {
    int_result->rating = ComputeCorrectedRating(debug, class_id, cp_rating,
                                                int_result->rating,
                                                int_result->feature_misses,
                                                bottom, top, blob_length,
                                                matcher_multiplier, cn_factors);
    AddNewResult(*int_result, final_results);
  }
}

// Applies a set of corrections to the confidence im_rating,
// including the cn_correction, miss penalty and additional penalty
// for non-alnums being vertical misfits. Returns the corrected confidence.
double Classify::ComputeCorrectedRating(bool debug, int unichar_id,
                                        double cp_rating, double im_rating,
                                        int feature_misses,
                                        int bottom, int top,
                                        int blob_length, int matcher_multiplier,
                                        const uint8_t* cn_factors) {
  // Compute class feature corrections.
  double cn_corrected = im_.ApplyCNCorrection(1.0 - im_rating, blob_length,
                                              cn_factors[unichar_id],
                                              matcher_multiplier);
  double miss_penalty = tessedit_class_miss_scale * feature_misses;
  double vertical_penalty = 0.0;
  // Penalize non-alnums for being vertical misfits.
  if (!unicharset.get_isalpha(unichar_id) &&
      !unicharset.get_isdigit(unichar_id) &&
      cn_factors[unichar_id] != 0 && classify_misfit_junk_penalty > 0.0) {
    int min_bottom, max_bottom, min_top, max_top;
    unicharset.get_top_bottom(unichar_id, &min_bottom, &max_bottom,
                              &min_top, &max_top);
    if (debug) {
      tprintf("top=%d, vs [%d, %d], bottom=%d, vs [%d, %d]\n",
              top, min_top, max_top, bottom, min_bottom, max_bottom);
    }
    if (top < min_top || top > max_top ||
        bottom < min_bottom || bottom > max_bottom) {
      vertical_penalty = classify_misfit_junk_penalty;
    }
  }
  // Combine the penalties and clip to the worst allowed rating.
  double result = 1.0 - (cn_corrected + miss_penalty + vertical_penalty);
  if (result < WORST_POSSIBLE_RATING)
    result = WORST_POSSIBLE_RATING;
  if (debug) {
    tprintf("%s: %2.1f%%(CP%2.1f, IM%2.1f + CN%.2f(%d) + MP%2.1f + VP%2.1f)\n",
            unicharset.id_to_unichar(unichar_id),
            result * 100.0,
            cp_rating * 100.0,
            (1.0 - im_rating) * 100.0,
            (cn_corrected - (1.0 - im_rating)) * 100.0,
            cn_factors[unichar_id],
            miss_penalty * 100.0,
            vertical_penalty * 100.0);
  }
  return result;
}

/*---------------------------------------------------------------------------*/
/**
 * This routine extracts baseline normalized features
 * from the unknown character and matches them against the
 * specified set of templates.  The classes which match
 * are added to Results.
 *
 * Globals:
 * - BaselineCutoffs expected num features for each class
 *
 * @param Blob blob to be classified
 * @param Templates current set of adapted templates
 * @param Results place to put match results
 * @param int_features
 * @param fx_info
 *
 * @return Array of possible ambiguous chars that should be checked.
+ */ +UNICHAR_ID *Classify::BaselineClassifier( + TBLOB *Blob, const std::vector<INT_FEATURE_STRUCT>& int_features, + const INT_FX_RESULT_STRUCT& fx_info, + ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results) { + if (int_features.empty()) return nullptr; + auto* CharNormArray = new uint8_t[unicharset.size()]; + ClearCharNormArray(CharNormArray); + + Results->BlobLength = IntCastRounded(fx_info.Length / kStandardFeatureLength); + PruneClasses(Templates->Templates, int_features.size(), -1, &int_features[0], + CharNormArray, BaselineCutoffs, &Results->CPResults); + + if (matcher_debug_level >= 2 || classify_debug_level > 1) + tprintf("BL Matches = "); + + MasterMatcher(Templates->Templates, int_features.size(), &int_features[0], + CharNormArray, + Templates->Class, matcher_debug_flags, 0, + Blob->bounding_box(), Results->CPResults, Results); + + delete [] CharNormArray; + CLASS_ID ClassId = Results->best_unichar_id; + if (ClassId == INVALID_UNICHAR_ID || Results->best_match_index < 0) + return nullptr; + + return Templates->Class[ClassId]-> + Config[Results->match[Results->best_match_index].config].Perm->Ambigs; +} /* BaselineClassifier */ + + +/*---------------------------------------------------------------------------*/ +/** + * This routine extracts character normalized features + * from the unknown character and matches them against the + * specified set of templates. The classes which match + * are added to Results. + * + * @param blob blob to be classified + * @param sample templates to classify unknown against + * @param adapt_results place to put match results + * + * Globals: + * - CharNormCutoffs expected num features for each class + * - AllProtosOn mask that enables all protos + * - AllConfigsOn mask that enables all configs + */ +int Classify::CharNormClassifier(TBLOB *blob, + const TrainingSample& sample, + ADAPT_RESULTS *adapt_results) { + // This is the length that is used for scaling ratings vs certainty. 
+ adapt_results->BlobLength = + IntCastRounded(sample.outline_length() / kStandardFeatureLength); + std::vector<UnicharRating> unichar_results; + static_classifier_->UnicharClassifySample(sample, blob->denorm().pix(), 0, + -1, &unichar_results); + // Convert results to the format used internally by AdaptiveClassifier. + for (int r = 0; r < unichar_results.size(); ++r) { + AddNewResult(unichar_results[r], adapt_results); + } + return sample.num_features(); +} /* CharNormClassifier */ + +// As CharNormClassifier, but operates on a TrainingSample and outputs to +// a GenericVector of ShapeRating without conversion to classes. +int Classify::CharNormTrainingSample(bool pruner_only, + int keep_this, + const TrainingSample& sample, + std::vector<UnicharRating>* results) { + results->clear(); + auto* adapt_results = new ADAPT_RESULTS(); + adapt_results->Initialize(); + // Compute the bounding box of the features. + uint32_t num_features = sample.num_features(); + // Only the top and bottom of the blob_box are used by MasterMatcher, so + // fabricate right and left using top and bottom. + TBOX blob_box(sample.geo_feature(GeoBottom), sample.geo_feature(GeoBottom), + sample.geo_feature(GeoTop), sample.geo_feature(GeoTop)); + // Compute the char_norm_array from the saved cn_feature. + FEATURE norm_feature = sample.GetCNFeature(); + auto* char_norm_array = new uint8_t[unicharset.size()]; + int num_pruner_classes = std::max(unicharset.size(), + PreTrainedTemplates->NumClasses); + auto* pruner_norm_array = new uint8_t[num_pruner_classes]; + adapt_results->BlobLength = + static_cast<int>(ActualOutlineLength(norm_feature) * 20 + 0.5); + ComputeCharNormArrays(norm_feature, PreTrainedTemplates, char_norm_array, + pruner_norm_array); + + PruneClasses(PreTrainedTemplates, num_features, keep_this, sample.features(), + pruner_norm_array, + shape_table_ != nullptr ? 
&shapetable_cutoffs_[0] : CharNormCutoffs, + &adapt_results->CPResults); + delete [] pruner_norm_array; + if (keep_this >= 0) { + adapt_results->CPResults[0].Class = keep_this; + adapt_results->CPResults.resize(1); + } + if (pruner_only) { + // Convert pruner results to output format. + for (int i = 0; i < adapt_results->CPResults.size(); ++i) { + int class_id = adapt_results->CPResults[i].Class; + results->push_back( + UnicharRating(class_id, 1.0f - adapt_results->CPResults[i].Rating)); + } + } else { + MasterMatcher(PreTrainedTemplates, num_features, sample.features(), + char_norm_array, + nullptr, matcher_debug_flags, + classify_integer_matcher_multiplier, + blob_box, adapt_results->CPResults, adapt_results); + // Convert master matcher results to output format. + for (int i = 0; i < adapt_results->match.size(); i++) { + results->push_back(adapt_results->match[i]); + } + if (results->size() > 1) { + std::sort(results->begin(), results->end(), SortDescendingRating); + } + } + delete [] char_norm_array; + delete adapt_results; + return num_features; +} /* CharNormTrainingSample */ + + +/*---------------------------------------------------------------------------*/ +/** + * This routine computes a rating which reflects the + * likelihood that the blob being classified is a noise + * blob. NOTE: assumes that the blob length has already been + * computed and placed into Results. + * + * @param results results to add noise classification to + * + * Globals: + * - matcher_avg_noise_size avg. length of a noise blob + */ +void Classify::ClassifyAsNoise(ADAPT_RESULTS *results) { + float rating = results->BlobLength / matcher_avg_noise_size; + rating *= rating; + rating /= 1.0 + rating; + + AddNewResult(UnicharRating(UNICHAR_SPACE, 1.0f - rating), results); +} /* ClassifyAsNoise */ + +/// The function converts the given match ratings to the list of blob +/// choices with ratings and certainties (used by the context checkers). 
+/// If character fragments are present in the results, this function also makes +/// sure that there is at least one non-fragmented classification included. +/// For each classification result check the unicharset for "definite" +/// ambiguities and modify the resulting Choices accordingly. +void Classify::ConvertMatchesToChoices(const DENORM& denorm, const TBOX& box, + ADAPT_RESULTS *Results, + BLOB_CHOICE_LIST *Choices) { + assert(Choices != nullptr); + float Rating; + float Certainty; + BLOB_CHOICE_IT temp_it; + bool contains_nonfrag = false; + temp_it.set_to_list(Choices); + int choices_length = 0; + // With no shape_table_ maintain the previous MAX_MATCHES as the maximum + // number of returned results, but with a shape_table_ we want to have room + // for at least the biggest shape (which might contain hundreds of Indic + // grapheme fragments) and more, so use double the size of the biggest shape + // if that is more than the default. + int max_matches = MAX_MATCHES; + if (shape_table_ != nullptr) { + max_matches = shape_table_->MaxNumUnichars() * 2; + if (max_matches < MAX_MATCHES) + max_matches = MAX_MATCHES; + } + + float best_certainty = -FLT_MAX; + for (int i = 0; i < Results->match.size(); i++) { + const UnicharRating& result = Results->match[i]; + bool adapted = result.adapted; + bool current_is_frag = (unicharset.get_fragment(result.unichar_id) != nullptr); + if (temp_it.length()+1 == max_matches && + !contains_nonfrag && current_is_frag) { + continue; // look for a non-fragmented character to fill the + // last spot in Choices if only fragments are present + } + // BlobLength can never be legally 0, this means recognition failed. + // But we must return a classification result because some invoking + // functions (chopper/permuter) do not anticipate a null blob choice. + // So we need to assign a poor, but not infinitely bad score. 
+ if (Results->BlobLength == 0) { + Certainty = -20; + Rating = 100; // should be -certainty * real_blob_length + } else { + Rating = Certainty = (1.0f - result.rating); + Rating *= rating_scale * Results->BlobLength; + Certainty *= -(getDict().certainty_scale); + } + // Adapted results, by their very nature, should have good certainty. + // Those that don't are at best misleading, and often lead to errors, + // so don't accept adapted results that are too far behind the best result, + // whether adapted or static. + // TODO(rays) find some way of automatically tuning these constants. + if (Certainty > best_certainty) { + best_certainty = std::min(Certainty, static_cast<float>(classify_adapted_pruning_threshold)); + } else if (adapted && + Certainty / classify_adapted_pruning_factor < best_certainty) { + continue; // Don't accept bad adapted results. + } + + float min_xheight, max_xheight, yshift; + denorm.XHeightRange(result.unichar_id, unicharset, box, + &min_xheight, &max_xheight, &yshift); + auto* choice = + new BLOB_CHOICE(result.unichar_id, Rating, Certainty, + unicharset.get_script(result.unichar_id), + min_xheight, max_xheight, yshift, + adapted ? 
BCC_ADAPTED_CLASSIFIER + : BCC_STATIC_CLASSIFIER); + choice->set_fonts(result.fonts); + temp_it.add_to_end(choice); + contains_nonfrag |= !current_is_frag; // update contains_nonfrag + choices_length++; + if (choices_length >= max_matches) break; + } + Results->match.resize(choices_length); +} // ConvertMatchesToChoices + + +/*---------------------------------------------------------------------------*/ +#ifndef GRAPHICS_DISABLED +/** + * + * @param blob blob whose classification is being debugged + * @param Results results of match being debugged + * + * Globals: none + */ +void Classify::DebugAdaptiveClassifier(TBLOB *blob, + ADAPT_RESULTS *Results) { + if (static_classifier_ == nullptr) return; + INT_FX_RESULT_STRUCT fx_info; + std::vector<INT_FEATURE_STRUCT> bl_features; + TrainingSample* sample = + BlobToTrainingSample(*blob, false, &fx_info, &bl_features); + if (sample == nullptr) return; + static_classifier_->DebugDisplay(*sample, blob->denorm().pix(), + Results->best_unichar_id); +} /* DebugAdaptiveClassifier */ +#endif + +/*---------------------------------------------------------------------------*/ +/** + * This routine performs an adaptive classification. + * If we have not yet adapted to enough classes, a simple + * classification to the pre-trained templates is performed. + * Otherwise, we match the blob against the adapted templates. + * If the adapted templates do not match well, we try a + * match against the pre-trained templates. If an adapted + * template match is found, we do a match to any pre-trained + * templates which could be ambiguous. The results from all + * of these classifications are merged together into Results. 
+ * + * @param Blob blob to be classified + * @param Results place to put match results + * + * Globals: + * - PreTrainedTemplates built-in training templates + * - AdaptedTemplates templates adapted for this page + * - matcher_reliable_adaptive_result rating limit for a great match + */ +void Classify::DoAdaptiveMatch(TBLOB *Blob, ADAPT_RESULTS *Results) { + UNICHAR_ID *Ambiguities; + + INT_FX_RESULT_STRUCT fx_info; + std::vector<INT_FEATURE_STRUCT> bl_features; + TrainingSample* sample = + BlobToTrainingSample(*Blob, classify_nonlinear_norm, &fx_info, + &bl_features); + if (sample == nullptr) return; + + // TODO: With LSTM, static_classifier_ is nullptr. + // Return to avoid crash in CharNormClassifier. + if (static_classifier_ == nullptr) { + delete sample; + return; + } + + if (AdaptedTemplates->NumPermClasses < matcher_permanent_classes_min || + tess_cn_matching) { + CharNormClassifier(Blob, *sample, Results); + } else { + Ambiguities = BaselineClassifier(Blob, bl_features, fx_info, + AdaptedTemplates, Results); + if ((!Results->match.empty() && + MarginalMatch(Results->best_rating, + matcher_reliable_adaptive_result) && + !tess_bn_matching) || + Results->match.empty()) { + CharNormClassifier(Blob, *sample, Results); + } else if (Ambiguities && *Ambiguities >= 0 && !tess_bn_matching) { + AmbigClassifier(bl_features, fx_info, Blob, + PreTrainedTemplates, + AdaptedTemplates->Class, + Ambiguities, + Results); + } + } + + // Force the blob to be classified as noise + // if the results contain only fragments. + // TODO(daria): verify that this is better than + // just adding a nullptr classification. 
+ if (!Results->HasNonfragment || Results->match.empty())
+ ClassifyAsNoise(Results);
+ delete sample;
+} /* DoAdaptiveMatch */
+
+/*---------------------------------------------------------------------------*/
+/**
+ * This routine matches blob to the built-in templates
+ * to find out if there are any classes other than the correct
+ * class which are potential ambiguities.
+ *
+ * @param Blob blob to get classification ambiguities for
+ * @param CorrectClass correct class for Blob
+ *
+ * Globals:
+ * - CurrentRatings used by qsort compare routine
+ * - PreTrainedTemplates built-in templates
+ *
+ * @return String containing all possible ambiguous classes.
+ */
+UNICHAR_ID *Classify::GetAmbiguities(TBLOB *Blob,
+ CLASS_ID CorrectClass) {
+ auto *Results = new ADAPT_RESULTS();
+ UNICHAR_ID *Ambiguities;
+ int i;
+
+ Results->Initialize();
+ INT_FX_RESULT_STRUCT fx_info;
+ std::vector<INT_FEATURE_STRUCT> bl_features;
+ TrainingSample* sample =
+ BlobToTrainingSample(*Blob, classify_nonlinear_norm, &fx_info,
+ &bl_features);
+ // Feature extraction can fail (e.g. degenerate blob); report no ambiguities.
+ if (sample == nullptr) {
+ delete Results;
+ return nullptr;
+ }
+
+ // Classify against the static (pre-trained) templates only.
+ CharNormClassifier(Blob, *sample, Results);
+ delete sample;
+ RemoveBadMatches(Results);
+ std::sort(Results->match.begin(), Results->match.end(), SortDescendingRating);
+
+ /* copy the class id's into an string of ambiguities - don't copy if
+ the correct class is the only class id matched */
+ // The returned array is -1 terminated; the caller owns the new[] memory.
+ Ambiguities = new UNICHAR_ID[Results->match.size() + 1];
+ if (Results->match.size() > 1 ||
+ (Results->match.size() == 1 &&
+ Results->match[0].unichar_id != CorrectClass)) {
+ for (i = 0; i < Results->match.size(); i++)
+ Ambiguities[i] = Results->match[i].unichar_id;
+ Ambiguities[i] = -1;
+ } else {
+ // Only the correct class matched, so there are no ambiguities.
+ Ambiguities[0] = -1;
+ }
+
+ delete Results;
+ return Ambiguities;
+} /* GetAmbiguities */
+
+// Returns true if the given blob looks too dissimilar to any character
+// present in the classifier templates.
+bool Classify::LooksLikeGarbage(TBLOB *blob) { + auto *ratings = new BLOB_CHOICE_LIST(); + AdaptiveClassifier(blob, ratings); + BLOB_CHOICE_IT ratings_it(ratings); + const UNICHARSET &unicharset = getDict().getUnicharset(); + if (classify_debug_character_fragments) { + print_ratings_list("======================\nLooksLikeGarbage() got ", + ratings, unicharset); + } + for (ratings_it.mark_cycle_pt(); !ratings_it.cycled_list(); + ratings_it.forward()) { + if (unicharset.get_fragment(ratings_it.data()->unichar_id()) != nullptr) { + continue; + } + float certainty = ratings_it.data()->certainty(); + delete ratings; + return certainty < + classify_character_fragments_garbage_certainty_threshold; + } + delete ratings; + return true; // no whole characters in ratings +} + +/*---------------------------------------------------------------------------*/ +/** + * This routine calls the integer (Hardware) feature + * extractor if it has not been called before for this blob. + * + * The results from the feature extractor are placed into + * globals so that they can be used in other routines without + * re-extracting the features. + * + * It then copies the char norm features into the IntFeatures + * array provided by the caller. + * + * @param templates used to compute char norm adjustments + * @param pruner_norm_array Array of factors from blob normalization + * process + * @param char_norm_array array to fill with dummy char norm adjustments + * @param fx_info + * + * Globals: + * + * @return Number of features extracted or 0 if an error occurred. 
+ */ +int Classify::GetCharNormFeature(const INT_FX_RESULT_STRUCT& fx_info, + INT_TEMPLATES templates, + uint8_t* pruner_norm_array, + uint8_t* char_norm_array) { + FEATURE norm_feature = NewFeature(&CharNormDesc); + float baseline = kBlnBaselineOffset; + float scale = MF_SCALE_FACTOR; + norm_feature->Params[CharNormY] = (fx_info.Ymean - baseline) * scale; + norm_feature->Params[CharNormLength] = + fx_info.Length * scale / LENGTH_COMPRESSION; + norm_feature->Params[CharNormRx] = fx_info.Rx * scale; + norm_feature->Params[CharNormRy] = fx_info.Ry * scale; + // Deletes norm_feature. + ComputeCharNormArrays(norm_feature, templates, char_norm_array, + pruner_norm_array); + return IntCastRounded(fx_info.Length / kStandardFeatureLength); +} /* GetCharNormFeature */ + +// Computes the char_norm_array for the unicharset and, if not nullptr, the +// pruner_array as appropriate according to the existence of the shape_table. +void Classify::ComputeCharNormArrays(FEATURE_STRUCT* norm_feature, + INT_TEMPLATES_STRUCT* templates, + uint8_t* char_norm_array, + uint8_t* pruner_array) { + ComputeIntCharNormArray(*norm_feature, char_norm_array); + if (pruner_array != nullptr) { + if (shape_table_ == nullptr) { + ComputeIntCharNormArray(*norm_feature, pruner_array); + } else { + memset(pruner_array, UINT8_MAX, + templates->NumClasses * sizeof(pruner_array[0])); + // Each entry in the pruner norm array is the MIN of all the entries of + // the corresponding unichars in the CharNormArray. 
+ for (int id = 0; id < templates->NumClasses; ++id) { + int font_set_id = templates->Class[id]->font_set_id; + const FontSet &fs = fontset_table_.get(font_set_id); + for (int config = 0; config < fs.size; ++config) { + const Shape& shape = shape_table_->GetShape(fs.configs[config]); + for (int c = 0; c < shape.size(); ++c) { + if (char_norm_array[shape[c].unichar_id] < pruner_array[id]) + pruner_array[id] = char_norm_array[shape[c].unichar_id]; + } + } + } + } + } + FreeFeature(norm_feature); +} + +/*---------------------------------------------------------------------------*/ +/** + * + * @param Templates adapted templates to add new config to + * @param ClassId class id to associate with new config + * @param FontinfoId font information inferred from pre-trained templates + * @param NumFeatures number of features in IntFeatures + * @param Features features describing model for new config + * @param FloatFeatures floating-pt representation of features + * + * @return The id of the new config created, a negative integer in + * case of error. 
+ */ +int Classify::MakeNewTemporaryConfig(ADAPT_TEMPLATES Templates, + CLASS_ID ClassId, + int FontinfoId, + int NumFeatures, + INT_FEATURE_ARRAY Features, + FEATURE_SET FloatFeatures) { + INT_CLASS IClass; + ADAPT_CLASS Class; + PROTO_ID OldProtos[MAX_NUM_PROTOS]; + FEATURE_ID BadFeatures[MAX_NUM_INT_FEATURES]; + int NumOldProtos; + int NumBadFeatures; + int MaxProtoId, OldMaxProtoId; + int MaskSize; + int ConfigId; + TEMP_CONFIG Config; + int i; + int debug_level = NO_DEBUG; + + if (classify_learning_debug_level >= 3) + debug_level = + PRINT_MATCH_SUMMARY | PRINT_FEATURE_MATCHES | PRINT_PROTO_MATCHES; + + IClass = ClassForClassId(Templates->Templates, ClassId); + Class = Templates->Class[ClassId]; + + if (IClass->NumConfigs >= MAX_NUM_CONFIGS) { + ++NumAdaptationsFailed; + if (classify_learning_debug_level >= 1) + tprintf("Cannot make new temporary config: maximum number exceeded.\n"); + return -1; + } + + OldMaxProtoId = IClass->NumProtos - 1; + + NumOldProtos = im_.FindGoodProtos(IClass, AllProtosOn, AllConfigsOff, + NumFeatures, Features, + OldProtos, classify_adapt_proto_threshold, + debug_level); + + MaskSize = WordsInVectorOfSize(MAX_NUM_PROTOS); + zero_all_bits(TempProtoMask, MaskSize); + for (i = 0; i < NumOldProtos; i++) + SET_BIT(TempProtoMask, OldProtos[i]); + + NumBadFeatures = im_.FindBadFeatures(IClass, TempProtoMask, AllConfigsOn, + NumFeatures, Features, + BadFeatures, + classify_adapt_feature_threshold, + debug_level); + + MaxProtoId = MakeNewTempProtos(FloatFeatures, NumBadFeatures, BadFeatures, + IClass, Class, TempProtoMask); + if (MaxProtoId == NO_PROTO) { + ++NumAdaptationsFailed; + if (classify_learning_debug_level >= 1) + tprintf("Cannot make new temp protos: maximum number exceeded.\n"); + return -1; + } + + ConfigId = AddIntConfig(IClass); + ConvertConfig(TempProtoMask, ConfigId, IClass); + Config = NewTempConfig(MaxProtoId, FontinfoId); + TempConfigFor(Class, ConfigId) = Config; + copy_all_bits(TempProtoMask, Config->Protos, 
Config->ProtoVectorSize); + + if (classify_learning_debug_level >= 1) + tprintf("Making new temp config %d fontinfo id %d" + " using %d old and %d new protos.\n", + ConfigId, Config->FontinfoId, + NumOldProtos, MaxProtoId - OldMaxProtoId); + + return ConfigId; +} /* MakeNewTemporaryConfig */ + +/*---------------------------------------------------------------------------*/ +/** + * This routine finds sets of sequential bad features + * that all have the same angle and converts each set into + * a new temporary proto. The temp proto is added to the + * proto pruner for IClass, pushed onto the list of temp + * protos in Class, and added to TempProtoMask. + * + * @param Features floating-pt features describing new character + * @param NumBadFeat number of bad features to turn into protos + * @param BadFeat feature id's of bad features + * @param IClass integer class templates to add new protos to + * @param Class adapted class templates to add new protos to + * @param TempProtoMask proto mask to add new protos to + * + * Globals: none + * + * @return Max proto id in class after all protos have been added. 
+ */ +PROTO_ID Classify::MakeNewTempProtos(FEATURE_SET Features, + int NumBadFeat, + FEATURE_ID BadFeat[], + INT_CLASS IClass, + ADAPT_CLASS Class, + BIT_VECTOR TempProtoMask) { + FEATURE_ID *ProtoStart; + FEATURE_ID *ProtoEnd; + FEATURE_ID *LastBad; + TEMP_PROTO TempProto; + PROTO Proto; + FEATURE F1, F2; + float X1, X2, Y1, Y2; + float A1, A2, AngleDelta; + float SegmentLength; + PROTO_ID Pid; + + for (ProtoStart = BadFeat, LastBad = ProtoStart + NumBadFeat; + ProtoStart < LastBad; ProtoStart = ProtoEnd) { + F1 = Features->Features[*ProtoStart]; + X1 = F1->Params[PicoFeatX]; + Y1 = F1->Params[PicoFeatY]; + A1 = F1->Params[PicoFeatDir]; + + for (ProtoEnd = ProtoStart + 1, + SegmentLength = GetPicoFeatureLength(); + ProtoEnd < LastBad; + ProtoEnd++, SegmentLength += GetPicoFeatureLength()) { + F2 = Features->Features[*ProtoEnd]; + X2 = F2->Params[PicoFeatX]; + Y2 = F2->Params[PicoFeatY]; + A2 = F2->Params[PicoFeatDir]; + + AngleDelta = fabs(A1 - A2); + if (AngleDelta > 0.5) + AngleDelta = 1.0 - AngleDelta; + + if (AngleDelta > matcher_clustering_max_angle_delta || + fabs(X1 - X2) > SegmentLength || + fabs(Y1 - Y2) > SegmentLength) + break; + } + + F2 = Features->Features[*(ProtoEnd - 1)]; + X2 = F2->Params[PicoFeatX]; + Y2 = F2->Params[PicoFeatY]; + A2 = F2->Params[PicoFeatDir]; + + Pid = AddIntProto(IClass); + if (Pid == NO_PROTO) + return (NO_PROTO); + + TempProto = NewTempProto(); + Proto = &(TempProto->Proto); + + /* compute proto params - NOTE that Y_DIM_OFFSET must be used because + ConvertProto assumes that the Y dimension varies from -0.5 to 0.5 + instead of the -0.25 to 0.75 used in baseline normalization */ + Proto->Length = SegmentLength; + Proto->Angle = A1; + Proto->X = (X1 + X2) / 2.0; + Proto->Y = (Y1 + Y2) / 2.0 - Y_DIM_OFFSET; + FillABC(Proto); + + TempProto->ProtoId = Pid; + SET_BIT(TempProtoMask, Pid); + + ConvertProto(Proto, Pid, IClass); + AddProtoToProtoPruner(Proto, Pid, IClass, + classify_learning_debug_level >= 2); + + Class->TempProtos = 
push(Class->TempProtos, TempProto); + } + return IClass->NumProtos - 1; +} /* MakeNewTempProtos */ + +/*---------------------------------------------------------------------------*/ +/** + * + * @param Templates current set of adaptive templates + * @param ClassId class containing config to be made permanent + * @param ConfigId config to be made permanent + * @param Blob current blob being adapted to + * + * Globals: none + */ +void Classify::MakePermanent(ADAPT_TEMPLATES Templates, + CLASS_ID ClassId, + int ConfigId, + TBLOB *Blob) { + UNICHAR_ID *Ambigs; + TEMP_CONFIG Config; + ADAPT_CLASS Class; + PROTO_KEY ProtoKey; + + Class = Templates->Class[ClassId]; + Config = TempConfigFor(Class, ConfigId); + + MakeConfigPermanent(Class, ConfigId); + if (Class->NumPermConfigs == 0) + Templates->NumPermClasses++; + Class->NumPermConfigs++; + + // Initialize permanent config. + Ambigs = GetAmbiguities(Blob, ClassId); + auto Perm = static_cast<PERM_CONFIG>(malloc(sizeof(PERM_CONFIG_STRUCT))); + Perm->Ambigs = Ambigs; + Perm->FontinfoId = Config->FontinfoId; + + // Free memory associated with temporary config (since ADAPTED_CONFIG + // is a union we need to clean up before we record permanent config). + ProtoKey.Templates = Templates; + ProtoKey.ClassId = ClassId; + ProtoKey.ConfigId = ConfigId; + Class->TempProtos = delete_d(Class->TempProtos, &ProtoKey, MakeTempProtoPerm); + FreeTempConfig(Config); + + // Record permanent config. 
+ PermConfigFor(Class, ConfigId) = Perm;
+
+ if (classify_learning_debug_level >= 1) {
+ tprintf("Making config %d for %s (ClassId %d) permanent:"
+ " fontinfo id %d, ambiguities '",
+ ConfigId, getDict().getUnicharset().debug_str(ClassId).c_str(),
+ ClassId, PermConfigFor(Class, ConfigId)->FontinfoId);
+ for (UNICHAR_ID *AmbigsPointer = Ambigs;
+ *AmbigsPointer >= 0; ++AmbigsPointer)
+ tprintf("%s", unicharset.id_to_unichar(*AmbigsPointer));
+ tprintf("'.\n");
+ }
+} /* MakePermanent */
+
+/*---------------------------------------------------------------------------*/
+/**
+ * This routine converts TempProto to be permanent if
+ * its proto id is used by the configuration specified in
+ * ProtoKey.
+ *
+ * @param item1 (TEMP_PROTO) temporary proto to compare to key
+ * @param item2 (PROTO_KEY) defines which protos to make permanent
+ *
+ * Globals: none
+ *
+ * @return true if TempProto is converted, false otherwise
+ */
+int MakeTempProtoPerm(void *item1, void *item2) {
+ ADAPT_CLASS Class;
+ TEMP_CONFIG Config;
+ TEMP_PROTO TempProto;
+ PROTO_KEY *ProtoKey;
+
+ // Callback signature forces void*; item1/item2 types are documented above.
+ TempProto = static_cast<TEMP_PROTO>(item1);
+ ProtoKey = static_cast<PROTO_KEY *>(item2);
+
+ Class = ProtoKey->Templates->Class[ProtoKey->ClassId];
+ Config = TempConfigFor(Class, ProtoKey->ConfigId);
+
+ // Leave protos alone that are not referenced by the config being
+ // made permanent.
+ if (TempProto->ProtoId > Config->MaxProtoId ||
+ !test_bit (Config->Protos, TempProto->ProtoId))
+ return false;
+
+ // Promote the proto, register it with the class pruner, and free the
+ // temporary wrapper (the caller's delete_d removes it from the list).
+ MakeProtoPermanent(Class, TempProto->ProtoId);
+ AddProtoToClassPruner(&(TempProto->Proto), ProtoKey->ClassId,
+ ProtoKey->Templates->Templates);
+ FreeTempProto(TempProto);
+
+ return true;
+} /* MakeTempProtoPerm */
+
+/*---------------------------------------------------------------------------*/
+/**
+ * This routine writes the matches in Results to File.
+ * + * @param results match results to write to File + * + * Globals: none + */ +void Classify::PrintAdaptiveMatchResults(const ADAPT_RESULTS& results) { + for (int i = 0; i < results.match.size(); ++i) { + tprintf("%s ", unicharset.debug_str(results.match[i].unichar_id).c_str()); + results.match[i].Print(); + } +} /* PrintAdaptiveMatchResults */ + +/*---------------------------------------------------------------------------*/ +/** + * This routine steps through each matching class in Results + * and removes it from the match list if its rating + * is worse than the BestRating plus a pad. In other words, + * all good matches get moved to the front of the classes + * array. + * + * @param Results contains matches to be filtered + * + * Globals: + * - matcher_bad_match_pad defines a "bad match" + */ +void Classify::RemoveBadMatches(ADAPT_RESULTS *Results) { + int Next, NextGood; + float BadMatchThreshold; + static const char* romans = "i v x I V X"; + BadMatchThreshold = Results->best_rating - matcher_bad_match_pad; + + if (classify_bln_numeric_mode) { + UNICHAR_ID unichar_id_one = unicharset.contains_unichar("1") ? + unicharset.unichar_to_id("1") : -1; + UNICHAR_ID unichar_id_zero = unicharset.contains_unichar("0") ? 
+ unicharset.unichar_to_id("0") : -1; + float scored_one = ScoredUnichar(unichar_id_one, *Results); + float scored_zero = ScoredUnichar(unichar_id_zero, *Results); + + for (Next = NextGood = 0; Next < Results->match.size(); Next++) { + const UnicharRating& match = Results->match[Next]; + if (match.rating >= BadMatchThreshold) { + if (!unicharset.get_isalpha(match.unichar_id) || + strstr(romans, + unicharset.id_to_unichar(match.unichar_id)) != nullptr) { + } else if (unicharset.eq(match.unichar_id, "l") && + scored_one < BadMatchThreshold) { + Results->match[Next].unichar_id = unichar_id_one; + } else if (unicharset.eq(match.unichar_id, "O") && + scored_zero < BadMatchThreshold) { + Results->match[Next].unichar_id = unichar_id_zero; + } else { + Results->match[Next].unichar_id = INVALID_UNICHAR_ID; // Don't copy. + } + if (Results->match[Next].unichar_id != INVALID_UNICHAR_ID) { + if (NextGood == Next) { + ++NextGood; + } else { + Results->match[NextGood++] = Results->match[Next]; + } + } + } + } + } else { + for (Next = NextGood = 0; Next < Results->match.size(); Next++) { + if (Results->match[Next].rating >= BadMatchThreshold) { + if (NextGood == Next) { + ++NextGood; + } else { + Results->match[NextGood++] = Results->match[Next]; + } + } + } + } + Results->match.resize(NextGood); +} /* RemoveBadMatches */ + +/*----------------------------------------------------------------------------*/ +/** + * This routine discards extra digits or punctuation from the results. + * We keep only the top 2 punctuation answers and the top 1 digit answer if + * present. + * + * @param Results contains matches to be filtered + */ +void Classify::RemoveExtraPuncs(ADAPT_RESULTS *Results) { + int Next, NextGood; + int punc_count; /*no of garbage characters */ + int digit_count; + /*garbage characters */ + static char punc_chars[] = ". , ; : / ` ~ ' - = \\ | \" ! 
_ ^";
+ static char digit_chars[] = "0 1 2 3 4 5 6 7 8 9";
+
+ punc_count = 0;
+ digit_count = 0;
+ // Compact the kept matches in place; punc_count/digit_count enforce the
+ // "top 2 punctuation / top 1 digit" limits described above.
+ for (Next = NextGood = 0; Next < Results->match.size(); Next++) {
+ const UnicharRating& match = Results->match[Next];
+ bool keep = true;
+ if (strstr(punc_chars,
+ unicharset.id_to_unichar(match.unichar_id)) != nullptr) {
+ if (punc_count >= 2)
+ keep = false;
+ punc_count++;
+ } else {
+ if (strstr(digit_chars,
+ unicharset.id_to_unichar(match.unichar_id)) != nullptr) {
+ if (digit_count >= 1)
+ keep = false;
+ digit_count++;
+ }
+ }
+ if (keep) {
+ if (NextGood == Next) {
+ // Nothing dropped yet; no copy needed.
+ ++NextGood;
+ } else {
+ Results->match[NextGood++] = match;
+ }
+ }
+ }
+ Results->match.resize(NextGood);
+} /* RemoveExtraPuncs */
+
+/*---------------------------------------------------------------------------*/
+/**
+ * This routine resets the internal thresholds inside
+ * the integer matcher to correspond to the specified
+ * threshold.
+ *
+ * @param Threshold threshold for creating new templates
+ *
+ * Globals:
+ * - matcher_good_threshold default good match rating
+ */
+void Classify::SetAdaptiveThreshold(float Threshold) {
+ // The default good-match threshold maps to a fixed 0.9; any other value
+ // is inverted so that a better (lower) threshold gives a larger value.
+ Threshold = (Threshold == matcher_good_threshold) ? 0.9: (1.0 - Threshold);
+ // Scale to the integer matcher's 0..255 range for both parameters.
+ classify_adapt_proto_threshold.set_value(
+ ClipToRange<int>(255 * Threshold, 0, 255));
+ classify_adapt_feature_threshold.set_value(
+ ClipToRange<int>(255 * Threshold, 0, 255));
+} /* SetAdaptiveThreshold */
+
+#ifndef GRAPHICS_DISABLED
+
+/*---------------------------------------------------------------------------*/
+/**
+ * This routine displays debug information for the best config
+ * of the given shape_id for the given set of features.
+ *
+ * @param shape_id classifier id to work with
+ * @param features features of the unknown character
+ * @param num_features Number of features in the features array.
 */
+
+void Classify::ShowBestMatchFor(int shape_id,
+ const INT_FEATURE_STRUCT* features,
+ int num_features) {
+ uint32_t config_mask;
+ if (UnusedClassIdIn(PreTrainedTemplates, shape_id)) {
+ tprintf("No built-in templates for class/shape %d\n", shape_id);
+ return;
+ }
+ if (num_features <= 0) {
+ tprintf("Illegal blob (char norm features)!\n");
+ return;
+ }
+ UnicharRating cn_result;
+ classify_norm_method.set_value(character);
+ // First pass: match with all protos/configs enabled to find the best config.
+ im_.Match(ClassForClassId(PreTrainedTemplates, shape_id),
+ AllProtosOn, AllConfigsOn,
+ num_features, features, &cn_result,
+ classify_adapt_feature_threshold, NO_DEBUG,
+ matcher_debug_separate_windows);
+ tprintf("\n");
+ config_mask = 1 << cn_result.config;
+
+ tprintf("Static Shape ID: %d\n", shape_id);
+ ShowMatchDisplay();
+ // Second pass: re-match with only the best config enabled and debug flags
+ // on, so the display shows just that configuration.
+ im_.Match(ClassForClassId(PreTrainedTemplates, shape_id), AllProtosOn,
+ &config_mask, num_features, features, &cn_result,
+ classify_adapt_feature_threshold, matcher_debug_flags,
+ matcher_debug_separate_windows);
+ UpdateMatchDisplay();
+} /* ShowBestMatchFor */
+
+#endif // !GRAPHICS_DISABLED
+
+// Returns a string for the classifier class_id: either the corresponding
+// unicharset debug_str or the shape_table_ debug str.
+STRING Classify::ClassIDToDebugStr(const INT_TEMPLATES_STRUCT* templates,
+ int class_id, int config_id) const {
+ STRING class_string;
+ // Only the pre-trained templates are indexed by the shape table.
+ if (templates == PreTrainedTemplates && shape_table_ != nullptr) {
+ int shape_id = ClassAndConfigIDToFontOrShapeID(class_id, config_id);
+ class_string = shape_table_->DebugStr(shape_id);
+ } else {
+ class_string = unicharset.debug_str(class_id);
+ }
+ return class_string;
+}
+
+// Converts a classifier class_id index to a shape_table_ index
+int Classify::ClassAndConfigIDToFontOrShapeID(int class_id,
+ int int_result_config) const {
+ int font_set_id = PreTrainedTemplates->Class[class_id]->font_set_id;
+ // Older inttemps have no font_ids.
// Converts a classifier class_id index with a config ID to:
// shape_table_ present: a shape_table_ index OR
// No shape_table_: a font ID.
int Classify::ClassAndConfigIDToFontOrShapeID(int class_id,
                                              int int_result_config) const {
  int font_set_id = PreTrainedTemplates->Class[class_id]->font_set_id;
  // Older inttemps have no font_ids.
  if (font_set_id < 0)
    return kBlankFontinfoId;
  const FontSet &fs = fontset_table_.get(font_set_id);
  ASSERT_HOST(int_result_config >= 0 && int_result_config < fs.size);
  return fs.configs[int_result_config];
}

// Converts a shape_table_ index to a classifier class_id index (not a
// unichar-id!). Uses a search, so not fast.
int Classify::ShapeIDToClassID(int shape_id) const {
  for (int id = 0; id < PreTrainedTemplates->NumClasses; ++id) {
    int font_set_id = PreTrainedTemplates->Class[id]->font_set_id;
    ASSERT_HOST(font_set_id >= 0);
    const FontSet &fs = fontset_table_.get(font_set_id);
    // Each config of a class stores the shape index it was trained for;
    // return the first class that references shape_id.
    for (int config = 0; config < fs.size; ++config) {
      if (fs.configs[config] == shape_id)
        return id;
    }
  }
  tprintf("Shape %d not found\n", shape_id);
  return -1;
}

// Returns true if the given TEMP_CONFIG is good enough to make it
// a permanent config.
bool Classify::TempConfigReliable(CLASS_ID class_id,
                                  const TEMP_CONFIG &config) {
  if (classify_learning_debug_level >= 1) {
    tprintf("NumTimesSeen for config of %s is %d\n",
            getDict().getUnicharset().debug_str(class_id).c_str(),
            config->NumTimesSeen);
  }
  // Seen at least the "sufficient" count: reliable unconditionally.
  if (config->NumTimesSeen >= matcher_sufficient_examples_for_prototyping) {
    return true;
  } else if (config->NumTimesSeen < matcher_min_examples_for_prototyping) {
    // Below the bare minimum: never reliable.
    return false;
  } else if (use_ambigs_for_adaption) {
    // Go through the ambigs vector and see whether we have already seen
    // enough times all the characters represented by the ambigs vector.
    const UnicharIdVector *ambigs =
        getDict().getUnicharAmbigs().AmbigsForAdaption(class_id);
    int ambigs_size = (ambigs == nullptr) ? 0 : ambigs->size();
    for (int ambig = 0; ambig < ambigs_size; ++ambig) {
      ADAPT_CLASS ambig_class = AdaptedTemplates->Class[(*ambigs)[ambig]];
      assert(ambig_class != nullptr);
      // An ambig class with no permanent config and too few sightings means
      // this config might be confusable with it; hold off making permanent.
      if (ambig_class->NumPermConfigs == 0 &&
          ambig_class->MaxNumTimesSeen <
          matcher_min_examples_for_prototyping) {
        if (classify_learning_debug_level >= 1) {
          tprintf("Ambig %s has not been seen enough times,"
                  " not making config for %s permanent\n",
                  getDict().getUnicharset().debug_str(
                      (*ambigs)[ambig]).c_str(),
                  getDict().getUnicharset().debug_str(class_id).c_str());
        }
        return false;
      }
    }
  }
  return true;
}

// Walks the reverse-ambigs of class_id and promotes to permanent any of
// their temporary configs that have now become reliable.
void Classify::UpdateAmbigsGroup(CLASS_ID class_id, TBLOB *Blob) {
  const UnicharIdVector *ambigs =
      getDict().getUnicharAmbigs().ReverseAmbigsForAdaption(class_id);
  int ambigs_size = (ambigs == nullptr) ? 0 : ambigs->size();
  if (classify_learning_debug_level >= 1) {
    tprintf("Running UpdateAmbigsGroup for %s class_id=%d\n",
            getDict().getUnicharset().debug_str(class_id).c_str(), class_id);
  }
  for (int ambig = 0; ambig < ambigs_size; ++ambig) {
    CLASS_ID ambig_class_id = (*ambigs)[ambig];
    const ADAPT_CLASS ambigs_class = AdaptedTemplates->Class[ambig_class_id];
    for (int cfg = 0; cfg < MAX_NUM_CONFIGS; ++cfg) {
      // Already-permanent configs need no promotion.
      if (ConfigIsPermanent(ambigs_class, cfg)) continue;
      const TEMP_CONFIG config =
          TempConfigFor(AdaptedTemplates->Class[ambig_class_id], cfg);
      if (config != nullptr && TempConfigReliable(ambig_class_id, config)) {
        if (classify_learning_debug_level >= 1) {
          tprintf("Making config %d of %s permanent\n", cfg,
                  getDict().getUnicharset().debug_str(
                      ambig_class_id).c_str());
        }
        MakePermanent(AdaptedTemplates, ambig_class_id, cfg, Blob);
      }
    }
  }
}
+/****************************************************************************** + ** Filename: blobclass.c + ** Purpose: High level blob classification and training routines. + ** Author: Dan Johnson + ** + ** (c) Copyright Hewlett-Packard Company, 1988. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + ******************************************************************************/ + +#include "blobclass.h" + +#include <cstdio> + +#include "classify.h" +#include "featdefs.h" +#include "mf.h" +#include "normfeat.h" + +namespace tesseract { + +static const char kUnknownFontName[] = "UnknownFont"; + +static STRING_VAR(classify_font_name, kUnknownFontName, + "Default font name to be used in training"); + +/**---------------------------------------------------------------------------- + Public Code +----------------------------------------------------------------------------**/ + +// Finds the name of the training font and returns it in fontname, by cutting +// it out based on the expectation that the filename is of the form: +// /path/to/dir/[lang].[fontname].exp[num] +// The [lang], [fontname] and [num] fields should not have '.' characters. +// If the global parameter classify_font_name is set, its value is used instead. 
// Finds the name of the training font and returns it in fontname, by cutting
// it out based on the expectation that the filename is of the form:
// /path/to/dir/[lang].[fontname].exp[num]
// The [lang], [fontname] and [num] fields should not have '.' characters.
// If the global parameter classify_font_name is set, its value is used instead.
void ExtractFontName(const char* filename, STRING* fontname) {
  *fontname = classify_font_name;
  if (*fontname == kUnknownFontName) {
    // filename is expected to be of the form [lang].[fontname].exp[num]
    // The [lang], [fontname] and [num] fields should not have '.' characters.
    // Dots in the directory part are skipped by searching from the basename.
    const char *basename = strrchr(filename, '/');
    const char *firstdot = strchr(basename ? basename : filename, '.');
    const char *lastdot = strrchr(filename, '.');
    // Need two distinct dots to bracket the font name; otherwise keep
    // kUnknownFontName.
    if (firstdot != lastdot && firstdot != nullptr && lastdot != nullptr) {
      ++firstdot;
      *fontname = firstdot;
      fontname->truncate_at(lastdot - firstdot);
    }
  }
}


/*---------------------------------------------------------------------------*/

// Extracts features from the given blob and saves them in the tr_file_data_
// member variable.
// fontname: Name of font that this blob was printed in.
// cn_denorm: Character normalization transformation to apply to the blob.
// fx_info: Character normalization parameters computed with cn_denorm.
// blob_text: Ground truth text for the blob.
void Classify::LearnBlob(const STRING& fontname, TBLOB* blob,
                         const DENORM& cn_denorm,
                         const INT_FX_RESULT_STRUCT& fx_info,
                         const char* blob_text) {
  CHAR_DESC CharDesc = NewCharDescription(feature_defs_);
  // Fill the four feature-set slots: micro features, char-norm features,
  // int cn features and int geo features (slot order fixed by feature_defs_
  // — TODO confirm against featdefs.h).
  CharDesc->FeatureSets[0] = ExtractMicros(blob, cn_denorm);
  CharDesc->FeatureSets[1] = ExtractCharNormFeatures(fx_info);
  CharDesc->FeatureSets[2] = ExtractIntCNFeatures(*blob, fx_info);
  CharDesc->FeatureSets[3] = ExtractIntGeoFeatures(*blob, fx_info);

  if (ValidCharDescription(feature_defs_, CharDesc)) {
    // Label the features with a class name and font name.
    tr_file_data_ += "\n";
    tr_file_data_ += fontname;
    tr_file_data_ += " ";
    tr_file_data_ += blob_text;
    tr_file_data_ += "\n";

    // write micro-features to file and clean up
    WriteCharDescription(feature_defs_, CharDesc, &tr_file_data_);
  } else {
    tprintf("Blob learned was invalid!\n");
  }
  FreeCharDescription(CharDesc);
}  // LearnBlob

// Writes stored training data to a .tr file based on the given filename.
// Returns false on error.
bool Classify::WriteTRFile(const char* filename) {
  bool result = false;
  std::string tr_filename = filename;
  tr_filename += ".tr";
  FILE* fp = fopen(tr_filename.c_str(), "wb");
  if (fp) {
    result =
        tesseract::Serialize(fp, &tr_file_data_[0], tr_file_data_.length());
    fclose(fp);
  }
  // The buffer is discarded whether or not the write succeeded.
  tr_file_data_.truncate_at(0);
  return result;
}
+ ******************************************************************************/ + +#ifndef BLOBCLASS_H +#define BLOBCLASS_H + +/**---------------------------------------------------------------------------- + Include Files and Type Defines +----------------------------------------------------------------------------**/ +#include "strngs.h" + +/**---------------------------------------------------------------------------- + Public Function Prototypes +----------------------------------------------------------------------------**/ +namespace tesseract { +// Finds the name of the training font and returns it in fontname, by cutting +// it out based on the expectation that the filename is of the form: +// /path/to/dir/[lang].[fontname].exp[num] +// The [lang], [fontname] and [num] fields should not have '.' characters. +// If the global parameter classify_font_name is set, its value is used instead. +void ExtractFontName(const char* filename, STRING* fontname); + +} // namespace tesseract. + +#endif diff --git a/tesseract/src/classify/classify.cpp b/tesseract/src/classify/classify.cpp new file mode 100644 index 00000000..939036d0 --- /dev/null +++ b/tesseract/src/classify/classify.cpp @@ -0,0 +1,230 @@ +/////////////////////////////////////////////////////////////////////// +// File: classify.cpp +// Description: classify class. +// Author: Samuel Charron +// +// (C) Copyright 2006, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
// Stub Classify used when the legacy (static/adaptive) classifier is
// compiled out: only registers the parameters still read by other modules.
Classify::Classify()
    :
    INT_MEMBER(classify_debug_level, 0, "Classify debug level",
               this->params()),

    BOOL_MEMBER(classify_bln_numeric_mode, 0,
                "Assume the input is numbers [0-9].", this->params()),

    double_MEMBER(classify_max_rating_ratio, 1.5,
                  "Veto ratio between classifier ratings", this->params()),

    double_MEMBER(classify_max_certainty_margin, 5.5,
                  "Veto difference between classifier certainties",
                  this->params()),

    // The dictionary is owned by (and back-references) this Classify.
    dict_(this) {}

Classify::~Classify() {}
BOOL_MEMBER(classify_enable_adaptive_matcher, 1, + "Enable adaptive classifier", this->params()), + BOOL_MEMBER(classify_use_pre_adapted_templates, 0, + "Use pre-adapted classifier templates", this->params()), + BOOL_MEMBER(classify_save_adapted_templates, 0, + "Save adapted templates to a file", this->params()), + BOOL_MEMBER(classify_enable_adaptive_debugger, 0, "Enable match debugger", + this->params()), + BOOL_MEMBER(classify_nonlinear_norm, 0, + "Non-linear stroke-density normalization", this->params()), + INT_MEMBER(matcher_debug_level, 0, "Matcher Debug Level", this->params()), + INT_MEMBER(matcher_debug_flags, 0, "Matcher Debug Flags", this->params()), + INT_MEMBER(classify_learning_debug_level, 0, "Learning Debug Level: ", + this->params()), + double_MEMBER(matcher_good_threshold, 0.125, "Good Match (0-1)", + this->params()), + double_MEMBER(matcher_reliable_adaptive_result, 0.0, "Great Match (0-1)", + this->params()), + double_MEMBER(matcher_perfect_threshold, 0.02, "Perfect Match (0-1)", + this->params()), + double_MEMBER(matcher_bad_match_pad, 0.15, "Bad Match Pad (0-1)", + this->params()), + double_MEMBER(matcher_rating_margin, 0.1, "New template margin (0-1)", + this->params()), + double_MEMBER(matcher_avg_noise_size, 12.0, "Avg. 
noise blob length", + this->params()), + INT_MEMBER(matcher_permanent_classes_min, 1, "Min # of permanent classes", + this->params()), + INT_MEMBER(matcher_min_examples_for_prototyping, 3, + "Reliable Config Threshold", this->params()), + INT_MEMBER(matcher_sufficient_examples_for_prototyping, 5, + "Enable adaption even if the ambiguities have not been seen", + this->params()), + double_MEMBER(matcher_clustering_max_angle_delta, 0.015, + "Maximum angle delta for prototype clustering", + this->params()), + double_MEMBER(classify_misfit_junk_penalty, 0.0, + "Penalty to apply when a non-alnum is vertically out of " + "its expected textline position", + this->params()), + double_MEMBER(rating_scale, 1.5, "Rating scaling factor", this->params()), + double_MEMBER(certainty_scale, 20.0, "Certainty scaling factor", + this->params()), + double_MEMBER(tessedit_class_miss_scale, 0.00390625, + "Scale factor for features not used", this->params()), + double_MEMBER( + classify_adapted_pruning_factor, 2.5, + "Prune poor adapted results this much worse than best result", + this->params()), + double_MEMBER(classify_adapted_pruning_threshold, -1.0, + "Threshold at which classify_adapted_pruning_factor starts", + this->params()), + INT_MEMBER(classify_adapt_proto_threshold, 230, + "Threshold for good protos during adaptive 0-255", + this->params()), + INT_MEMBER(classify_adapt_feature_threshold, 230, + "Threshold for good features during adaptive 0-255", + this->params()), + BOOL_MEMBER(disable_character_fragments, true, + "Do not include character fragments in the" + " results of the classifier", + this->params()), + double_MEMBER(classify_character_fragments_garbage_certainty_threshold, + -3.0, + "Exclude fragments that do not look like whole" + " characters from training and adaption", + this->params()), + BOOL_MEMBER(classify_debug_character_fragments, false, + "Bring up graphical debugging windows for fragments training", + this->params()), + 
BOOL_MEMBER(matcher_debug_separate_windows, false, + "Use two different windows for debugging the matching: " + "One for the protos and one for the features.", + this->params()), + STRING_MEMBER(classify_learn_debug_str, "", "Class str to debug learning", + this->params()), + INT_MEMBER(classify_class_pruner_threshold, 229, + "Class Pruner Threshold 0-255", this->params()), + INT_MEMBER(classify_class_pruner_multiplier, 15, + "Class Pruner Multiplier 0-255: ", this->params()), + INT_MEMBER(classify_cp_cutoff_strength, 7, + "Class Pruner CutoffStrength: ", this->params()), + INT_MEMBER(classify_integer_matcher_multiplier, 10, + "Integer Matcher Multiplier 0-255: ", this->params()), + BOOL_MEMBER(classify_bln_numeric_mode, 0, + "Assume the input is numbers [0-9].", this->params()), + double_MEMBER(speckle_large_max_size, 0.30, "Max large speckle size", + this->params()), + double_MEMBER(speckle_rating_penalty, 10.0, + "Penalty to add to worst rating for noise", this->params()), + im_(&classify_debug_level), + dict_(this) { + using namespace std::placeholders; // for _1, _2 + fontinfo_table_.set_clear_callback(std::bind(FontInfoDeleteCallback, _1)); + fontset_table_.set_clear_callback(std::bind(FontSetDeleteCallback, _1)); + + InitFeatureDefs(&feature_defs_); +} + +Classify::~Classify() { + EndAdaptiveClassifier(); + delete learn_debug_win_; + delete learn_fragmented_word_debug_win_; + delete learn_fragments_debug_win_; +} + + +// Takes ownership of the given classifier, and uses it for future calls +// to CharNormClassifier. +void Classify::SetStaticClassifier(ShapeClassifier* static_classifier) { + delete static_classifier_; + static_classifier_ = static_classifier; +} + +// Moved from speckle.cpp +// Adds a noise classification result that is a bit worse than the worst +// current result, or the worst possible result if no current results. 
+void Classify::AddLargeSpeckleTo(int blob_length, BLOB_CHOICE_LIST *choices) { + BLOB_CHOICE_IT bc_it(choices); + // If there is no classifier result, we will use the worst possible certainty + // and corresponding rating. + float certainty = -getDict().certainty_scale; + float rating = rating_scale * blob_length; + if (!choices->empty() && blob_length > 0) { + bc_it.move_to_last(); + BLOB_CHOICE* worst_choice = bc_it.data(); + // Add speckle_rating_penalty to worst rating, matching old value. + rating = worst_choice->rating() + speckle_rating_penalty; + // Compute the rating to correspond to the certainty. (Used to be kept + // the same, but that messes up the language model search.) + certainty = -rating * getDict().certainty_scale / + (rating_scale * blob_length); + } + auto* blob_choice = new BLOB_CHOICE(UNICHAR_SPACE, rating, certainty, + -1, 0.0f, FLT_MAX, 0, + BCC_SPECKLE_CLASSIFIER); + bc_it.add_to_end(blob_choice); +} + +// Returns true if the blob is small enough to be a large speckle. +bool Classify::LargeSpeckle(const TBLOB &blob) { + double speckle_size = kBlnXHeight * speckle_large_max_size; + TBOX bbox = blob.bounding_box(); + return bbox.width() < speckle_size && bbox.height() < speckle_size; +} + +} // namespace tesseract + +#endif // def DISABLED_LEGACY_ENGINE diff --git a/tesseract/src/classify/classify.h b/tesseract/src/classify/classify.h new file mode 100644 index 00000000..44e0a77b --- /dev/null +++ b/tesseract/src/classify/classify.h @@ -0,0 +1,583 @@ +/////////////////////////////////////////////////////////////////////// +// File: classify.h +// Description: classify class. +// Author: Samuel Charron +// +// (C) Copyright 2006, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
// Minimal Classify declaration used when DISABLED_LEGACY_ENGINE is defined:
// retains only the dictionary accessor and the parameters that other
// modules still reference.
class Classify : public CCStruct {
 public:
  Classify();
  virtual ~Classify();
  // Returns the dictionary owned by this classifier instance.
  virtual Dict& getDict() {
    return dict_;
  }

  // Member variables.

  INT_VAR_H(classify_debug_level, 0, "Classify debug level");

  BOOL_VAR_H(classify_bln_numeric_mode, 0,
             "Assume the input is numbers [0-9].");

  double_VAR_H(classify_max_rating_ratio, 1.5,
               "Veto ratio between classifier ratings");

  double_VAR_H(classify_max_certainty_margin, 5.5,
               "Veto difference between classifier certainties");

 private:
  Dict dict_;
};
UnicharRating; + +// How segmented is a blob. In this enum, character refers to a classifiable +// unit, but that is too long and character is usually easier to understand. +enum CharSegmentationType { + CST_FRAGMENT, // A partial character. + CST_WHOLE, // A correctly segmented character. + CST_IMPROPER, // More than one but less than 2 characters. + CST_NGRAM // Multiple characters. +}; + +class TESS_API Classify : public CCStruct { + public: + Classify(); + ~Classify() override; + virtual Dict& getDict() { + return dict_; + } + + const ShapeTable* shape_table() const { + return shape_table_; + } + + // Takes ownership of the given classifier, and uses it for future calls + // to CharNormClassifier. + void SetStaticClassifier(ShapeClassifier* static_classifier); + + // Adds a noise classification result that is a bit worse than the worst + // current result, or the worst possible result if no current results. + void AddLargeSpeckleTo(int blob_length, BLOB_CHOICE_LIST *choices); + + // Returns true if the blob is small enough to be a large speckle. + bool LargeSpeckle(const TBLOB &blob); + + /* adaptive.cpp ************************************************************/ + ADAPT_TEMPLATES NewAdaptedTemplates(bool InitFromUnicharset); + int GetFontinfoId(ADAPT_CLASS Class, uint8_t ConfigId); + // Runs the class pruner from int_templates on the given features, returning + // the number of classes output in results. + // int_templates Class pruner tables + // num_features Number of features in blob + // features Array of features + // normalization_factors (input) Array of int_templates->NumClasses fudge + // factors from blob normalization process. + // (Indexed by CLASS_INDEX) + // expected_num_features (input) Array of int_templates->NumClasses + // expected number of features for each class. + // (Indexed by CLASS_INDEX) + // results (output) Sorted Array of pruned classes. 
+ // Array must be sized to take the maximum possible + // number of outputs : int_templates->NumClasses. + int PruneClasses(const INT_TEMPLATES_STRUCT* int_templates, int num_features, + int keep_this, const INT_FEATURE_STRUCT* features, + const uint8_t* normalization_factors, + const uint16_t* expected_num_features, + std::vector<CP_RESULT_STRUCT>* results); + void ReadNewCutoffs(TFile* fp, uint16_t* Cutoffs); + void PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates); + void WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates); + ADAPT_TEMPLATES ReadAdaptedTemplates(TFile* File); + /* normmatch.cpp ************************************************************/ + float ComputeNormMatch(CLASS_ID ClassId, + const FEATURE_STRUCT& feature, bool DebugMatch); + void FreeNormProtos(); + NORM_PROTOS* ReadNormProtos(TFile* fp); + /* protos.cpp ***************************************************************/ + void ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class); + INT_TEMPLATES CreateIntTemplates(CLASSES FloatProtos, + const UNICHARSET& target_unicharset); + /* adaptmatch.cpp ***********************************************************/ + + // Learns the given word using its chopped_word, seam_array, denorm, + // box_word, best_state, and correct_text to learn both correctly and + // incorrectly segmented blobs. If fontname is not nullptr, then LearnBlob + // is called and the data will be saved in an internal buffer. + // Otherwise AdaptToBlob is called for adaption within a document. + void LearnWord(const char* fontname, WERD_RES* word); + + // Builds a blob of length fragments, from the word, starting at start, + // and then learns it, as having the given correct_text. + // If fontname is not nullptr, then LearnBlob is called and the data will be + // saved in an internal buffer for static training. + // Otherwise AdaptToBlob is called for adaption within a document. 
+ // threshold is a magic number required by AdaptToChar and generated by + // ComputeAdaptionThresholds. + // Although it can be partly inferred from the string, segmentation is + // provided to explicitly clarify the character segmentation. + void LearnPieces(const char* fontname, int start, int length, float threshold, + CharSegmentationType segmentation, const char* correct_text, + WERD_RES* word); + void InitAdaptiveClassifier(TessdataManager* mgr); + void InitAdaptedClass(TBLOB *Blob, + CLASS_ID ClassId, + int FontinfoId, + ADAPT_CLASS Class, + ADAPT_TEMPLATES Templates); + void AmbigClassifier(const std::vector<INT_FEATURE_STRUCT>& int_features, + const INT_FX_RESULT_STRUCT& fx_info, + const TBLOB *blob, + INT_TEMPLATES templates, + ADAPT_CLASS *classes, + UNICHAR_ID *ambiguities, + ADAPT_RESULTS *results); + void MasterMatcher(INT_TEMPLATES templates, + int16_t num_features, + const INT_FEATURE_STRUCT* features, + const uint8_t* norm_factors, + ADAPT_CLASS* classes, + int debug, + int matcher_multiplier, + const TBOX& blob_box, + const std::vector<CP_RESULT_STRUCT>& results, + ADAPT_RESULTS* final_results); + // Converts configs to fonts, and if the result is not adapted, and a + // shape_table_ is present, the shape is expanded to include all + // unichar_ids represented, before applying a set of corrections to the + // distance rating in int_result, (see ComputeCorrectedRating.) + // The results are added to the final_results output. + void ExpandShapesAndApplyCorrections(ADAPT_CLASS* classes, + bool debug, + int class_id, + int bottom, int top, + float cp_rating, + int blob_length, + int matcher_multiplier, + const uint8_t* cn_factors, + UnicharRating* int_result, + ADAPT_RESULTS* final_results); + // Applies a set of corrections to the distance im_rating, + // including the cn_correction, miss penalty and additional penalty + // for non-alnums being vertical misfits. Returns the corrected distance. 
+ double ComputeCorrectedRating(bool debug, int unichar_id, double cp_rating, + double im_rating, int feature_misses, + int bottom, int top, + int blob_length, int matcher_multiplier, + const uint8_t* cn_factors); + void ConvertMatchesToChoices(const DENORM& denorm, const TBOX& box, + ADAPT_RESULTS *Results, + BLOB_CHOICE_LIST *Choices); + void AddNewResult(const UnicharRating& new_result, ADAPT_RESULTS *results); + int GetAdaptiveFeatures(TBLOB *Blob, + INT_FEATURE_ARRAY IntFeatures, + FEATURE_SET *FloatFeatures); + +#ifndef GRAPHICS_DISABLED + void DebugAdaptiveClassifier(TBLOB *Blob, + ADAPT_RESULTS *Results); +#endif + PROTO_ID MakeNewTempProtos(FEATURE_SET Features, + int NumBadFeat, + FEATURE_ID BadFeat[], + INT_CLASS IClass, + ADAPT_CLASS Class, + BIT_VECTOR TempProtoMask); + int MakeNewTemporaryConfig(ADAPT_TEMPLATES Templates, + CLASS_ID ClassId, + int FontinfoId, + int NumFeatures, + INT_FEATURE_ARRAY Features, + FEATURE_SET FloatFeatures); + void MakePermanent(ADAPT_TEMPLATES Templates, + CLASS_ID ClassId, + int ConfigId, + TBLOB *Blob); + void PrintAdaptiveMatchResults(const ADAPT_RESULTS& results); + void RemoveExtraPuncs(ADAPT_RESULTS *Results); + void RemoveBadMatches(ADAPT_RESULTS *Results); + void SetAdaptiveThreshold(float Threshold); + void ShowBestMatchFor(int shape_id, + const INT_FEATURE_STRUCT* features, + int num_features); + // Returns a string for the classifier class_id: either the corresponding + // unicharset debug_str or the shape_table_ debug str. + STRING ClassIDToDebugStr(const INT_TEMPLATES_STRUCT* templates, + int class_id, int config_id) const; + // Converts a classifier class_id index with a config ID to: + // shape_table_ present: a shape_table_ index OR + // No shape_table_: a font ID. + // Without shape training, each class_id, config pair represents a single + // unichar id/font combination, so this function looks up the corresponding + // font id. 
+ // With shape training, each class_id, config pair represents a single + // shape table index, so the fontset_table stores the shape table index, + // and the shape_table_ must be consulted to obtain the actual unichar_id/ + // font combinations that the shape represents. + int ClassAndConfigIDToFontOrShapeID(int class_id, + int int_result_config) const; + // Converts a shape_table_ index to a classifier class_id index (not a + // unichar-id!). Uses a search, so not fast. + int ShapeIDToClassID(int shape_id) const; + UNICHAR_ID *BaselineClassifier( + TBLOB *Blob, const std::vector<INT_FEATURE_STRUCT>& int_features, + const INT_FX_RESULT_STRUCT& fx_info, + ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results); + int CharNormClassifier(TBLOB *blob, + const TrainingSample& sample, + ADAPT_RESULTS *adapt_results); + + // As CharNormClassifier, but operates on a TrainingSample and outputs to + // a GenericVector of ShapeRating without conversion to classes. + int CharNormTrainingSample(bool pruner_only, int keep_this, + const TrainingSample& sample, + std::vector<UnicharRating>* results); + UNICHAR_ID *GetAmbiguities(TBLOB *Blob, CLASS_ID CorrectClass); + void DoAdaptiveMatch(TBLOB *Blob, ADAPT_RESULTS *Results); + void AdaptToChar(TBLOB* Blob, CLASS_ID ClassId, int FontinfoId, + float Threshold, ADAPT_TEMPLATES adaptive_templates); + void DisplayAdaptedChar(TBLOB* blob, INT_CLASS_STRUCT* int_class); + bool AdaptableWord(WERD_RES* word); + void EndAdaptiveClassifier(); + void SettupPass1(); + void SettupPass2(); + void AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices); + void ClassifyAsNoise(ADAPT_RESULTS *Results); + void ResetAdaptiveClassifierInternal(); + void SwitchAdaptiveClassifier(); + void StartBackupAdaptiveClassifier(); + + int GetCharNormFeature(const INT_FX_RESULT_STRUCT& fx_info, + INT_TEMPLATES templates, + uint8_t* pruner_norm_array, + uint8_t* char_norm_array); + // Computes the char_norm_array for the unicharset and, if not nullptr, the + // 
pruner_array as appropriate according to the existence of the shape_table. + // The norm_feature is deleted as it is almost certainly no longer needed. + void ComputeCharNormArrays(FEATURE_STRUCT* norm_feature, + INT_TEMPLATES_STRUCT* templates, + uint8_t* char_norm_array, + uint8_t* pruner_array); + + bool TempConfigReliable(CLASS_ID class_id, const TEMP_CONFIG &config); + void UpdateAmbigsGroup(CLASS_ID class_id, TBLOB *Blob); + + bool AdaptiveClassifierIsFull() const { return NumAdaptationsFailed > 0; } + bool AdaptiveClassifierIsEmpty() const { + return AdaptedTemplates->NumPermClasses == 0; + } + bool LooksLikeGarbage(TBLOB *blob); + void RefreshDebugWindow(ScrollView **win, const char *msg, + int y_offset, const TBOX &wbox); + // intfx.cpp + // Computes the DENORMS for bl(baseline) and cn(character) normalization + // during feature extraction. The input denorm describes the current state + // of the blob, which is usually a baseline-normalized word. + // The Transforms setup are as follows: + // Baseline Normalized (bl) Output: + // We center the grapheme by aligning the x-coordinate of its centroid with + // x=128 and leaving the already-baseline-normalized y as-is. + // + // Character Normalized (cn) Output: + // We align the grapheme's centroid at the origin and scale it + // asymmetrically in x and y so that the 2nd moments are a standard value + // (51.2) ie the result is vaguely square. + // If classify_nonlinear_norm is true: + // A non-linear normalization is setup that attempts to evenly distribute + // edges across x and y. + // + // Some of the fields of fx_info are also setup: + // Length: Total length of outline. + // Rx: Rounded y second moment. (Reversed by convention.) + // Ry: rounded x second moment. + // Xmean: Rounded x center of mass of the blob. + // Ymean: Rounded y center of mass of the blob. 
+ static void SetupBLCNDenorms(const TBLOB& blob, bool nonlinear_norm, + DENORM* bl_denorm, DENORM* cn_denorm, + INT_FX_RESULT_STRUCT* fx_info); + + // Extracts sets of 3-D features of length kStandardFeatureLength (=12.8), as + // (x,y) position and angle as measured counterclockwise from the vector + // <-1, 0>, from blob using two normalizations defined by bl_denorm and + // cn_denorm. See SetpuBLCNDenorms for definitions. + // If outline_cn_counts is not nullptr, on return it contains the cumulative + // number of cn features generated for each outline in the blob (in order). + // Thus after the first outline, there were (*outline_cn_counts)[0] features, + // after the second outline, there were (*outline_cn_counts)[1] features etc. + static void ExtractFeatures(const TBLOB& blob, + bool nonlinear_norm, + std::vector<INT_FEATURE_STRUCT>* bl_features, + std::vector<INT_FEATURE_STRUCT>* cn_features, + INT_FX_RESULT_STRUCT* results, + GenericVector<int>* outline_cn_counts); + /* float2int.cpp ************************************************************/ + void ClearCharNormArray(uint8_t* char_norm_array); + void ComputeIntCharNormArray(const FEATURE_STRUCT& norm_feature, + uint8_t* char_norm_array); + void ComputeIntFeatures(FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures); + /* intproto.cpp *************************************************************/ + INT_TEMPLATES ReadIntTemplates(TFile* fp); + void WriteIntTemplates(FILE *File, INT_TEMPLATES Templates, + const UNICHARSET& target_unicharset); + CLASS_ID GetClassToDebug(const char *Prompt, bool* adaptive_on, + bool* pretrained_on, int* shape_id); + void ShowMatchDisplay(); + /* font detection ***********************************************************/ + UnicityTable<FontInfo>& get_fontinfo_table() { + return fontinfo_table_; + } + const UnicityTable<FontInfo>& get_fontinfo_table() const { + return fontinfo_table_; + } + UnicityTable<FontSet>& get_fontset_table() { + return fontset_table_; + } + /* 
mfoutline.cpp ***********************************************************/ + void NormalizeOutlines(LIST Outlines, float *XScale, float *YScale); + /* outfeat.cpp ***********************************************************/ + FEATURE_SET ExtractOutlineFeatures(TBLOB *Blob); + /* picofeat.cpp ***********************************************************/ + FEATURE_SET ExtractPicoFeatures(TBLOB *Blob); + FEATURE_SET ExtractIntCNFeatures(const TBLOB& blob, + const INT_FX_RESULT_STRUCT& fx_info); + FEATURE_SET ExtractIntGeoFeatures(const TBLOB& blob, + const INT_FX_RESULT_STRUCT& fx_info); + /* blobclass.cpp ***********************************************************/ + // Extracts features from the given blob and saves them in the tr_file_data_ + // member variable. + // fontname: Name of font that this blob was printed in. + // cn_denorm: Character normalization transformation to apply to the blob. + // fx_info: Character normalization parameters computed with cn_denorm. + // blob_text: Ground truth text for the blob. + void LearnBlob(const STRING& fontname, TBLOB* Blob, const DENORM& cn_denorm, + const INT_FX_RESULT_STRUCT& fx_info, const char* blob_text); + // Writes stored training data to a .tr file based on the given filename. + // Returns false on error. + bool WriteTRFile(const char* filename); + + // Member variables. + + // Parameters. + // Set during training (in lang.config) to indicate whether the divisible + // blobs chopper should be used (true for latin script.) + BOOL_VAR_H(allow_blob_division, true, "Use divisible blobs chopping"); + // Set during training (in lang.config) to indicate whether the divisible + // blobs chopper should be used in preference to chopping. Set to true for + // southern Indic scripts. 
+ BOOL_VAR_H(prioritize_division, false, + "Prioritize blob division over chopping"); + BOOL_VAR_H(classify_enable_learning, true, "Enable adaptive classifier"); + INT_VAR_H(classify_debug_level, 0, "Classify debug level"); + + /* mfoutline.cpp ***********************************************************/ + /* control knobs used to control normalization of outlines */ + INT_VAR_H(classify_norm_method, character, "Normalization Method ..."); + double_VAR_H(classify_char_norm_range, 0.2, + "Character Normalization Range ..."); + double_VAR_H(classify_max_rating_ratio, 1.5, + "Veto ratio between classifier ratings"); + double_VAR_H(classify_max_certainty_margin, 5.5, + "Veto difference between classifier certainties"); + + /* adaptmatch.cpp ***********************************************************/ + BOOL_VAR_H(tess_cn_matching, 0, "Character Normalized Matching"); + BOOL_VAR_H(tess_bn_matching, 0, "Baseline Normalized Matching"); + BOOL_VAR_H(classify_enable_adaptive_matcher, 1, "Enable adaptive classifier"); + BOOL_VAR_H(classify_use_pre_adapted_templates, 0, + "Use pre-adapted classifier templates"); + BOOL_VAR_H(classify_save_adapted_templates, 0, + "Save adapted templates to a file"); + BOOL_VAR_H(classify_enable_adaptive_debugger, 0, "Enable match debugger"); + BOOL_VAR_H(classify_nonlinear_norm, 0, + "Non-linear stroke-density normalization"); + INT_VAR_H(matcher_debug_level, 0, "Matcher Debug Level"); + INT_VAR_H(matcher_debug_flags, 0, "Matcher Debug Flags"); + INT_VAR_H(classify_learning_debug_level, 0, "Learning Debug Level: "); + double_VAR_H(matcher_good_threshold, 0.125, "Good Match (0-1)"); + double_VAR_H(matcher_reliable_adaptive_result, 0.0, "Great Match (0-1)"); + double_VAR_H(matcher_perfect_threshold, 0.02, "Perfect Match (0-1)"); + double_VAR_H(matcher_bad_match_pad, 0.15, "Bad Match Pad (0-1)"); + double_VAR_H(matcher_rating_margin, 0.1, "New template margin (0-1)"); + double_VAR_H(matcher_avg_noise_size, 12.0, "Avg. 
noise blob length: "); + INT_VAR_H(matcher_permanent_classes_min, 1, "Min # of permanent classes"); + INT_VAR_H(matcher_min_examples_for_prototyping, 3, + "Reliable Config Threshold"); + INT_VAR_H(matcher_sufficient_examples_for_prototyping, 5, + "Enable adaption even if the ambiguities have not been seen"); + double_VAR_H(matcher_clustering_max_angle_delta, 0.015, + "Maximum angle delta for prototype clustering"); + double_VAR_H(classify_misfit_junk_penalty, 0.0, + "Penalty to apply when a non-alnum is vertically out of " + "its expected textline position"); + double_VAR_H(rating_scale, 1.5, "Rating scaling factor"); + double_VAR_H(certainty_scale, 20.0, "Certainty scaling factor"); + double_VAR_H(tessedit_class_miss_scale, 0.00390625, + "Scale factor for features not used"); + double_VAR_H(classify_adapted_pruning_factor, 2.5, + "Prune poor adapted results this much worse than best result"); + double_VAR_H(classify_adapted_pruning_threshold, -1.0, + "Threshold at which classify_adapted_pruning_factor starts"); + INT_VAR_H(classify_adapt_proto_threshold, 230, + "Threshold for good protos during adaptive 0-255"); + INT_VAR_H(classify_adapt_feature_threshold, 230, + "Threshold for good features during adaptive 0-255"); + BOOL_VAR_H(disable_character_fragments, true, + "Do not include character fragments in the" + " results of the classifier"); + double_VAR_H(classify_character_fragments_garbage_certainty_threshold, -3.0, + "Exclude fragments that do not match any whole character" + " with at least this certainty"); + BOOL_VAR_H(classify_debug_character_fragments, false, + "Bring up graphical debugging windows for fragments training"); + BOOL_VAR_H(matcher_debug_separate_windows, false, + "Use two different windows for debugging the matching: " + "One for the protos and one for the features."); + STRING_VAR_H(classify_learn_debug_str, "", "Class str to debug learning"); + + /* intmatcher.cpp **********************************************************/ + 
INT_VAR_H(classify_class_pruner_threshold, 229, + "Class Pruner Threshold 0-255"); + INT_VAR_H(classify_class_pruner_multiplier, 15, + "Class Pruner Multiplier 0-255: "); + INT_VAR_H(classify_cp_cutoff_strength, 7, + "Class Pruner CutoffStrength: "); + INT_VAR_H(classify_integer_matcher_multiplier, 10, + "Integer Matcher Multiplier 0-255: "); + + BOOL_VAR_H(classify_bln_numeric_mode, 0, + "Assume the input is numbers [0-9]."); + double_VAR_H(speckle_large_max_size, 0.30, "Max large speckle size"); + double_VAR_H(speckle_rating_penalty, 10.0, + "Penalty to add to worst rating for noise"); + + // Use class variables to hold onto built-in templates and adapted templates. + INT_TEMPLATES PreTrainedTemplates = nullptr; + ADAPT_TEMPLATES AdaptedTemplates = nullptr; + // The backup adapted templates are created from the previous page (only) + // so they are always ready and reasonably well trained if the primary + // adapted templates become full. + ADAPT_TEMPLATES BackupAdaptedTemplates = nullptr; + + // Create dummy proto and config masks for use with the built-in templates. + BIT_VECTOR AllProtosOn = nullptr; + BIT_VECTOR AllConfigsOn = nullptr; + BIT_VECTOR AllConfigsOff = nullptr; + BIT_VECTOR TempProtoMask = nullptr; + /* normmatch.cpp */ + NORM_PROTOS* NormProtos = nullptr; + /* font detection ***********************************************************/ + UnicityTable<FontInfo> fontinfo_table_; + // Without shape training, each class_id, config pair represents a single + // unichar id/font combination, so each fontset_table_ entry holds font ids + // for each config in the class. + // With shape training, each class_id, config pair represents a single + // shape_table_ index, so the fontset_table_ stores the shape_table_ index, + // and the shape_table_ must be consulted to obtain the actual unichar_id/ + // font combinations that the shape represents. 
+ UnicityTable<FontSet> fontset_table_; + + protected: + IntegerMatcher im_; + FEATURE_DEFS_STRUCT feature_defs_; + // If a shape_table_ is present, it is used to remap classifier output in + // ExpandShapesAndApplyCorrections. font_ids referenced by configs actually + // mean an index to the shape_table_ and the choices returned are *all* the + // shape_table_ entries at that index. + ShapeTable* shape_table_ = nullptr; + + private: + // The currently active static classifier. + ShapeClassifier* static_classifier_ = nullptr; + ScrollView* learn_debug_win_ = nullptr; + ScrollView* learn_fragmented_word_debug_win_ = nullptr; + ScrollView* learn_fragments_debug_win_ = nullptr; + + // Training data gathered here for all the images in a document. + STRING tr_file_data_; + + Dict dict_; + + GenericVector<uint16_t> shapetable_cutoffs_; + + /* variables used to hold performance statistics */ + int NumAdaptationsFailed = 0; + + // Expected number of features in the class pruner, used to penalize + // unknowns that have too few features (like a c being classified as e) so + // it doesn't recognize everything as '@' or '#'. + // CharNormCutoffs is for the static classifier (with no shapetable). + // BaselineCutoffs gets a copy of CharNormCutoffs as an estimate of the real + // value in the adaptive classifier. Both are indexed by unichar_id. 
+ // shapetable_cutoffs_ provides a similar value for each shape in the + // shape_table_ + uint16_t CharNormCutoffs[MAX_NUM_CLASSES]; + uint16_t BaselineCutoffs[MAX_NUM_CLASSES]; + + public: + bool EnableLearning = true; +}; + +} // namespace tesseract + +#endif // DISABLED_LEGACY_ENGINE + +#endif // TESSERACT_CLASSIFY_CLASSIFY_H_ diff --git a/tesseract/src/classify/cluster.cpp b/tesseract/src/classify/cluster.cpp new file mode 100644 index 00000000..25b2776d --- /dev/null +++ b/tesseract/src/classify/cluster.cpp @@ -0,0 +1,2425 @@ +/****************************************************************************** + ** Filename: cluster.cpp + ** Purpose: Routines for clustering points in N-D space + ** Author: Dan Johnson + ** + ** (c) Copyright Hewlett-Packard Company, 1988. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + *****************************************************************************/ + +#define _USE_MATH_DEFINES // for M_PI + +#include "cluster.h" + +#include "genericheap.h" +#include "kdpair.h" +#include "matrix.h" +#include "tprintf.h" + +#include "helpers.h" + +#include <cfloat> // for FLT_MAX +#include <cmath> // for M_PI +#include <vector> // for std::vector + +namespace tesseract { + +#define HOTELLING 1 // If true use Hotelling's test to decide where to split. +#define FTABLE_X 10 // Size of FTable. +#define FTABLE_Y 100 // Size of FTable. + +// Table of values approximating the cumulative F-distribution for a confidence of 1%. 
+const double FTable[FTABLE_Y][FTABLE_X] = { + {4052.19, 4999.52, 5403.34, 5624.62, 5763.65, 5858.97, 5928.33, 5981.10, 6022.50, 6055.85,}, + {98.502, 99.000, 99.166, 99.249, 99.300, 99.333, 99.356, 99.374, 99.388, 99.399,}, + {34.116, 30.816, 29.457, 28.710, 28.237, 27.911, 27.672, 27.489, 27.345, 27.229,}, + {21.198, 18.000, 16.694, 15.977, 15.522, 15.207, 14.976, 14.799, 14.659, 14.546,}, + {16.258, 13.274, 12.060, 11.392, 10.967, 10.672, 10.456, 10.289, 10.158, 10.051,}, + {13.745, 10.925, 9.780, 9.148, 8.746, 8.466, 8.260, 8.102, 7.976, 7.874,}, + {12.246, 9.547, 8.451, 7.847, 7.460, 7.191, 6.993, 6.840, 6.719, 6.620,}, + {11.259, 8.649, 7.591, 7.006, 6.632, 6.371, 6.178, 6.029, 5.911, 5.814,}, + {10.561, 8.022, 6.992, 6.422, 6.057, 5.802, 5.613, 5.467, 5.351, 5.257,}, + {10.044, 7.559, 6.552, 5.994, 5.636, 5.386, 5.200, 5.057, 4.942, 4.849,}, + { 9.646, 7.206, 6.217, 5.668, 5.316, 5.069, 4.886, 4.744, 4.632, 4.539,}, + { 9.330, 6.927, 5.953, 5.412, 5.064, 4.821, 4.640, 4.499, 4.388, 4.296,}, + { 9.074, 6.701, 5.739, 5.205, 4.862, 4.620, 4.441, 4.302, 4.191, 4.100,}, + { 8.862, 6.515, 5.564, 5.035, 4.695, 4.456, 4.278, 4.140, 4.030, 3.939,}, + { 8.683, 6.359, 5.417, 4.893, 4.556, 4.318, 4.142, 4.004, 3.895, 3.805,}, + { 8.531, 6.226, 5.292, 4.773, 4.437, 4.202, 4.026, 3.890, 3.780, 3.691,}, + { 8.400, 6.112, 5.185, 4.669, 4.336, 4.102, 3.927, 3.791, 3.682, 3.593,}, + { 8.285, 6.013, 5.092, 4.579, 4.248, 4.015, 3.841, 3.705, 3.597, 3.508,}, + { 8.185, 5.926, 5.010, 4.500, 4.171, 3.939, 3.765, 3.631, 3.523, 3.434,}, + { 8.096, 5.849, 4.938, 4.431, 4.103, 3.871, 3.699, 3.564, 3.457, 3.368,}, + { 8.017, 5.780, 4.874, 4.369, 4.042, 3.812, 3.640, 3.506, 3.398, 3.310,}, + { 7.945, 5.719, 4.817, 4.313, 3.988, 3.758, 3.587, 3.453, 3.346, 3.258,}, + { 7.881, 5.664, 4.765, 4.264, 3.939, 3.710, 3.539, 3.406, 3.299, 3.211,}, + { 7.823, 5.614, 4.718, 4.218, 3.895, 3.667, 3.496, 3.363, 3.256, 3.168,}, + { 7.770, 5.568, 4.675, 4.177, 3.855, 3.627, 3.457, 3.324, 3.217, 
3.129,}, + { 7.721, 5.526, 4.637, 4.140, 3.818, 3.591, 3.421, 3.288, 3.182, 3.094,}, + { 7.677, 5.488, 4.601, 4.106, 3.785, 3.558, 3.388, 3.256, 3.149, 3.062,}, + { 7.636, 5.453, 4.568, 4.074, 3.754, 3.528, 3.358, 3.226, 3.120, 3.032,}, + { 7.598, 5.420, 4.538, 4.045, 3.725, 3.499, 3.330, 3.198, 3.092, 3.005,}, + { 7.562, 5.390, 4.510, 4.018, 3.699, 3.473, 3.305, 3.173, 3.067, 2.979,}, + { 7.530, 5.362, 4.484, 3.993, 3.675, 3.449, 3.281, 3.149, 3.043, 2.955,}, + { 7.499, 5.336, 4.459, 3.969, 3.652, 3.427, 3.258, 3.127, 3.021, 2.934,}, + { 7.471, 5.312, 4.437, 3.948, 3.630, 3.406, 3.238, 3.106, 3.000, 2.913,}, + { 7.444, 5.289, 4.416, 3.927, 3.611, 3.386, 3.218, 3.087, 2.981, 2.894,}, + { 7.419, 5.268, 4.396, 3.908, 3.592, 3.368, 3.200, 3.069, 2.963, 2.876,}, + { 7.396, 5.248, 4.377, 3.890, 3.574, 3.351, 3.183, 3.052, 2.946, 2.859,}, + { 7.373, 5.229, 4.360, 3.873, 3.558, 3.334, 3.167, 3.036, 2.930, 2.843,}, + { 7.353, 5.211, 4.343, 3.858, 3.542, 3.319, 3.152, 3.021, 2.915, 2.828,}, + { 7.333, 5.194, 4.327, 3.843, 3.528, 3.305, 3.137, 3.006, 2.901, 2.814,}, + { 7.314, 5.179, 4.313, 3.828, 3.514, 3.291, 3.124, 2.993, 2.888, 2.801,}, + { 7.296, 5.163, 4.299, 3.815, 3.501, 3.278, 3.111, 2.980, 2.875, 2.788,}, + { 7.280, 5.149, 4.285, 3.802, 3.488, 3.266, 3.099, 2.968, 2.863, 2.776,}, + { 7.264, 5.136, 4.273, 3.790, 3.476, 3.254, 3.087, 2.957, 2.851, 2.764,}, + { 7.248, 5.123, 4.261, 3.778, 3.465, 3.243, 3.076, 2.946, 2.840, 2.754,}, + { 7.234, 5.110, 4.249, 3.767, 3.454, 3.232, 3.066, 2.935, 2.830, 2.743,}, + { 7.220, 5.099, 4.238, 3.757, 3.444, 3.222, 3.056, 2.925, 2.820, 2.733,}, + { 7.207, 5.087, 4.228, 3.747, 3.434, 3.213, 3.046, 2.916, 2.811, 2.724,}, + { 7.194, 5.077, 4.218, 3.737, 3.425, 3.204, 3.037, 2.907, 2.802, 2.715,}, + { 7.182, 5.066, 4.208, 3.728, 3.416, 3.195, 3.028, 2.898, 2.793, 2.706,}, + { 7.171, 5.057, 4.199, 3.720, 3.408, 3.186, 3.020, 2.890, 2.785, 2.698,}, + { 7.159, 5.047, 4.191, 3.711, 3.400, 3.178, 3.012, 2.882, 2.777, 2.690,}, + { 7.149, 
5.038, 4.182, 3.703, 3.392, 3.171, 3.005, 2.874, 2.769, 2.683,}, + { 7.139, 5.030, 4.174, 3.695, 3.384, 3.163, 2.997, 2.867, 2.762, 2.675,}, + { 7.129, 5.021, 4.167, 3.688, 3.377, 3.156, 2.990, 2.860, 2.755, 2.668,}, + { 7.119, 5.013, 4.159, 3.681, 3.370, 3.149, 2.983, 2.853, 2.748, 2.662,}, + { 7.110, 5.006, 4.152, 3.674, 3.363, 3.143, 2.977, 2.847, 2.742, 2.655,}, + { 7.102, 4.998, 4.145, 3.667, 3.357, 3.136, 2.971, 2.841, 2.736, 2.649,}, + { 7.093, 4.991, 4.138, 3.661, 3.351, 3.130, 2.965, 2.835, 2.730, 2.643,}, + { 7.085, 4.984, 4.132, 3.655, 3.345, 3.124, 2.959, 2.829, 2.724, 2.637,}, + { 7.077, 4.977, 4.126, 3.649, 3.339, 3.119, 2.953, 2.823, 2.718, 2.632,}, + { 7.070, 4.971, 4.120, 3.643, 3.333, 3.113, 2.948, 2.818, 2.713, 2.626,}, + { 7.062, 4.965, 4.114, 3.638, 3.328, 3.108, 2.942, 2.813, 2.708, 2.621,}, + { 7.055, 4.959, 4.109, 3.632, 3.323, 3.103, 2.937, 2.808, 2.703, 2.616,}, + { 7.048, 4.953, 4.103, 3.627, 3.318, 3.098, 2.932, 2.803, 2.698, 2.611,}, + { 7.042, 4.947, 4.098, 3.622, 3.313, 3.093, 2.928, 2.798, 2.693, 2.607,}, + { 7.035, 4.942, 4.093, 3.618, 3.308, 3.088, 2.923, 2.793, 2.689, 2.602,}, + { 7.029, 4.937, 4.088, 3.613, 3.304, 3.084, 2.919, 2.789, 2.684, 2.598,}, + { 7.023, 4.932, 4.083, 3.608, 3.299, 3.080, 2.914, 2.785, 2.680, 2.593,}, + { 7.017, 4.927, 4.079, 3.604, 3.295, 3.075, 2.910, 2.781, 2.676, 2.589,}, + { 7.011, 4.922, 4.074, 3.600, 3.291, 3.071, 2.906, 2.777, 2.672, 2.585,}, + { 7.006, 4.917, 4.070, 3.596, 3.287, 3.067, 2.902, 2.773, 2.668, 2.581,}, + { 7.001, 4.913, 4.066, 3.591, 3.283, 3.063, 2.898, 2.769, 2.664, 2.578,}, + { 6.995, 4.908, 4.062, 3.588, 3.279, 3.060, 2.895, 2.765, 2.660, 2.574,}, + { 6.990, 4.904, 4.058, 3.584, 3.275, 3.056, 2.891, 2.762, 2.657, 2.570,}, + { 6.985, 4.900, 4.054, 3.580, 3.272, 3.052, 2.887, 2.758, 2.653, 2.567,}, + { 6.981, 4.896, 4.050, 3.577, 3.268, 3.049, 2.884, 2.755, 2.650, 2.563,}, + { 6.976, 4.892, 4.047, 3.573, 3.265, 3.046, 2.881, 2.751, 2.647, 2.560,}, + { 6.971, 4.888, 4.043, 3.570, 
3.261, 3.042, 2.877, 2.748, 2.644, 2.557,}, + { 6.967, 4.884, 4.040, 3.566, 3.258, 3.039, 2.874, 2.745, 2.640, 2.554,}, + { 6.963, 4.881, 4.036, 3.563, 3.255, 3.036, 2.871, 2.742, 2.637, 2.551,}, + { 6.958, 4.877, 4.033, 3.560, 3.252, 3.033, 2.868, 2.739, 2.634, 2.548,}, + { 6.954, 4.874, 4.030, 3.557, 3.249, 3.030, 2.865, 2.736, 2.632, 2.545,}, + { 6.950, 4.870, 4.027, 3.554, 3.246, 3.027, 2.863, 2.733, 2.629, 2.542,}, + { 6.947, 4.867, 4.024, 3.551, 3.243, 3.025, 2.860, 2.731, 2.626, 2.539,}, + { 6.943, 4.864, 4.021, 3.548, 3.240, 3.022, 2.857, 2.728, 2.623, 2.537,}, + { 6.939, 4.861, 4.018, 3.545, 3.238, 3.019, 2.854, 2.725, 2.621, 2.534,}, + { 6.935, 4.858, 4.015, 3.543, 3.235, 3.017, 2.852, 2.723, 2.618, 2.532,}, + { 6.932, 4.855, 4.012, 3.540, 3.233, 3.014, 2.849, 2.720, 2.616, 2.529,}, + { 6.928, 4.852, 4.010, 3.538, 3.230, 3.012, 2.847, 2.718, 2.613, 2.527,}, + { 6.925, 4.849, 4.007, 3.535, 3.228, 3.009, 2.845, 2.715, 2.611, 2.524,}, + { 6.922, 4.846, 4.004, 3.533, 3.225, 3.007, 2.842, 2.713, 2.609, 2.522,}, + { 6.919, 4.844, 4.002, 3.530, 3.223, 3.004, 2.840, 2.711, 2.606, 2.520,}, + { 6.915, 4.841, 3.999, 3.528, 3.221, 3.002, 2.838, 2.709, 2.604, 2.518,}, + { 6.912, 4.838, 3.997, 3.525, 3.218, 3.000, 2.835, 2.706, 2.602, 2.515,}, + { 6.909, 4.836, 3.995, 3.523, 3.216, 2.998, 2.833, 2.704, 2.600, 2.513,}, + { 6.906, 4.833, 3.992, 3.521, 3.214, 2.996, 2.831, 2.702, 2.598, 2.511,}, + { 6.904, 4.831, 3.990, 3.519, 3.212, 2.994, 2.829, 2.700, 2.596, 2.509,}, + { 6.901, 4.829, 3.988, 3.517, 3.210, 2.992, 2.827, 2.698, 2.594, 2.507,}, + { 6.898, 4.826, 3.986, 3.515, 3.208, 2.990, 2.825, 2.696, 2.592, 2.505,}, + { 6.895, 4.824, 3.984, 3.513, 3.206, 2.988, 2.823, 2.694, 2.590, 2.503} +}; + +/** define the variance which will be used as a minimum variance for any + dimension of any feature. 
    Since most features are calculated from numbers with a precision no
    better than 1 in 128, the variance should never be less than the
    square of this number for parameters whose range is 1. */
#define MINVARIANCE 0.0004

/** define the absolute minimum number of samples which must be present in
    order to accurately test hypotheses about underlying probability
    distributions. Define separately the minimum samples that are needed
    before a statistical analysis is attempted; this number should be
    equal to MINSAMPLES but can be set to a lower number for early testing
    when very few samples are available. */
#define MINSAMPLESPERBUCKET 5
#define MINSAMPLES (MINBUCKETS * MINSAMPLESPERBUCKET)
#define MINSAMPLESNEEDED 1

/** define the size of the table which maps normalized samples to
    histogram buckets. Also define the number of standard deviations
    in a normal distribution which are considered to be significant.
    The mapping table will be defined in such a way that it covers
    the specified number of standard deviations on either side of
    the mean. BUCKETTABLESIZE should always be even. */
#define BUCKETTABLESIZE 1024
#define NORMALEXTENT 3.0

// Pair of clusters that are candidates for merging into one new cluster.
struct TEMPCLUSTER {
  CLUSTER *Cluster;
  CLUSTER *Neighbor;
};

using ClusterPair = tesseract::KDPairInc<float, TEMPCLUSTER*>;
using ClusterHeap = tesseract::GenericHeap<ClusterPair>;

// Per-cluster statistics gathered by ComputeStatistics.
struct STATISTICS {
  float AvgVariance;
  float *CoVariance;
  float *Min; // largest negative distance from the mean
  float *Max; // largest positive distance from the mean
};

// Histogram used to chi-square-test a sample set against a distribution.
struct BUCKETS {
  DISTRIBUTION Distribution; // distribution being tested for
  uint32_t SampleCount; // # of samples in histogram
  double Confidence; // confidence level of test
  double ChiSquared; // test threshold
  uint16_t NumberOfBuckets; // number of cells in histogram
  uint16_t Bucket[BUCKETTABLESIZE]; // mapping to histogram buckets
  uint32_t *Count; // frequency of occurrence histogram
  float *ExpectedCount; // expected histogram
};

// Key/value record for the cached chi-squared values solved by Solve().
struct CHISTRUCT{
  uint16_t DegreesOfFreedom;
  double Alpha;
  double ChiSquared;
};

// For use with KDWalk / MakePotentialClusters
struct ClusteringContext {
  ClusterHeap *heap; // heap used to hold temp clusters, "best" on top
  TEMPCLUSTER *candidates; // array of potential clusters
  KDTREE *tree; // kd-tree to be searched for neighbors
  int32_t next; // next candidate to be used
};

using DENSITYFUNC = double (*)(int32_t);
using SOLVEFUNC = double (*)(CHISTRUCT*, double);

// Small integer helpers used by the bucket-mapping code below.
#define Odd(N) ((N)%2)
#define Mirror(N,R) ((R) - (N) - 1)
#define Abs(N) (((N) < 0) ? (-(N)) : (N))

//--------------Global Data Definitions and Declarations----------------------
/** the following variables describe a discrete normal distribution
    which is used by NormalDensity() and NormalBucket(). The
    constant NORMALEXTENT determines how many standard
    deviations of the distribution are mapped onto the fixed
    discrete range of x. x=0 is mapped to -NORMALEXTENT standard
    deviations and x=BUCKETTABLESIZE is mapped to
    +NORMALEXTENT standard deviations. */
#define SqrtOf2Pi 2.506628275
static const double kNormalStdDev = BUCKETTABLESIZE / (2.0 * NORMALEXTENT);
static const double kNormalVariance =
    (BUCKETTABLESIZE * BUCKETTABLESIZE) / (4.0 * NORMALEXTENT * NORMALEXTENT);
static const double kNormalMagnitude =
    (2.0 * NORMALEXTENT) / (SqrtOf2Pi * BUCKETTABLESIZE);
static const double kNormalMean = BUCKETTABLESIZE / 2;

/** define lookup tables used to compute the number of histogram buckets
    that should be used for a given number of samples. */
#define LOOKUPTABLESIZE 8
#define MAXDEGREESOFFREEDOM MAXBUCKETS

static const uint32_t kCountTable[LOOKUPTABLESIZE] = {
  MINSAMPLES, 200, 400, 600, 800, 1000, 1500, 2000
}; // number of samples

static const uint16_t kBucketsTable[LOOKUPTABLESIZE] = {
  MINBUCKETS, 16, 20, 24, 27, 30, 35, MAXBUCKETS
}; // number of buckets

/*-------------------------------------------------------------------------
          Private Function Prototypes
--------------------------------------------------------------------------*/
static void CreateClusterTree(CLUSTERER* Clusterer);

static void MakePotentialClusters(ClusteringContext* context, CLUSTER* Cluster,
                                  int32_t Level);

static CLUSTER* FindNearestNeighbor(KDTREE*Tree, CLUSTER* Cluster,
                                    float* Distance);

static CLUSTER* MakeNewCluster(CLUSTERER* Clusterer, TEMPCLUSTER* TempCluster);

static void ComputePrototypes(CLUSTERER* Clusterer, CLUSTERCONFIG* Config);

static PROTOTYPE* MakePrototype(CLUSTERER* Clusterer, CLUSTERCONFIG* Config,
                                CLUSTER* Cluster);

static PROTOTYPE* MakeDegenerateProto(uint16_t N,
                                      CLUSTER* Cluster, STATISTICS* Statistics,
                                      PROTOSTYLE Style, int32_t MinSamples);

static PROTOTYPE* TestEllipticalProto(CLUSTERER* Clusterer,
                                      CLUSTERCONFIG* Config, CLUSTER* Cluster,
                                      STATISTICS* Statistics);

static PROTOTYPE* MakeSphericalProto(CLUSTERER* Clusterer,
                                     CLUSTER* Cluster, STATISTICS* Statistics,
                                     BUCKETS* Buckets);

static PROTOTYPE* MakeEllipticalProto(CLUSTERER* Clusterer,
                                      CLUSTER* Cluster, STATISTICS* Statistics,
                                      BUCKETS* Buckets);

static PROTOTYPE* MakeMixedProto(CLUSTERER* Clusterer,
                                 CLUSTER* Cluster, STATISTICS* Statistics,
                                 BUCKETS* NormalBuckets, double Confidence);

static void MakeDimRandom(uint16_t i, PROTOTYPE* Proto, PARAM_DESC* ParamDesc);

static void MakeDimUniform(uint16_t i, PROTOTYPE* Proto, STATISTICS* Statistics);

static STATISTICS* ComputeStatistics(int16_t N, PARAM_DESC ParamDesc[],
                                     CLUSTER* Cluster);

static PROTOTYPE* NewSphericalProto(uint16_t N, CLUSTER* Cluster,
                                    STATISTICS* Statistics);

static PROTOTYPE* NewEllipticalProto(int16_t N, CLUSTER* Cluster,
                                     STATISTICS* Statistics);

static PROTOTYPE* NewMixedProto(int16_t N, CLUSTER *Cluster, STATISTICS *Statistics);

static PROTOTYPE* NewSimpleProto(int16_t N, CLUSTER *Cluster);

static bool Independent(PARAM_DESC* ParamDesc,
                        int16_t N, float* CoVariance, float Independence);

static BUCKETS *GetBuckets(CLUSTERER* clusterer,
                           DISTRIBUTION Distribution,
                           uint32_t SampleCount,
                           double Confidence);

static BUCKETS *MakeBuckets(DISTRIBUTION Distribution,
                            uint32_t SampleCount,
                            double Confidence);

static uint16_t OptimumNumberOfBuckets(uint32_t SampleCount);

static double ComputeChiSquared(uint16_t DegreesOfFreedom, double Alpha);

static double NormalDensity(int32_t x);

static double UniformDensity(int32_t x);

static double Integral(double f1, double f2, double Dx);

static void FillBuckets(BUCKETS *Buckets,
                        CLUSTER *Cluster,
                        uint16_t Dim,
                        PARAM_DESC *ParamDesc,
                        float Mean,
                        float StdDev);

static uint16_t NormalBucket(PARAM_DESC *ParamDesc,
                             float x,
                             float Mean,
                             float StdDev);

static uint16_t UniformBucket(PARAM_DESC *ParamDesc,
                              float x,
                              float Mean,
                              float StdDev);

static bool DistributionOK(BUCKETS* Buckets);

static void FreeStatistics(STATISTICS *Statistics);

static void FreeBuckets(BUCKETS *Buckets);

static void FreeCluster(CLUSTER *Cluster);

static
uint16_t DegreesOfFreedom(DISTRIBUTION Distribution, uint16_t HistogramBuckets); + +static void AdjustBuckets(BUCKETS *Buckets, uint32_t NewSampleCount); + +static void InitBuckets(BUCKETS *Buckets); + +static int AlphaMatch(void *arg1, // CHISTRUCT *ChiStruct, + void *arg2); // CHISTRUCT *SearchKey); + +static CHISTRUCT *NewChiStruct(uint16_t DegreesOfFreedom, double Alpha); + +static double Solve(SOLVEFUNC Function, + void *FunctionParams, + double InitialGuess, + double Accuracy); + +static double ChiArea(CHISTRUCT *ChiParams, double x); + +static bool MultipleCharSamples(CLUSTERER* Clusterer, + CLUSTER* Cluster, + float MaxIllegal); + +static double InvertMatrix(const float* input, int size, float* inv); + +//--------------------------Public Code-------------------------------------- +/** + * This routine creates a new clusterer data structure, + * initializes it, and returns a pointer to it. + * + * @param SampleSize number of dimensions in feature space + * @param ParamDesc description of each dimension + * @return pointer to the new clusterer data structure + */ +CLUSTERER * +MakeClusterer (int16_t SampleSize, const PARAM_DESC ParamDesc[]) { + CLUSTERER *Clusterer; + int i; + + // allocate main clusterer data structure and init simple fields + Clusterer = static_cast<CLUSTERER *>(malloc (sizeof (CLUSTERER))); + Clusterer->SampleSize = SampleSize; + Clusterer->NumberOfSamples = 0; + Clusterer->NumChar = 0; + + // init fields which will not be used initially + Clusterer->Root = nullptr; + Clusterer->ProtoList = NIL_LIST; + + // maintain a copy of param descriptors in the clusterer data structure + Clusterer->ParamDesc = + static_cast<PARAM_DESC *>(malloc (SampleSize * sizeof (PARAM_DESC))); + for (i = 0; i < SampleSize; i++) { + Clusterer->ParamDesc[i].Circular = ParamDesc[i].Circular; + Clusterer->ParamDesc[i].NonEssential = ParamDesc[i].NonEssential; + Clusterer->ParamDesc[i].Min = ParamDesc[i].Min; + Clusterer->ParamDesc[i].Max = ParamDesc[i].Max; + 
Clusterer->ParamDesc[i].Range = ParamDesc[i].Max - ParamDesc[i].Min; + Clusterer->ParamDesc[i].HalfRange = Clusterer->ParamDesc[i].Range / 2; + Clusterer->ParamDesc[i].MidRange = + (ParamDesc[i].Max + ParamDesc[i].Min) / 2; + } + + // allocate a kd tree to hold the samples + Clusterer->KDTree = MakeKDTree (SampleSize, ParamDesc); + + // Initialize cache of histogram buckets to minimize recomputing them. + for (auto & d : Clusterer->bucket_cache) { + for (auto & c : d) + c = nullptr; + } + + return Clusterer; +} // MakeClusterer + +/** + * This routine creates a new sample data structure to hold + * the specified feature. This sample is added to the clusterer + * data structure (so that it knows which samples are to be + * clustered later), and a pointer to the sample is returned to + * the caller. + * + * @param Clusterer clusterer data structure to add sample to + * @param Feature feature to be added to clusterer + * @param CharID unique ident. of char that sample came from + * + * @return Pointer to the new sample data structure + */ +SAMPLE* MakeSample(CLUSTERER * Clusterer, const float* Feature, + int32_t CharID) { + SAMPLE *Sample; + int i; + + // see if the samples have already been clustered - if so trap an error + // Can't add samples after they have been clustered. 
+ ASSERT_HOST(Clusterer->Root == nullptr); + + // allocate the new sample and initialize it + Sample = static_cast<SAMPLE *>(malloc (sizeof (SAMPLE) + + (Clusterer->SampleSize - + 1) * sizeof (float))); + Sample->Clustered = false; + Sample->Prototype = false; + Sample->SampleCount = 1; + Sample->Left = nullptr; + Sample->Right = nullptr; + Sample->CharID = CharID; + + for (i = 0; i < Clusterer->SampleSize; i++) + Sample->Mean[i] = Feature[i]; + + // add the sample to the KD tree - keep track of the total # of samples + Clusterer->NumberOfSamples++; + KDStore(Clusterer->KDTree, Sample->Mean, Sample); + if (CharID >= Clusterer->NumChar) + Clusterer->NumChar = CharID + 1; + + // execute hook for monitoring clustering operation + // (*SampleCreationHook)(Sample); + + return (Sample); +} // MakeSample + +/** + * This routine first checks to see if the samples in this + * clusterer have already been clustered before; if so, it does + * not bother to recreate the cluster tree. It simply recomputes + * the prototypes based on the new Config info. + * + * If the samples have not been clustered before, the + * samples in the KD tree are formed into a cluster tree and then + * the prototypes are computed from the cluster tree. + * + * In either case this routine returns a pointer to a + * list of prototypes that best represent the samples given + * the constraints specified in Config. 
+ * + * @param Clusterer data struct containing samples to be clustered + * @param Config parameters which control clustering process + * + * @return Pointer to a list of prototypes + */ +LIST ClusterSamples(CLUSTERER *Clusterer, CLUSTERCONFIG *Config) { + //only create cluster tree if samples have never been clustered before + if (Clusterer->Root == nullptr) + CreateClusterTree(Clusterer); + + //deallocate the old prototype list if one exists + FreeProtoList (&Clusterer->ProtoList); + Clusterer->ProtoList = NIL_LIST; + + //compute prototypes starting at the root node in the tree + ComputePrototypes(Clusterer, Config); + // We don't need the cluster pointers in the protos any more, so null them + // out, which makes it safe to delete the clusterer. + LIST proto_list = Clusterer->ProtoList; + iterate(proto_list) { + auto *proto = reinterpret_cast<PROTOTYPE *>(first_node(proto_list)); + proto->Cluster = nullptr; + } + return Clusterer->ProtoList; +} // ClusterSamples + +/** + * This routine frees all of the memory allocated to the + * specified data structure. It will not, however, free + * the memory used by the prototype list. The pointers to + * the clusters for each prototype in the list will be set + * to nullptr to indicate that the cluster data structures no + * longer exist. Any sample lists that have been obtained + * via calls to GetSamples are no longer valid. + * @param Clusterer pointer to data structure to be freed + */ +void FreeClusterer(CLUSTERER *Clusterer) { + if (Clusterer != nullptr) { + free(Clusterer->ParamDesc); + if (Clusterer->KDTree != nullptr) + FreeKDTree (Clusterer->KDTree); + if (Clusterer->Root != nullptr) + FreeCluster (Clusterer->Root); + // Free up all used buckets structures. + for (auto & d : Clusterer->bucket_cache) { + for (auto & c : d) + if (c != nullptr) + FreeBuckets(c); + } + + free(Clusterer); + } +} // FreeClusterer + +/** + * This routine frees all of the memory allocated to the + * specified list of prototypes. 
The clusters which are + * pointed to by the prototypes are not freed. + * @param ProtoList pointer to list of prototypes to be freed + */ +void FreeProtoList(LIST *ProtoList) { + destroy_nodes(*ProtoList, FreePrototype); +} // FreeProtoList + +/** + * This routine deallocates the memory consumed by the specified + * prototype and modifies the corresponding cluster so that it + * is no longer marked as a prototype. The cluster is NOT + * deallocated by this routine. + * @param arg prototype data structure to be deallocated + */ +void FreePrototype(void *arg) { //PROTOTYPE *Prototype) + auto *Prototype = static_cast<PROTOTYPE *>(arg); + + // unmark the corresponding cluster (if there is one + if (Prototype->Cluster != nullptr) + Prototype->Cluster->Prototype = false; + + // deallocate the prototype statistics and then the prototype itself + free(Prototype->Distrib); + free(Prototype->Mean); + if (Prototype->Style != spherical) { + free(Prototype->Variance.Elliptical); + free(Prototype->Magnitude.Elliptical); + free(Prototype->Weight.Elliptical); + } + free(Prototype); +} // FreePrototype + +/** + * This routine is used to find all of the samples which + * belong to a cluster. It starts by removing the top + * cluster on the cluster list (SearchState). If this cluster is + * a leaf it is returned. Otherwise, the right subcluster + * is pushed on the list and we continue the search in the + * left subcluster. This continues until a leaf is found. + * If all samples have been found, nullptr is returned. + * InitSampleSearch() must be called + * before NextSample() to initialize the search. + * @param SearchState ptr to list containing clusters to be searched + * @return Pointer to the next leaf cluster (sample) or nullptr. 
+ */ +CLUSTER *NextSample(LIST *SearchState) { + CLUSTER *Cluster; + + if (*SearchState == NIL_LIST) + return (nullptr); + Cluster = reinterpret_cast<CLUSTER *>first_node (*SearchState); + *SearchState = pop (*SearchState); + for (;;) { + if (Cluster->Left == nullptr) + return (Cluster); + *SearchState = push (*SearchState, Cluster->Right); + Cluster = Cluster->Left; + } +} // NextSample + +/** + * This routine returns the mean of the specified + * prototype in the indicated dimension. + * @param Proto prototype to return mean of + * @param Dimension dimension whose mean is to be returned + * @return Mean of Prototype in Dimension + */ +float Mean(PROTOTYPE *Proto, uint16_t Dimension) { + return (Proto->Mean[Dimension]); +} // Mean + +/** + * This routine returns the standard deviation of the + * prototype in the indicated dimension. + * @param Proto prototype to return standard deviation of + * @param Dimension dimension whose stddev is to be returned + * @return Standard deviation of Prototype in Dimension + */ +float StandardDeviation(PROTOTYPE *Proto, uint16_t Dimension) { + switch (Proto->Style) { + case spherical: + return (static_cast<float>(sqrt (static_cast<double>(Proto->Variance.Spherical)))); + case elliptical: + return (static_cast<float>(sqrt (static_cast<double>(Proto->Variance.Elliptical[Dimension])))); + case mixed: + switch (Proto->Distrib[Dimension]) { + case normal: + return (static_cast<float>(sqrt (static_cast<double>(Proto->Variance.Elliptical[Dimension])))); + case uniform: + case D_random: + return (Proto->Variance.Elliptical[Dimension]); + case DISTRIBUTION_COUNT: + ASSERT_HOST(!"Distribution count not allowed!"); + } + } + return 0.0f; +} // StandardDeviation + + +/*--------------------------------------------------------------------------- + Private Code +----------------------------------------------------------------------------*/ +/** + * This routine performs a bottoms-up clustering on the samples + * held in the kd-tree of the 
Clusterer data structure. The + * result is a cluster tree. Each node in the tree represents + * a cluster which conceptually contains a subset of the samples. + * More precisely, the cluster contains all of the samples which + * are contained in its two sub-clusters. The leaves of the + * tree are the individual samples themselves; they have no + * sub-clusters. The root node of the tree conceptually contains + * all of the samples. + * The Clusterer data structure is changed. + * @param Clusterer data structure holdings samples to be clustered + */ +static void CreateClusterTree(CLUSTERER *Clusterer) { + ClusteringContext context; + ClusterPair HeapEntry; + TEMPCLUSTER *PotentialCluster; + + // each sample and its nearest neighbor form a "potential" cluster + // save these in a heap with the "best" potential clusters on top + context.tree = Clusterer->KDTree; + context.candidates = static_cast<TEMPCLUSTER *>(malloc(Clusterer->NumberOfSamples * sizeof(TEMPCLUSTER))); + context.next = 0; + context.heap = new ClusterHeap(Clusterer->NumberOfSamples); + KDWalk(context.tree, reinterpret_cast<void_proc>(MakePotentialClusters), &context); + + // form potential clusters into actual clusters - always do "best" first + while (context.heap->Pop(&HeapEntry)) { + PotentialCluster = HeapEntry.data(); + + // if main cluster of potential cluster is already in another cluster + // then we don't need to worry about it + if (PotentialCluster->Cluster->Clustered) { + continue; + } + + // if main cluster is not yet clustered, but its nearest neighbor is + // then we must find a new nearest neighbor + else if (PotentialCluster->Neighbor->Clustered) { + PotentialCluster->Neighbor = + FindNearestNeighbor(context.tree, PotentialCluster->Cluster, + &HeapEntry.key()); + if (PotentialCluster->Neighbor != nullptr) { + context.heap->Push(&HeapEntry); + } + } + + // if neither cluster is already clustered, form permanent cluster + else { + PotentialCluster->Cluster = + MakeNewCluster(Clusterer, 
PotentialCluster); + PotentialCluster->Neighbor = + FindNearestNeighbor(context.tree, PotentialCluster->Cluster, + &HeapEntry.key()); + if (PotentialCluster->Neighbor != nullptr) { + context.heap->Push(&HeapEntry); + } + } + } + + // the root node in the cluster tree is now the only node in the kd-tree + Clusterer->Root = static_cast<CLUSTER *>RootOf(Clusterer->KDTree); + + // free up the memory used by the K-D tree, heap, and temp clusters + FreeKDTree(context.tree); + Clusterer->KDTree = nullptr; + delete context.heap; + free(context.candidates); +} // CreateClusterTree + +/** + * This routine is designed to be used in concert with the + * KDWalk routine. It will create a potential cluster for + * each sample in the kd-tree that is being walked. This + * potential cluster will then be pushed on the heap. + * @param context ClusteringContext (see definition above) + * @param Cluster current cluster being visited in kd-tree walk + * @param Level level of this cluster in the kd-tree + */ +static void MakePotentialClusters(ClusteringContext* context, + CLUSTER* Cluster, int32_t /*Level*/) { + ClusterPair HeapEntry; + int next = context->next; + context->candidates[next].Cluster = Cluster; + HeapEntry.data() = &(context->candidates[next]); + context->candidates[next].Neighbor = + FindNearestNeighbor(context->tree, + context->candidates[next].Cluster, + &HeapEntry.key()); + if (context->candidates[next].Neighbor != nullptr) { + context->heap->Push(&HeapEntry); + context->next++; + } +} // MakePotentialClusters + +/** + * This routine searches the specified kd-tree for the nearest + * neighbor of the specified cluster. It actually uses the + * kd routines to find the 2 nearest neighbors since one of them + * will be the original cluster. A pointer to the nearest + * neighbor is returned, if it can be found, otherwise nullptr is + * returned. The distance between the 2 nodes is placed + * in the specified variable. 
+ * @param Tree kd-tree to search in for nearest neighbor + * @param Cluster cluster whose nearest neighbor is to be found + * @param Distance ptr to variable to report distance found + * @return Pointer to the nearest neighbor of Cluster, or nullptr + */ +static CLUSTER* +FindNearestNeighbor(KDTREE* Tree, CLUSTER* Cluster, float* Distance) +#define MAXNEIGHBORS 2 +#define MAXDISTANCE FLT_MAX +{ + CLUSTER *Neighbor[MAXNEIGHBORS]; + float Dist[MAXNEIGHBORS]; + int NumberOfNeighbors; + int32_t i; + CLUSTER *BestNeighbor; + + // find the 2 nearest neighbors of the cluster + KDNearestNeighborSearch(Tree, Cluster->Mean, MAXNEIGHBORS, MAXDISTANCE, + &NumberOfNeighbors, reinterpret_cast<void **>(Neighbor), Dist); + + // search for the nearest neighbor that is not the cluster itself + *Distance = MAXDISTANCE; + BestNeighbor = nullptr; + for (i = 0; i < NumberOfNeighbors; i++) { + if ((Dist[i] < *Distance) && (Neighbor[i] != Cluster)) { + *Distance = Dist[i]; + BestNeighbor = Neighbor[i]; + } + } + return BestNeighbor; +} // FindNearestNeighbor + +/** + * This routine creates a new permanent cluster from the + * clusters specified in TempCluster. The 2 clusters in + * TempCluster are marked as "clustered" and deleted from + * the kd-tree. The new cluster is then added to the kd-tree. 
+ * @param Clusterer current clustering environment + * @param TempCluster potential cluster to make permanent + * @return Pointer to the new permanent cluster + */ +static CLUSTER* MakeNewCluster(CLUSTERER* Clusterer, + TEMPCLUSTER* TempCluster) { + CLUSTER *Cluster; + + // allocate the new cluster and initialize it + Cluster = static_cast<CLUSTER *>(malloc( + sizeof(CLUSTER) + (Clusterer->SampleSize - 1) * sizeof(float))); + Cluster->Clustered = false; + Cluster->Prototype = false; + Cluster->Left = TempCluster->Cluster; + Cluster->Right = TempCluster->Neighbor; + Cluster->CharID = -1; + + // mark the old clusters as "clustered" and delete them from the kd-tree + Cluster->Left->Clustered = true; + Cluster->Right->Clustered = true; + KDDelete(Clusterer->KDTree, Cluster->Left->Mean, Cluster->Left); + KDDelete(Clusterer->KDTree, Cluster->Right->Mean, Cluster->Right); + + // compute the mean and sample count for the new cluster + Cluster->SampleCount = + MergeClusters(Clusterer->SampleSize, Clusterer->ParamDesc, + Cluster->Left->SampleCount, Cluster->Right->SampleCount, + Cluster->Mean, Cluster->Left->Mean, Cluster->Right->Mean); + + // add the new cluster to the KD tree + KDStore(Clusterer->KDTree, Cluster->Mean, Cluster); + return Cluster; +} // MakeNewCluster + +/** + * This routine merges two clusters into one larger cluster. + * To do this it computes the number of samples in the new + * cluster and the mean of the new cluster. The ParamDesc + * information is used to ensure that circular dimensions + * are handled correctly. + * @param N # of dimensions (size of arrays) + * @param ParamDesc array of dimension descriptions + * @param n1, n2 number of samples in each old cluster + * @param m array to hold mean of new cluster + * @param m1, m2 arrays containing means of old clusters + * @return The number of samples in the new cluster. 
+ */ +int32_t MergeClusters(int16_t N, + PARAM_DESC ParamDesc[], + int32_t n1, + int32_t n2, + float m[], + float m1[], float m2[]) { + int32_t i, n; + + n = n1 + n2; + for (i = N; i > 0; i--, ParamDesc++, m++, m1++, m2++) { + if (ParamDesc->Circular) { + // if distance between means is greater than allowed + // reduce upper point by one "rotation" to compute mean + // then normalize the mean back into the accepted range + if ((*m2 - *m1) > ParamDesc->HalfRange) { + *m = (n1 * *m1 + n2 * (*m2 - ParamDesc->Range)) / n; + if (*m < ParamDesc->Min) + *m += ParamDesc->Range; + } + else if ((*m1 - *m2) > ParamDesc->HalfRange) { + *m = (n1 * (*m1 - ParamDesc->Range) + n2 * *m2) / n; + if (*m < ParamDesc->Min) + *m += ParamDesc->Range; + } + else + *m = (n1 * *m1 + n2 * *m2) / n; + } + else + *m = (n1 * *m1 + n2 * *m2) / n; + } + return n; +} // MergeClusters + +/** + * This routine decides which clusters in the cluster tree + * should be represented by prototypes, forms a list of these + * prototypes, and places the list in the Clusterer data + * structure. 
+ * @param Clusterer data structure holding cluster tree + * @param Config parameters used to control prototype generation + */ +static void ComputePrototypes(CLUSTERER* Clusterer, CLUSTERCONFIG* Config) { + LIST ClusterStack = NIL_LIST; + CLUSTER *Cluster; + PROTOTYPE *Prototype; + + // use a stack to keep track of clusters waiting to be processed + // initially the only cluster on the stack is the root cluster + if (Clusterer->Root != nullptr) + ClusterStack = push (NIL_LIST, Clusterer->Root); + + // loop until we have analyzed all clusters which are potential prototypes + while (ClusterStack != NIL_LIST) { + // remove the next cluster to be analyzed from the stack + // try to make a prototype from the cluster + // if successful, put it on the proto list, else split the cluster + Cluster = reinterpret_cast<CLUSTER *>first_node (ClusterStack); + ClusterStack = pop (ClusterStack); + Prototype = MakePrototype(Clusterer, Config, Cluster); + if (Prototype != nullptr) { + Clusterer->ProtoList = push (Clusterer->ProtoList, Prototype); + } + else { + ClusterStack = push (ClusterStack, Cluster->Right); + ClusterStack = push (ClusterStack, Cluster->Left); + } + } +} // ComputePrototypes + +/** + * This routine attempts to create a prototype from the + * specified cluster that conforms to the distribution + * specified in Config. If there are too few samples in the + * cluster to perform a statistical analysis, then a prototype + * is generated but labelled as insignificant. If the + * dimensions of the cluster are not independent, no prototype + * is generated and nullptr is returned. If a prototype can be + * found that matches the desired distribution then a pointer + * to it is returned, otherwise nullptr is returned. 
+ * @param Clusterer data structure holding cluster tree + * @param Config parameters used to control prototype generation + * @param Cluster cluster to be made into a prototype + * @return Pointer to new prototype or nullptr + */ +static PROTOTYPE* MakePrototype(CLUSTERER* Clusterer, CLUSTERCONFIG* Config, + CLUSTER* Cluster) { + STATISTICS *Statistics; + PROTOTYPE *Proto; + BUCKETS *Buckets; + + // filter out clusters which contain samples from the same character + if (MultipleCharSamples (Clusterer, Cluster, Config->MaxIllegal)) + return nullptr; + + // compute the covariance matrix and ranges for the cluster + Statistics = + ComputeStatistics(Clusterer->SampleSize, Clusterer->ParamDesc, Cluster); + + // check for degenerate clusters which need not be analyzed further + // note that the MinSamples test assumes that all clusters with multiple + // character samples have been removed (as above) + Proto = MakeDegenerateProto( + Clusterer->SampleSize, Cluster, Statistics, Config->ProtoStyle, + static_cast<int32_t>(Config->MinSamples * Clusterer->NumChar)); + if (Proto != nullptr) { + FreeStatistics(Statistics); + return Proto; + } + // check to ensure that all dimensions are independent + if (!Independent(Clusterer->ParamDesc, Clusterer->SampleSize, + Statistics->CoVariance, Config->Independence)) { + FreeStatistics(Statistics); + return nullptr; + } + + if (HOTELLING && Config->ProtoStyle == elliptical) { + Proto = TestEllipticalProto(Clusterer, Config, Cluster, Statistics); + if (Proto != nullptr) { + FreeStatistics(Statistics); + return Proto; + } + } + + // create a histogram data structure used to evaluate distributions + Buckets = GetBuckets(Clusterer, normal, Cluster->SampleCount, + Config->Confidence); + + // create a prototype based on the statistics and test it + switch (Config->ProtoStyle) { + case spherical: + Proto = MakeSphericalProto(Clusterer, Cluster, Statistics, Buckets); + break; + case elliptical: + Proto = MakeEllipticalProto(Clusterer, Cluster, 
Statistics, Buckets); + break; + case mixed: + Proto = MakeMixedProto(Clusterer, Cluster, Statistics, Buckets, + Config->Confidence); + break; + case automatic: + Proto = MakeSphericalProto(Clusterer, Cluster, Statistics, Buckets); + if (Proto != nullptr) + break; + Proto = MakeEllipticalProto(Clusterer, Cluster, Statistics, Buckets); + if (Proto != nullptr) + break; + Proto = MakeMixedProto(Clusterer, Cluster, Statistics, Buckets, + Config->Confidence); + break; + } + FreeStatistics(Statistics); + return Proto; +} // MakePrototype + +/** + * This routine checks for clusters which are degenerate and + * therefore cannot be analyzed in a statistically valid way. + * A cluster is defined as degenerate if it does not have at + * least MINSAMPLESNEEDED samples in it. If the cluster is + * found to be degenerate, a prototype of the specified style + * is generated and marked as insignificant. A cluster is + * also degenerate if it does not have at least MinSamples + * samples in it. + * + * If the cluster is not degenerate, nullptr is returned. + * + * @param N number of dimensions + * @param Cluster cluster being analyzed + * @param Statistics statistical info about cluster + * @param Style type of prototype to be generated + * @param MinSamples minimum number of samples in a cluster + * @return Pointer to degenerate prototype or nullptr. 
+ */ +static PROTOTYPE* MakeDegenerateProto( //this was MinSample + uint16_t N, + CLUSTER *Cluster, + STATISTICS *Statistics, + PROTOSTYLE Style, + int32_t MinSamples) { + PROTOTYPE *Proto = nullptr; + + if (MinSamples < MINSAMPLESNEEDED) + MinSamples = MINSAMPLESNEEDED; + + if (Cluster->SampleCount < MinSamples) { + switch (Style) { + case spherical: + Proto = NewSphericalProto (N, Cluster, Statistics); + break; + case elliptical: + case automatic: + Proto = NewEllipticalProto (N, Cluster, Statistics); + break; + case mixed: + Proto = NewMixedProto (N, Cluster, Statistics); + break; + } + Proto->Significant = false; + } + return (Proto); +} // MakeDegenerateProto + +/** + * This routine tests the specified cluster to see if ** + * there is a statistically significant difference between + * the sub-clusters that would be made if the cluster were to + * be split. If not, then a new prototype is formed and + * returned to the caller. If there is, then nullptr is returned + * to the caller. + * @param Clusterer data struct containing samples being clustered + * @param Config provides the magic number of samples that make a good cluster + * @param Cluster cluster to be made into an elliptical prototype + * @param Statistics statistical info about cluster + * @return Pointer to new elliptical prototype or nullptr. + */ +static PROTOTYPE* TestEllipticalProto(CLUSTERER* Clusterer, + CLUSTERCONFIG *Config, CLUSTER* Cluster, + STATISTICS* Statistics) { + // Fraction of the number of samples used as a range around 1 within + // which a cluster has the magic size that allows a boost to the + // FTable by kFTableBoostMargin, thus allowing clusters near the + // magic size (equal to the number of sample characters) to be more + // likely to stay together. 
+ const double kMagicSampleMargin = 0.0625; + const double kFTableBoostMargin = 2.0; + + int N = Clusterer->SampleSize; + CLUSTER* Left = Cluster->Left; + CLUSTER* Right = Cluster->Right; + if (Left == nullptr || Right == nullptr) + return nullptr; + int TotalDims = Left->SampleCount + Right->SampleCount; + if (TotalDims < N + 1 || TotalDims < 2) + return nullptr; + std::vector<float> Covariance(static_cast<size_t>(N) * N); + std::vector<float> Inverse(static_cast<size_t>(N) * N); + std::vector<float> Delta(N); + // Compute a new covariance matrix that only uses essential features. + for (int i = 0; i < N; ++i) { + int row_offset = i * N; + if (!Clusterer->ParamDesc[i].NonEssential) { + for (int j = 0; j < N; ++j) { + if (!Clusterer->ParamDesc[j].NonEssential) + Covariance[j + row_offset] = Statistics->CoVariance[j + row_offset]; + else + Covariance[j + row_offset] = 0.0f; + } + } else { + for (int j = 0; j < N; ++j) { + if (i == j) + Covariance[j + row_offset] = 1.0f; + else + Covariance[j + row_offset] = 0.0f; + } + } + } + double err = InvertMatrix(&Covariance[0], N, &Inverse[0]); + if (err > 1) { + tprintf("Clustering error: Matrix inverse failed with error %g\n", err); + } + int EssentialN = 0; + for (int dim = 0; dim < N; ++dim) { + if (!Clusterer->ParamDesc[dim].NonEssential) { + Delta[dim] = Left->Mean[dim] - Right->Mean[dim]; + ++EssentialN; + } else { + Delta[dim] = 0.0f; + } + } + // Compute Hotelling's T-squared. + double Tsq = 0.0; + for (int x = 0; x < N; ++x) { + double temp = 0.0; + for (int y = 0; y < N; ++y) { + temp += static_cast<double>(Inverse[y + N * x]) * Delta[y]; + } + Tsq += Delta[x] * temp; + } + // Changed this function to match the formula in + // Statistical Methods in Medical Research p 473 + // By Peter Armitage, Geoffrey Berry, J. N. S. Matthews. 
+ // Tsq *= Left->SampleCount * Right->SampleCount / TotalDims; + double F = Tsq * (TotalDims - EssentialN - 1) / ((TotalDims - 2)*EssentialN); + int Fx = EssentialN; + if (Fx > FTABLE_X) + Fx = FTABLE_X; + --Fx; + int Fy = TotalDims - EssentialN - 1; + if (Fy > FTABLE_Y) + Fy = FTABLE_Y; + --Fy; + double FTarget = FTable[Fy][Fx]; + if (Config->MagicSamples > 0 && + TotalDims >= Config->MagicSamples * (1.0 - kMagicSampleMargin) && + TotalDims <= Config->MagicSamples * (1.0 + kMagicSampleMargin)) { + // Give magic-sized clusters a magic FTable boost. + FTarget += kFTableBoostMargin; + } + if (F < FTarget) { + return NewEllipticalProto (Clusterer->SampleSize, Cluster, Statistics); + } + return nullptr; +} + +/** + * This routine tests the specified cluster to see if it can + * be approximated by a spherical normal distribution. If it + * can be, then a new prototype is formed and returned to the + * caller. If it can't be, then nullptr is returned to the caller. + * @param Clusterer data struct containing samples being clustered + * @param Cluster cluster to be made into a spherical prototype + * @param Statistics statistical info about cluster + * @param Buckets histogram struct used to analyze distribution + * @return Pointer to new spherical prototype or nullptr. 
+ */ +static PROTOTYPE* MakeSphericalProto(CLUSTERER* Clusterer, + CLUSTER* Cluster, STATISTICS* Statistics, + BUCKETS* Buckets) { + PROTOTYPE *Proto = nullptr; + int i; + + // check that each dimension is a normal distribution + for (i = 0; i < Clusterer->SampleSize; i++) { + if (Clusterer->ParamDesc[i].NonEssential) + continue; + + FillBuckets (Buckets, Cluster, i, &(Clusterer->ParamDesc[i]), + Cluster->Mean[i], + sqrt (static_cast<double>(Statistics->AvgVariance))); + if (!DistributionOK (Buckets)) + break; + } + // if all dimensions matched a normal distribution, make a proto + if (i >= Clusterer->SampleSize) + Proto = NewSphericalProto (Clusterer->SampleSize, Cluster, Statistics); + return (Proto); +} // MakeSphericalProto + +/** + * This routine tests the specified cluster to see if it can + * be approximated by an elliptical normal distribution. If it + * can be, then a new prototype is formed and returned to the + * caller. If it can't be, then nullptr is returned to the caller. + * @param Clusterer data struct containing samples being clustered + * @param Cluster cluster to be made into an elliptical prototype + * @param Statistics statistical info about cluster + * @param Buckets histogram struct used to analyze distribution + * @return Pointer to new elliptical prototype or nullptr. 
+ */ +static PROTOTYPE* MakeEllipticalProto(CLUSTERER* Clusterer, + CLUSTER* Cluster, STATISTICS* Statistics, + BUCKETS* Buckets) { + PROTOTYPE *Proto = nullptr; + int i; + + // check that each dimension is a normal distribution + for (i = 0; i < Clusterer->SampleSize; i++) { + if (Clusterer->ParamDesc[i].NonEssential) + continue; + + FillBuckets (Buckets, Cluster, i, &(Clusterer->ParamDesc[i]), + Cluster->Mean[i], + sqrt (static_cast<double>(Statistics-> + CoVariance[i * (Clusterer->SampleSize + 1)]))); + if (!DistributionOK (Buckets)) + break; + } + // if all dimensions matched a normal distribution, make a proto + if (i >= Clusterer->SampleSize) + Proto = NewEllipticalProto (Clusterer->SampleSize, Cluster, Statistics); + return (Proto); +} // MakeEllipticalProto + +/** + * This routine tests each dimension of the specified cluster to + * see what distribution would best approximate that dimension. + * Each dimension is compared to the following distributions + * in order: normal, random, uniform. If each dimension can + * be represented by one of these distributions, + * then a new prototype is formed and returned to the + * caller. If it can't be, then nullptr is returned to the caller. + * @param Clusterer data struct containing samples being clustered + * @param Cluster cluster to be made into a prototype + * @param Statistics statistical info about cluster + * @param NormalBuckets histogram struct used to analyze distribution + * @param Confidence confidence level for alternate distributions + * @return Pointer to new mixed prototype or nullptr. 
+ */ +static PROTOTYPE* MakeMixedProto(CLUSTERER* Clusterer, + CLUSTER* Cluster, STATISTICS* Statistics, + BUCKETS* NormalBuckets, double Confidence) { + PROTOTYPE *Proto; + int i; + BUCKETS *UniformBuckets = nullptr; + BUCKETS *RandomBuckets = nullptr; + + // create a mixed proto to work on - initially assume all dimensions normal*/ + Proto = NewMixedProto (Clusterer->SampleSize, Cluster, Statistics); + + // find the proper distribution for each dimension + for (i = 0; i < Clusterer->SampleSize; i++) { + if (Clusterer->ParamDesc[i].NonEssential) + continue; + + FillBuckets (NormalBuckets, Cluster, i, &(Clusterer->ParamDesc[i]), + Proto->Mean[i], + sqrt (static_cast<double>(Proto->Variance.Elliptical[i]))); + if (DistributionOK (NormalBuckets)) + continue; + + if (RandomBuckets == nullptr) + RandomBuckets = + GetBuckets(Clusterer, D_random, Cluster->SampleCount, Confidence); + MakeDimRandom (i, Proto, &(Clusterer->ParamDesc[i])); + FillBuckets (RandomBuckets, Cluster, i, &(Clusterer->ParamDesc[i]), + Proto->Mean[i], Proto->Variance.Elliptical[i]); + if (DistributionOK (RandomBuckets)) + continue; + + if (UniformBuckets == nullptr) + UniformBuckets = + GetBuckets(Clusterer, uniform, Cluster->SampleCount, Confidence); + MakeDimUniform(i, Proto, Statistics); + FillBuckets (UniformBuckets, Cluster, i, &(Clusterer->ParamDesc[i]), + Proto->Mean[i], Proto->Variance.Elliptical[i]); + if (DistributionOK (UniformBuckets)) + continue; + break; + } + // if any dimension failed to match a distribution, discard the proto + if (i < Clusterer->SampleSize) { + FreePrototype(Proto); + Proto = nullptr; + } + return (Proto); +} // MakeMixedProto + +/** + * This routine alters the ith dimension of the specified + * mixed prototype to be D_random. 
+ * @param i index of dimension to be changed + * @param Proto prototype whose dimension is to be altered + * @param ParamDesc description of specified dimension + */ +static void MakeDimRandom(uint16_t i, PROTOTYPE* Proto, PARAM_DESC* ParamDesc) { + Proto->Distrib[i] = D_random; + Proto->Mean[i] = ParamDesc->MidRange; + Proto->Variance.Elliptical[i] = ParamDesc->HalfRange; + + // subtract out the previous magnitude of this dimension from the total + Proto->TotalMagnitude /= Proto->Magnitude.Elliptical[i]; + Proto->Magnitude.Elliptical[i] = 1.0 / ParamDesc->Range; + Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i]; + Proto->LogMagnitude = log (static_cast<double>(Proto->TotalMagnitude)); + + // note that the proto Weight is irrelevant for D_random protos +} // MakeDimRandom + +/** + * This routine alters the ith dimension of the specified + * mixed prototype to be uniform. + * @param i index of dimension to be changed + * @param Proto prototype whose dimension is to be altered + * @param Statistics statistical info about prototype + */ +static void MakeDimUniform(uint16_t i, PROTOTYPE* Proto, STATISTICS* Statistics) { + Proto->Distrib[i] = uniform; + Proto->Mean[i] = Proto->Cluster->Mean[i] + + (Statistics->Min[i] + Statistics->Max[i]) / 2; + Proto->Variance.Elliptical[i] = + (Statistics->Max[i] - Statistics->Min[i]) / 2; + if (Proto->Variance.Elliptical[i] < MINVARIANCE) + Proto->Variance.Elliptical[i] = MINVARIANCE; + + // subtract out the previous magnitude of this dimension from the total + Proto->TotalMagnitude /= Proto->Magnitude.Elliptical[i]; + Proto->Magnitude.Elliptical[i] = + 1.0 / (2.0 * Proto->Variance.Elliptical[i]); + Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i]; + Proto->LogMagnitude = log (static_cast<double>(Proto->TotalMagnitude)); + + // note that the proto Weight is irrelevant for uniform protos +} // MakeDimUniform + +/** + * This routine searches the cluster tree for all leaf nodes + * which are samples in the specified 
cluster. It computes
 * a full covariance matrix for these samples as well as
 * keeping track of the ranges (min and max) for each
 * dimension. A special data structure is allocated to
 * return this information to the caller. An incremental
 * algorithm for computing statistics is not used because
 * it will not work with circular dimensions.
 * @param N number of dimensions
 * @param ParamDesc array of dimension descriptions
 * @param Cluster cluster whose stats are to be computed
 * @return Pointer to new data structure containing statistics
 */
static STATISTICS*
ComputeStatistics (int16_t N, PARAM_DESC ParamDesc[], CLUSTER * Cluster) {
  STATISTICS *Statistics;
  int i, j;
  float *CoVariance;
  float *Distance;
  LIST SearchState;
  SAMPLE *Sample;
  uint32_t SampleCountAdjustedForBias;

  // allocate memory to hold the statistics results
  Statistics = static_cast<STATISTICS *>(malloc (sizeof (STATISTICS)));
  Statistics->CoVariance = static_cast<float *>(malloc(sizeof(float) * N * N));
  Statistics->Min = static_cast<float *>(malloc (N * sizeof (float)));
  Statistics->Max = static_cast<float *>(malloc (N * sizeof (float)));

  // allocate temporary memory to hold the sample to mean distances
  Distance = static_cast<float *>(malloc (N * sizeof (float)));

  // initialize the statistics (AvgVariance starts at 1 because it is
  // built up as a product of the diagonal variances below)
  Statistics->AvgVariance = 1.0;
  CoVariance = Statistics->CoVariance;
  for (i = 0; i < N; i++) {
    Statistics->Min[i] = 0.0;
    Statistics->Max[i] = 0.0;
    for (j = 0; j < N; j++, CoVariance++)
      *CoVariance = 0;
  }
  // find each sample in the cluster and merge it into the statistics
  InitSampleSearch(SearchState, Cluster);
  while ((Sample = NextSample (&SearchState)) != nullptr) {
    for (i = 0; i < N; i++) {
      Distance[i] = Sample->Mean[i] - Cluster->Mean[i];
      if (ParamDesc[i].Circular) {
        // wrap circular dimensions so the distance is the shorter arc
        if (Distance[i] > ParamDesc[i].HalfRange)
          Distance[i] -= ParamDesc[i].Range;
        if (Distance[i] < -ParamDesc[i].HalfRange)
          Distance[i] += ParamDesc[i].Range;
      }
      if (Distance[i] < Statistics->Min[i])
        Statistics->Min[i] = Distance[i];
      if (Distance[i] > Statistics->Max[i])
        Statistics->Max[i] = Distance[i];
    }
    // accumulate the outer product Distance * Distance^T into CoVariance
    CoVariance = Statistics->CoVariance;
    for (i = 0; i < N; i++)
      for (j = 0; j < N; j++, CoVariance++)
        *CoVariance += Distance[i] * Distance[j];
  }
  // normalize the variances by the total number of samples
  // use SampleCount-1 instead of SampleCount to get an unbiased estimate
  // also compute the geometric mean of the diagonal variances
  // ensure that clusters with only 1 sample are handled correctly
  if (Cluster->SampleCount > 1)
    SampleCountAdjustedForBias = Cluster->SampleCount - 1;
  else
    SampleCountAdjustedForBias = 1;
  CoVariance = Statistics->CoVariance;
  for (i = 0; i < N; i++)
    for (j = 0; j < N; j++, CoVariance++) {
      *CoVariance /= SampleCountAdjustedForBias;
      if (j == i) {
        // clamp diagonal variances so later divisions/sqrt are safe
        if (*CoVariance < MINVARIANCE)
          *CoVariance = MINVARIANCE;
        Statistics->AvgVariance *= *CoVariance;
      }
    }
  // geometric mean of the N diagonal variances
  Statistics->AvgVariance = static_cast<float>(pow(static_cast<double>(Statistics->AvgVariance),
                                                   1.0 / N));

  // release temporary memory and return
  free(Distance);
  return (Statistics);
}                                // ComputeStatistics

/**
 * This routine creates a spherical prototype data structure to
 * approximate the samples in the specified cluster.
 * Spherical prototypes have a single variance which is
 * common across all dimensions. All dimensions are normally
 * distributed and independent.
+ * @param N number of dimensions + * @param Cluster cluster to be made into a spherical prototype + * @param Statistics statistical info about samples in cluster + * @return Pointer to a new spherical prototype data structure + */ +static PROTOTYPE* NewSphericalProto(uint16_t N, CLUSTER* Cluster, + STATISTICS* Statistics) { + PROTOTYPE *Proto; + + Proto = NewSimpleProto (N, Cluster); + + Proto->Variance.Spherical = Statistics->AvgVariance; + if (Proto->Variance.Spherical < MINVARIANCE) + Proto->Variance.Spherical = MINVARIANCE; + + Proto->Magnitude.Spherical = + 1.0 / sqrt(2.0 * M_PI * Proto->Variance.Spherical); + Proto->TotalMagnitude = static_cast<float>(pow(static_cast<double>(Proto->Magnitude.Spherical), + static_cast<double>(N))); + Proto->Weight.Spherical = 1.0 / Proto->Variance.Spherical; + Proto->LogMagnitude = log (static_cast<double>(Proto->TotalMagnitude)); + + return (Proto); +} // NewSphericalProto + +/** + * This routine creates an elliptical prototype data structure to + * approximate the samples in the specified cluster. + * Elliptical prototypes have a variance for each dimension. + * All dimensions are normally distributed and independent. 
+ * @param N number of dimensions + * @param Cluster cluster to be made into an elliptical prototype + * @param Statistics statistical info about samples in cluster + * @return Pointer to a new elliptical prototype data structure + */ +static PROTOTYPE* NewEllipticalProto(int16_t N, CLUSTER* Cluster, + STATISTICS* Statistics) { + PROTOTYPE *Proto; + float *CoVariance; + int i; + + Proto = NewSimpleProto (N, Cluster); + Proto->Variance.Elliptical = static_cast<float *>(malloc (N * sizeof (float))); + Proto->Magnitude.Elliptical = static_cast<float *>(malloc (N * sizeof (float))); + Proto->Weight.Elliptical = static_cast<float *>(malloc (N * sizeof (float))); + + CoVariance = Statistics->CoVariance; + Proto->TotalMagnitude = 1.0; + for (i = 0; i < N; i++, CoVariance += N + 1) { + Proto->Variance.Elliptical[i] = *CoVariance; + if (Proto->Variance.Elliptical[i] < MINVARIANCE) + Proto->Variance.Elliptical[i] = MINVARIANCE; + + Proto->Magnitude.Elliptical[i] = + 1.0 / sqrt(2.0 * M_PI * Proto->Variance.Elliptical[i]); + Proto->Weight.Elliptical[i] = 1.0 / Proto->Variance.Elliptical[i]; + Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i]; + } + Proto->LogMagnitude = log (static_cast<double>(Proto->TotalMagnitude)); + Proto->Style = elliptical; + return (Proto); +} // NewEllipticalProto + +/** + * This routine creates a mixed prototype data structure to + * approximate the samples in the specified cluster. + * Mixed prototypes can have different distributions for + * each dimension. All dimensions are independent. The + * structure is initially filled in as though it were an + * elliptical prototype. The actual distributions of the + * dimensions can be altered by other routines. 
+ * @param N number of dimensions + * @param Cluster cluster to be made into a mixed prototype + * @param Statistics statistical info about samples in cluster + * @return Pointer to a new mixed prototype data structure + */ +static PROTOTYPE* NewMixedProto(int16_t N, CLUSTER* Cluster, + STATISTICS* Statistics) { + PROTOTYPE *Proto; + int i; + + Proto = NewEllipticalProto (N, Cluster, Statistics); + Proto->Distrib = static_cast<DISTRIBUTION *>(malloc (N * sizeof (DISTRIBUTION))); + + for (i = 0; i < N; i++) { + Proto->Distrib[i] = normal; + } + Proto->Style = mixed; + return (Proto); +} // NewMixedProto + +/** + * This routine allocates memory to hold a simple prototype + * data structure, i.e. one without independent distributions + * and variances for each dimension. + * @param N number of dimensions + * @param Cluster cluster to be made into a prototype + * @return Pointer to new simple prototype + */ +static PROTOTYPE *NewSimpleProto(int16_t N, CLUSTER *Cluster) { + PROTOTYPE *Proto; + int i; + + Proto = static_cast<PROTOTYPE *>(malloc (sizeof (PROTOTYPE))); + Proto->Mean = static_cast<float *>(malloc (N * sizeof (float))); + + for (i = 0; i < N; i++) + Proto->Mean[i] = Cluster->Mean[i]; + Proto->Distrib = nullptr; + + Proto->Significant = true; + Proto->Merged = false; + Proto->Style = spherical; + Proto->NumSamples = Cluster->SampleCount; + Proto->Cluster = Cluster; + Proto->Cluster->Prototype = true; + return (Proto); +} // NewSimpleProto + +/** + * This routine returns true if the specified covariance + * matrix indicates that all N dimensions are independent of + * one another. One dimension is judged to be independent of + * another when the magnitude of the corresponding correlation + * coefficient is + * less than the specified Independence factor. The + * correlation coefficient is calculated as: (see Duda and + * Hart, pg. 
247)
 * coeff[ij] = stddev[ij] / sqrt (stddev[ii] * stddev[jj])
 * The covariance matrix is assumed to be symmetric (which
 * should always be true).
 * @param ParamDesc descriptions of each feature space dimension
 * @param N number of dimensions
 * @param CoVariance ptr to a covariance matrix
 * @param Independence max off-diagonal correlation coefficient
 * @return true if dimensions are independent, false otherwise
 */
static bool
Independent(PARAM_DESC* ParamDesc,
int16_t N, float* CoVariance, float Independence) {
  int i, j;
  float *VARii;                  // points to ith on-diagonal element
  float *VARjj;                  // points to jth on-diagonal element
  float CorrelationCoeff;

  VARii = CoVariance;
  // walk the diagonal; a stride of N + 1 moves to the next on-diagonal
  // element of the row-major N x N matrix
  for (i = 0; i < N; i++, VARii += N + 1) {
    if (ParamDesc[i].NonEssential)
      continue;

    VARjj = VARii + N + 1;
    CoVariance = VARii + 1;      // first off-diagonal element in row i
    for (j = i + 1; j < N; j++, CoVariance++, VARjj += N + 1) {
      if (ParamDesc[j].NonEssential)
        continue;

      // guard against a divide by zero when a variance is zero
      if ((*VARii == 0.0) || (*VARjj == 0.0))
        CorrelationCoeff = 0.0;
      else
        // sqrt(sqrt(c*c / (vii*vjj))) == sqrt(|coeff|): monotonic in the
        // magnitude of the correlation coefficient, without needing fabs
        CorrelationCoeff =
          sqrt (sqrt (*CoVariance * *CoVariance / (*VARii * *VARjj)));
      if (CorrelationCoeff > Independence)
        return false;
    }
  }
  return true;
}                                // Independent

/**
 * This routine returns a histogram data structure which can
 * be used by other routines to place samples into histogram
 * buckets, and then apply a goodness of fit test to the
 * histogram data to determine if the samples belong to the
 * specified probability distribution. The routine keeps
 * a list of bucket data structures which have already been
 * created so that it minimizes the computation time needed
 * to create a new bucket.
 * @param clusterer which keeps a bucket_cache for us.
+ * @param Distribution type of probability distribution to test for + * @param SampleCount number of samples that are available + * @param Confidence probability of a Type I error + * @return Bucket data structure + */ +static BUCKETS *GetBuckets(CLUSTERER* clusterer, + DISTRIBUTION Distribution, + uint32_t SampleCount, + double Confidence) { + // Get an old bucket structure with the same number of buckets. + uint16_t NumberOfBuckets = OptimumNumberOfBuckets(SampleCount); + BUCKETS *Buckets = + clusterer->bucket_cache[Distribution][NumberOfBuckets - MINBUCKETS]; + + // If a matching bucket structure is not found, make one and save it. + if (Buckets == nullptr) { + Buckets = MakeBuckets(Distribution, SampleCount, Confidence); + clusterer->bucket_cache[Distribution][NumberOfBuckets - MINBUCKETS] = + Buckets; + } else { + // Just adjust the existing buckets. + if (SampleCount != Buckets->SampleCount) + AdjustBuckets(Buckets, SampleCount); + if (Confidence != Buckets->Confidence) { + Buckets->Confidence = Confidence; + Buckets->ChiSquared = ComputeChiSquared( + DegreesOfFreedom(Distribution, Buckets->NumberOfBuckets), + Confidence); + } + InitBuckets(Buckets); + } + return Buckets; +} // GetBuckets + +/** + * This routine creates a histogram data structure which can + * be used by other routines to place samples into histogram + * buckets, and then apply a goodness of fit test to the + * histogram data to determine if the samples belong to the + * specified probability distribution. The buckets are + * allocated in such a way that the expected frequency of + * samples in each bucket is approximately the same. In + * order to make this possible, a mapping table is + * computed which maps "normalized" samples into the + * appropriate bucket. 
 * @param Distribution type of probability distribution to test for
 * @param SampleCount number of samples that are available
 * @param Confidence probability of a Type I error
 * @return Pointer to new histogram data structure
 */
static BUCKETS *MakeBuckets(DISTRIBUTION Distribution,
                            uint32_t SampleCount,
                            double Confidence) {
  // density function for each supported distribution type
  const DENSITYFUNC DensityFunction[] =
    { NormalDensity, UniformDensity, UniformDensity };
  int i, j;
  BUCKETS *Buckets;
  double BucketProbability;
  double NextBucketBoundary;
  double Probability;
  double ProbabilityDelta;
  double LastProbDensity;
  double ProbDensity;
  uint16_t CurrentBucket;
  bool Symmetrical;

  // allocate memory needed for data structure
  Buckets = static_cast<BUCKETS *>(malloc(sizeof(BUCKETS)));
  Buckets->NumberOfBuckets = OptimumNumberOfBuckets(SampleCount);
  Buckets->SampleCount = SampleCount;
  Buckets->Confidence = Confidence;
  Buckets->Count =
    static_cast<uint32_t *>(malloc(Buckets->NumberOfBuckets * sizeof(uint32_t)));
  Buckets->ExpectedCount = static_cast<float *>(
    malloc(Buckets->NumberOfBuckets * sizeof(float)));

  // initialize simple fields
  Buckets->Distribution = Distribution;
  for (i = 0; i < Buckets->NumberOfBuckets; i++) {
    Buckets->Count[i] = 0;
    Buckets->ExpectedCount[i] = 0.0;
  }

  // all currently defined distributions are symmetrical
  Symmetrical = true;
  Buckets->ChiSquared = ComputeChiSquared(
      DegreesOfFreedom(Distribution, Buckets->NumberOfBuckets), Confidence);

  if (Symmetrical) {
    // allocate buckets so that all have approx. equal probability
    BucketProbability = 1.0 / static_cast<double>(Buckets->NumberOfBuckets);

    // distribution is symmetric so fill in upper half then copy.
    // For an odd bucket count the middle bucket straddles the mean, so
    // only half of its probability lies in the upper half.
    CurrentBucket = Buckets->NumberOfBuckets / 2;
    if (Odd (Buckets->NumberOfBuckets))
      NextBucketBoundary = BucketProbability / 2;
    else
      NextBucketBoundary = BucketProbability;

    // integrate the density over [BUCKETTABLESIZE/2, BUCKETTABLESIZE)
    // with a trapezoidal rule, advancing to the next bucket whenever the
    // accumulated probability crosses the next boundary
    Probability = 0.0;
    LastProbDensity =
      (*DensityFunction[static_cast<int>(Distribution)]) (BUCKETTABLESIZE / 2);
    for (i = BUCKETTABLESIZE / 2; i < BUCKETTABLESIZE; i++) {
      ProbDensity = (*DensityFunction[static_cast<int>(Distribution)]) (i + 1);
      ProbabilityDelta = Integral (LastProbDensity, ProbDensity, 1.0);
      Probability += ProbabilityDelta;
      if (Probability > NextBucketBoundary) {
        if (CurrentBucket < Buckets->NumberOfBuckets - 1)
          CurrentBucket++;
        NextBucketBoundary += BucketProbability;
      }
      Buckets->Bucket[i] = CurrentBucket;
      Buckets->ExpectedCount[CurrentBucket] +=
        static_cast<float>(ProbabilityDelta * SampleCount);
      LastProbDensity = ProbDensity;
    }
    // place any leftover probability into the last bucket
    Buckets->ExpectedCount[CurrentBucket] +=
      static_cast<float>((0.5 - Probability) * SampleCount);

    // copy upper half of distribution to lower half
    for (i = 0, j = BUCKETTABLESIZE - 1; i < j; i++, j--)
      Buckets->Bucket[i] =
        Mirror(Buckets->Bucket[j], Buckets->NumberOfBuckets);

    // copy upper half of expected counts to lower half
    for (i = 0, j = Buckets->NumberOfBuckets - 1; i <= j; i++, j--)
      Buckets->ExpectedCount[i] += Buckets->ExpectedCount[j];
  }
  return Buckets;
}                                // MakeBuckets

/**
 * This routine computes the optimum number of histogram
 * buckets that should be used in a chi-squared goodness of
 * fit test for the specified number of samples. The optimum
 * number is computed based on Table 4.1 on pg. 147 of
 * "Measurement and Analysis of Random Data" by Bendat & Piersol.
 * Linear interpolation is used to interpolate between table
 * values.
The table is intended for a 0.05 level of + * significance (alpha). This routine assumes that it is + * equally valid for other alpha's, which may not be true. + * @param SampleCount number of samples to be tested + * @return Optimum number of histogram buckets + */ +static uint16_t OptimumNumberOfBuckets(uint32_t SampleCount) { + uint8_t Last, Next; + float Slope; + + if (SampleCount < kCountTable[0]) + return kBucketsTable[0]; + + for (Last = 0, Next = 1; Next < LOOKUPTABLESIZE; Last++, Next++) { + if (SampleCount <= kCountTable[Next]) { + Slope = static_cast<float>(kBucketsTable[Next] - kBucketsTable[Last]) / + static_cast<float>(kCountTable[Next] - kCountTable[Last]); + return (static_cast<uint16_t>(kBucketsTable[Last] + + Slope * (SampleCount - kCountTable[Last]))); + } + } + return kBucketsTable[Last]; +} // OptimumNumberOfBuckets + +/** + * This routine computes the chi-squared value which will + * leave a cumulative probability of Alpha in the right tail + * of a chi-squared distribution with the specified number of + * degrees of freedom. Alpha must be between 0 and 1. + * DegreesOfFreedom must be even. The routine maintains an + * array of lists. Each list corresponds to a different + * number of degrees of freedom. Each entry in the list + * corresponds to a different alpha value and its corresponding + * chi-squared value. Therefore, once a particular chi-squared + * value is computed, it is stored in the list and never + * needs to be computed again. + * @param DegreesOfFreedom determines shape of distribution + * @param Alpha probability of right tail + * @return Desired chi-squared value + */ +static double +ComputeChiSquared (uint16_t DegreesOfFreedom, double Alpha) +#define CHIACCURACY 0.01 +#define MINALPHA (1e-200) +{ + static LIST ChiWith[MAXDEGREESOFFREEDOM + 1]; + + CHISTRUCT *OldChiSquared; + CHISTRUCT SearchKey; + + // limit the minimum alpha that can be used - if alpha is too small + // it may not be possible to compute chi-squared. 
+ Alpha = ClipToRange(Alpha, MINALPHA, 1.0); + if (Odd (DegreesOfFreedom)) + DegreesOfFreedom++; + + /* find the list of chi-squared values which have already been computed + for the specified number of degrees of freedom. Search the list for + the desired chi-squared. */ + SearchKey.Alpha = Alpha; + OldChiSquared = reinterpret_cast<CHISTRUCT *>first_node (search (ChiWith[DegreesOfFreedom], + &SearchKey, AlphaMatch)); + + if (OldChiSquared == nullptr) { + OldChiSquared = NewChiStruct (DegreesOfFreedom, Alpha); + OldChiSquared->ChiSquared = Solve (ChiArea, OldChiSquared, + static_cast<double>(DegreesOfFreedom), + CHIACCURACY); + ChiWith[DegreesOfFreedom] = push (ChiWith[DegreesOfFreedom], + OldChiSquared); + } + else { + // further optimization might move OldChiSquared to front of list + } + + return (OldChiSquared->ChiSquared); + +} // ComputeChiSquared + +/** + * This routine computes the probability density function + * of a discrete normal distribution defined by the global + * variables kNormalMean, kNormalVariance, and kNormalMagnitude. + * Normal magnitude could, of course, be computed in terms of + * the normal variance but it is precomputed for efficiency. + * @param x number to compute the normal probability density for + * @note Globals: + * kNormalMean mean of a discrete normal distribution + * kNormalVariance variance of a discrete normal distribution + * kNormalMagnitude magnitude of a discrete normal distribution + * @return The value of the normal distribution at x. + */ +static double NormalDensity(int32_t x) { + double Distance; + + Distance = x - kNormalMean; + return kNormalMagnitude * exp(-0.5 * Distance * Distance / kNormalVariance); +} // NormalDensity + +/** + * This routine computes the probability density function + * of a uniform distribution at the specified point. The + * range of the distribution is from 0 to BUCKETTABLESIZE. 
 * @param x number to compute the uniform probability density for
 * @return The value of the uniform distribution at x.
 */
static double UniformDensity(int32_t x) {
  // constant density over the closed range [0, BUCKETTABLESIZE]
  constexpr auto UniformDistributionDensity = 1.0 / BUCKETTABLESIZE;

  if ((x >= 0) && (x <= BUCKETTABLESIZE)) {
    return UniformDistributionDensity;
  } else {
    return 0.0;
  }
}                                // UniformDensity

/**
 * This routine computes a trapezoidal approximation to the
 * integral of a function over a small delta in x.
 * @param f1 value of function at x1
 * @param f2 value of function at x2
 * @param Dx x2 - x1 (should always be positive)
 * @return Approximation of the integral of the function from x1 to x2.
 */
static double Integral(double f1, double f2, double Dx) {
  return (f1 + f2) * Dx / 2.0;
}                                // Integral

/**
 * This routine counts the number of cluster samples which
 * fall within the various histogram buckets in Buckets. Only
 * one dimension of each sample is examined. The exact meaning
 * of the Mean and StdDev parameters depends on the
 * distribution which is being analyzed (this info is in the
 * Buckets data structure). For normal distributions, Mean
 * and StdDev have the expected meanings. For uniform and
 * random distributions the Mean is the center point of the
 * range and the StdDev is 1/2 the range. A dimension with
 * zero standard deviation cannot be statistically analyzed.
 * In this case, a pseudo-analysis is used.
 * The Buckets data structure is filled in.
 * @param Buckets histogram buckets to count samples
 * @param Cluster cluster whose samples are being analyzed
 * @param Dim dimension of samples which is being analyzed
 * @param ParamDesc description of the dimension
 * @param Mean "mean" of the distribution
 * @param StdDev "standard deviation" of the distribution
 */
static void FillBuckets(BUCKETS *Buckets,
                        CLUSTER *Cluster,
                        uint16_t Dim,
                        PARAM_DESC *ParamDesc,
                        float Mean,
                        float StdDev) {
  uint16_t BucketID;
  int i;
  LIST SearchState;
  SAMPLE *Sample;

  // initialize the histogram bucket counts to 0
  for (i = 0; i < Buckets->NumberOfBuckets; i++)
    Buckets->Count[i] = 0;

  if (StdDev == 0.0) {
    /* if the standard deviation is zero, then we can't statistically
       analyze the cluster. Use a pseudo-analysis: samples exactly on
       the mean are distributed evenly across all buckets. Samples greater
       than the mean are placed in the last bucket; samples less than the
       mean are placed in the first bucket. */

    InitSampleSearch(SearchState, Cluster);
    i = 0;                       // round-robin index for on-mean samples
    while ((Sample = NextSample (&SearchState)) != nullptr) {
      if (Sample->Mean[Dim] > Mean)
        BucketID = Buckets->NumberOfBuckets - 1;
      else if (Sample->Mean[Dim] < Mean)
        BucketID = 0;
      else
        BucketID = i;
      Buckets->Count[BucketID] += 1;
      i++;
      if (i >= Buckets->NumberOfBuckets)
        i = 0;
    }
  }
  else {
    // search for all samples in the cluster and add to histogram buckets;
    // the bucket routines return an index into the Bucket mapping table,
    // which in turn maps to the actual histogram bucket
    InitSampleSearch(SearchState, Cluster);
    while ((Sample = NextSample (&SearchState)) != nullptr) {
      switch (Buckets->Distribution) {
        case normal:
          BucketID = NormalBucket (ParamDesc, Sample->Mean[Dim],
                                   Mean, StdDev);
          break;
        case D_random:
        case uniform:
          BucketID = UniformBucket (ParamDesc, Sample->Mean[Dim],
                                    Mean, StdDev);
          break;
        default:
          BucketID = 0;
      }
      Buckets->Count[Buckets->Bucket[BucketID]] += 1;
    }
  }
}                                // FillBuckets

/**
 * This routine determines which bucket x falls into in the
 * discrete normal distribution
defined by kNormalMean + * and kNormalStdDev. x values which exceed the range of + * the discrete distribution are clipped. + * @param ParamDesc used to identify circular dimensions + * @param x value to be normalized + * @param Mean mean of normal distribution + * @param StdDev standard deviation of normal distribution + * @return Bucket number into which x falls + */ +static uint16_t NormalBucket(PARAM_DESC *ParamDesc, + float x, + float Mean, + float StdDev) { + float X; + + // wraparound circular parameters if necessary + if (ParamDesc->Circular) { + if (x - Mean > ParamDesc->HalfRange) + x -= ParamDesc->Range; + else if (x - Mean < -ParamDesc->HalfRange) + x += ParamDesc->Range; + } + + X = ((x - Mean) / StdDev) * kNormalStdDev + kNormalMean; + if (X < 0) + return 0; + if (X > BUCKETTABLESIZE - 1) + return (static_cast<uint16_t>(BUCKETTABLESIZE - 1)); + return static_cast<uint16_t>(floor(static_cast<double>(X))); +} // NormalBucket + +/** + * This routine determines which bucket x falls into in the + * discrete uniform distribution defined by + * BUCKETTABLESIZE. x values which exceed the range of + * the discrete distribution are clipped. 
+ * @param ParamDesc used to identify circular dimensions + * @param x value to be normalized + * @param Mean center of range of uniform distribution + * @param StdDev 1/2 the range of the uniform distribution + * @return Bucket number into which x falls + */ +static uint16_t UniformBucket(PARAM_DESC *ParamDesc, + float x, + float Mean, + float StdDev) { + float X; + + // wraparound circular parameters if necessary + if (ParamDesc->Circular) { + if (x - Mean > ParamDesc->HalfRange) + x -= ParamDesc->Range; + else if (x - Mean < -ParamDesc->HalfRange) + x += ParamDesc->Range; + } + + X = ((x - Mean) / (2 * StdDev) * BUCKETTABLESIZE + BUCKETTABLESIZE / 2.0); + if (X < 0) + return 0; + if (X > BUCKETTABLESIZE - 1) + return static_cast<uint16_t>(BUCKETTABLESIZE - 1); + return static_cast<uint16_t>(floor(static_cast<double>(X))); +} // UniformBucket + +/** + * This routine performs a chi-square goodness of fit test + * on the histogram data in the Buckets data structure. + * true is returned if the histogram matches the probability + * distribution which was specified when the Buckets + * structure was originally created. Otherwise false is + * returned. + * @param Buckets histogram data to perform chi-square test on + * @return true if samples match distribution, false otherwise + */ +static bool DistributionOK(BUCKETS* Buckets) { + float FrequencyDifference; + float TotalDifference; + int i; + + // compute how well the histogram matches the expected histogram + TotalDifference = 0.0; + for (i = 0; i < Buckets->NumberOfBuckets; i++) { + FrequencyDifference = Buckets->Count[i] - Buckets->ExpectedCount[i]; + TotalDifference += (FrequencyDifference * FrequencyDifference) / + Buckets->ExpectedCount[i]; + } + + // test to see if the difference is more than expected + if (TotalDifference > Buckets->ChiSquared) + return false; + else + return true; +} // DistributionOK + +/** + * This routine frees the memory used by the statistics + * data structure. 
+ * @param Statistics pointer to data structure to be freed + */ +static void FreeStatistics(STATISTICS *Statistics) { + free(Statistics->CoVariance); + free(Statistics->Min); + free(Statistics->Max); + free(Statistics); +} // FreeStatistics + +/** + * This routine properly frees the memory used by a BUCKETS. + * + * @param buckets pointer to data structure to be freed + */ +static void FreeBuckets(BUCKETS *buckets) { + free(buckets->Count); + free(buckets->ExpectedCount); + free(buckets); +} // FreeBuckets + +/** + * This routine frees the memory consumed by the specified + * cluster and all of its subclusters. This is done by + * recursive calls to FreeCluster(). + * + * @param Cluster pointer to cluster to be freed + */ +static void FreeCluster(CLUSTER *Cluster) { + if (Cluster != nullptr) { + FreeCluster (Cluster->Left); + FreeCluster (Cluster->Right); + free(Cluster); + } +} // FreeCluster + +/** + * This routine computes the degrees of freedom that should + * be used in a chi-squared test with the specified number of + * histogram buckets. The result is always rounded up to + * the next even number so that the value of chi-squared can be + * computed more easily. This will cause the value of + * chi-squared to be higher than the optimum value, resulting + * in the chi-square test being more lenient than optimum. 
+ * @param Distribution distribution being tested for + * @param HistogramBuckets number of buckets in chi-square test + * @return The number of degrees of freedom for a chi-square test + */ +static uint16_t DegreesOfFreedom(DISTRIBUTION Distribution, uint16_t HistogramBuckets) { + static uint8_t DegreeOffsets[] = { 3, 3, 1 }; + + uint16_t AdjustedNumBuckets; + + AdjustedNumBuckets = HistogramBuckets - DegreeOffsets[static_cast<int>(Distribution)]; + if (Odd (AdjustedNumBuckets)) + AdjustedNumBuckets++; + return (AdjustedNumBuckets); + +} // DegreesOfFreedom + +/** + * This routine multiplies each ExpectedCount histogram entry + * by NewSampleCount/OldSampleCount so that the histogram + * is now adjusted to the new sample count. + * @param Buckets histogram data structure to adjust + * @param NewSampleCount new sample count to adjust to + */ +static void AdjustBuckets(BUCKETS *Buckets, uint32_t NewSampleCount) { + int i; + double AdjustFactor; + + AdjustFactor = ((static_cast<double>(NewSampleCount)) / + (static_cast<double>(Buckets->SampleCount))); + + for (i = 0; i < Buckets->NumberOfBuckets; i++) { + Buckets->ExpectedCount[i] *= AdjustFactor; + } + + Buckets->SampleCount = NewSampleCount; + +} // AdjustBuckets + +/** + * This routine sets the bucket counts in the specified histogram + * to zero. + * @param Buckets histogram data structure to init + */ +static void InitBuckets(BUCKETS *Buckets) { + int i; + + for (i = 0; i < Buckets->NumberOfBuckets; i++) { + Buckets->Count[i] = 0; + } + +} // InitBuckets + +/** + * This routine is used to search a list of structures which + * hold pre-computed chi-squared values for a chi-squared + * value whose corresponding alpha field matches the alpha + * field of SearchKey. + * + * It is called by the list search routines. 
+ * + * @param arg1 chi-squared struct being tested for a match + * @param arg2 chi-squared struct that is the search key + * @return true if ChiStruct's Alpha matches SearchKey's Alpha + */ +static int AlphaMatch(void *arg1, //CHISTRUCT *ChiStruct, + void *arg2) { //CHISTRUCT *SearchKey) + auto *ChiStruct = static_cast<CHISTRUCT *>(arg1); + auto *SearchKey = static_cast<CHISTRUCT *>(arg2); + + return (ChiStruct->Alpha == SearchKey->Alpha); + +} // AlphaMatch + +/** + * This routine allocates a new data structure which is used + * to hold a chi-squared value along with its associated + * number of degrees of freedom and alpha value. + * + * @param DegreesOfFreedom degrees of freedom for new chi value + * @param Alpha confidence level for new chi value + * @return newly allocated data structure + */ +static CHISTRUCT *NewChiStruct(uint16_t DegreesOfFreedom, double Alpha) { + CHISTRUCT *NewChiStruct; + + NewChiStruct = static_cast<CHISTRUCT *>(malloc (sizeof (CHISTRUCT))); + NewChiStruct->DegreesOfFreedom = DegreesOfFreedom; + NewChiStruct->Alpha = Alpha; + return (NewChiStruct); + +} // NewChiStruct + +/** + * This routine attempts to find an x value at which Function + * goes to zero (i.e. a root of the function). It will only + * work correctly if a solution actually exists and there + * are no extrema between the solution and the InitialGuess. + * The algorithms used are extremely primitive. + * + * @param Function function whose zero is to be found + * @param FunctionParams arbitrary data to pass to function + * @param InitialGuess point to start solution search at + * @param Accuracy maximum allowed error + * @return Solution of function (x for which f(x) = 0). 
 */
static double
Solve (SOLVEFUNC Function,
void *FunctionParams, double InitialGuess, double Accuracy)
#define INITIALDELTA 0.1
#define DELTARATIO 0.1
{
  double x;                      // current root estimate
  double f;                      // function value at x
  double Slope;                  // finite-difference slope estimate
  double Delta;                  // step used for the slope estimate
  double NewDelta;
  double xDelta;                 // Newton step f/Slope
  double LastPosX, LastNegX;     // bracket around the root so far

  x = InitialGuess;
  Delta = INITIALDELTA;
  LastPosX = FLT_MAX;
  LastNegX = -FLT_MAX;
  f = (*Function) (static_cast<CHISTRUCT *>(FunctionParams), x);
  // iterate until the bracket around the root is tighter than Accuracy
  while (Abs (LastPosX - LastNegX) > Accuracy) {
    // keep track of outer bounds of current estimate
    if (f < 0)
      LastNegX = x;
    else
      LastPosX = x;

    // compute the approx. slope of f(x) at the current point
    Slope =
      ((*Function) (static_cast<CHISTRUCT *>(FunctionParams), x + Delta) - f) / Delta;

    // compute the next solution guess (a Newton-style step)
    xDelta = f / Slope;
    x -= xDelta;

    // reduce the delta used for computing slope to be a fraction of
    // the amount moved to get to the new guess
    NewDelta = Abs (xDelta) * DELTARATIO;
    if (NewDelta < Delta)
      Delta = NewDelta;

    // compute the value of the function at the new guess
    f = (*Function) (static_cast<CHISTRUCT *>(FunctionParams), x);
  }
  return (x);

}                                // Solve

/**
 * This routine computes the area under a chi density curve
 * from 0 to x, minus the desired area under the curve. The
 * number of degrees of freedom of the chi curve is specified
 * in the ChiParams structure. The desired area is also
 * specified in the ChiParams structure as Alpha (or 1 minus
 * the desired area). This routine is intended to be passed
 * to the Solve() function to find the value of chi-squared
 * which will yield a desired area under the right tail of
 * the chi density curve. The function will only work for
 * even degrees of freedom. The equations are based on
 * integrating the chi density curve in parts to obtain
 * a series that can be used to compute the area under the
 * curve.
+ * @param ChiParams contains degrees of freedom and alpha + * @param x value of chi-squared to evaluate + * @return Error between actual and desired area under the chi curve. + */ +static double ChiArea(CHISTRUCT *ChiParams, double x) { + int i, N; + double SeriesTotal; + double Denominator; + double PowerOfx; + + N = ChiParams->DegreesOfFreedom / 2 - 1; + SeriesTotal = 1; + Denominator = 1; + PowerOfx = 1; + for (i = 1; i <= N; i++) { + Denominator *= 2 * i; + PowerOfx *= x; + SeriesTotal += PowerOfx / Denominator; + } + return ((SeriesTotal * exp (-0.5 * x)) - ChiParams->Alpha); + +} // ChiArea + +/** + * This routine looks at all samples in the specified cluster. + * It computes a running estimate of the percentage of the + * characters which have more than 1 sample in the cluster. + * When this percentage exceeds MaxIllegal, true is returned. + * Otherwise false is returned. The CharID + * fields must contain integers which identify the training + * characters which were used to generate the sample. One + * integer is used for each sample. The NumChar field in + * the Clusterer must contain the number of characters in the + * training set. All CharID fields must be between 0 and + * NumChar-1. The main function of this routine is to help + * identify clusters which need to be split further, i.e. if + * numerous training characters have 2 or more features which are + * contained in the same cluster, then the cluster should be + * split. + * + * @param Clusterer data structure holding cluster tree + * @param Cluster cluster containing samples to be tested + * @param MaxIllegal max percentage of samples allowed to have + * more than 1 feature in the cluster + * @return true if the cluster should be split, false otherwise. 
 */
static bool
MultipleCharSamples(CLUSTERER* Clusterer,
                    CLUSTER* Cluster, float MaxIllegal)
#define ILLEGAL_CHAR 2
{
  // Scratch buffer indexed by CharID.  Values: 0 = not yet seen in this
  // cluster, 1 = seen once, ILLEGAL_CHAR = already counted as illegal.
  // NOTE(review): function-local static mutable state makes this routine
  // non-reentrant and not thread-safe.
  static std::vector<uint8_t> CharFlags;
  LIST SearchState;
  SAMPLE *Sample;
  int32_t CharID;
  int32_t NumCharInCluster;
  int32_t NumIllegalInCluster;
  float PercentIllegal;

  // initial estimate assumes that no illegal chars exist in the cluster
  NumCharInCluster = Cluster->SampleCount;
  NumIllegalInCluster = 0;

  // Grow (never shrink) the reusable flag buffer to cover all CharIDs.
  if (Clusterer->NumChar > CharFlags.size()) {
    CharFlags.resize(Clusterer->NumChar);
  }

  for (auto& CharFlag : CharFlags)
    CharFlag = false;

  // find each sample in the cluster and check if we have seen it before
  InitSampleSearch(SearchState, Cluster);
  while ((Sample = NextSample (&SearchState)) != nullptr) {
    CharID = Sample->CharID;
    if (CharFlags[CharID] == false) {
      CharFlags[CharID] = true;
    }
    else {
      if (CharFlags[CharID] == true) {
        // Second occurrence: this character becomes illegal exactly once.
        NumIllegalInCluster++;
        CharFlags[CharID] = ILLEGAL_CHAR;
      }
      // Each duplicate sample reduces the count of distinct characters,
      // so PercentIllegal is illegal-chars / distinct-chars seen so far.
      NumCharInCluster--;
      PercentIllegal = static_cast<float>(NumIllegalInCluster) / NumCharInCluster;
      if (PercentIllegal > MaxIllegal) {
        destroy(SearchState);
        return true;
      }
    }
  }
  return false;

} // MultipleCharSamples

/**
 * Compute the inverse of a matrix using LU decomposition with partial pivoting.
 * The return value is the sum of norms of the off-diagonal terms of the
 * product of a and inv. (A measure of the error.)
 */
static double InvertMatrix(const float* input, int size, float* inv) {
  // Allocate memory for the 2D arrays.
  GENERIC_2D_ARRAY<double> U(size, size, 0.0);
  GENERIC_2D_ARRAY<double> U_inv(size, size, 0.0);
  GENERIC_2D_ARRAY<double> L(size, size, 0.0);

  // Initialize the working matrices. U starts as input, L as I and U_inv as O.
  int row;
  int col;
  for (row = 0; row < size; row++) {
    for (col = 0; col < size; col++) {
      U[row][col] = input[row*size + col];
      L[row][col] = row == col ? 1.0 : 0.0;
      U_inv[row][col] = 0.0;
    }
  }

  // Compute forward matrix by inversion by LU decomposition of input.
  for (col = 0; col < size; ++col) {
    // Find best pivot (largest magnitude in this column, at or below the
    // diagonal).
    int best_row = 0;
    double best_pivot = -1.0;
    for (row = col; row < size; ++row) {
      if (Abs(U[row][col]) > best_pivot) {
        best_pivot = Abs(U[row][col]);
        best_row = row;
      }
    }
    // Exchange pivot rows.
    if (best_row != col) {
      for (int k = 0; k < size; ++k) {
        double tmp = U[best_row][k];
        U[best_row][k] = U[col][k];
        U[col][k] = tmp;
        tmp = L[best_row][k];
        L[best_row][k] = L[col][k];
        L[col][k] = tmp;
      }
    }
    // Now do the pivot itself: eliminate the column below the diagonal,
    // applying the same row operations to L.
    // NOTE(review): a singular input (U[col][col] == 0) divides by zero
    // here -- presumably callers supply well-conditioned matrices; verify.
    for (row = col + 1; row < size; ++row) {
      double ratio = -U[row][col] / U[col][col];
      for (int j = col; j < size; ++j) {
        U[row][j] += U[col][j] * ratio;
      }
      for (int k = 0; k < size; ++k) {
        L[row][k] += L[col][k] * ratio;
      }
    }
  }
  // Next invert U (upper triangular) by back substitution.
  for (col = 0; col < size; ++col) {
    U_inv[col][col] = 1.0 / U[col][col];
    for (row = col - 1; row >= 0; --row) {
      double total = 0.0;
      for (int k = col; k > row; --k) {
        total += U[row][k] * U_inv[k][col];
      }
      U_inv[row][col] = -total / U[row][row];
    }
  }
  // Now the answer is U_inv.L.
  for (row = 0; row < size; row++) {
    for (col = 0; col < size; col++) {
      double sum = 0.0;
      for (int k = row; k < size; ++k) {
        sum += U_inv[row][k] * L[k][col];
      }
      inv[row*size + col] = sum;
    }
  }
  // Check matrix product: sum the absolute off-diagonal elements of
  // input * inv, which would all be zero for a perfect inverse.
  double error_sum = 0.0;
  for (row = 0; row < size; row++) {
    for (col = 0; col < size; col++) {
      double sum = 0.0;
      for (int k = 0; k < size; ++k) {
        sum += static_cast<double>(input[row * size + k]) * inv[k * size + col];
      }
      if (row != col) {
        error_sum += Abs(sum);
      }
    }
  }
  return error_sum;
}

} // namespace tesseract
diff --git a/tesseract/src/classify/cluster.h b/tesseract/src/classify/cluster.h
new file mode 100644
index 00000000..8a6a270a
--- /dev/null
+++ b/tesseract/src/classify/cluster.h
@@ -0,0 +1,138 @@
/******************************************************************************
 ** Filename: cluster.h
 ** Purpose: Definition of feature space clustering routines
 ** Author: Dan Johnson
 **
 ** (c) Copyright Hewlett-Packard Company, 1988.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *****************************************************************************/

#ifndef CLUSTER_H
#define CLUSTER_H

#include "kdtree.h"
#include "oldlist.h"

namespace tesseract {

struct BUCKETS;

#define MINBUCKETS 5
#define MAXBUCKETS 39

/*----------------------------------------------------------------------
  Types
----------------------------------------------------------------------*/
// A node of the binary cluster tree; leaves are individual samples.
typedef struct sample {
  bool Clustered : 1;         // true if included in a higher cluster
  bool Prototype : 1;         // true if cluster represented by a proto
  unsigned SampleCount : 30;  // number of samples in this cluster
  struct sample* Left;        // ptr to left sub-cluster
  struct sample* Right;       // ptr to right sub-cluster
  int32_t CharID;             // identifier of char sample came from
  float Mean[1];              // mean of cluster - SampleSize floats
                              // (trailing flexible-array idiom; presumably
                              // over-allocated at creation -- verify in
                              // MakeSample)
} CLUSTER;

using SAMPLE = CLUSTER;  // can refer to as either sample or cluster

typedef enum { spherical, elliptical, mixed, automatic } PROTOSTYLE;

typedef struct {          // parameters to control clustering
  PROTOSTYLE ProtoStyle;  // specifies types of protos to be made
  float MinSamples;       // min # of samples per proto - % of total
  float MaxIllegal;       // max percentage of samples in a cluster which
                          // have more than 1 feature in that cluster
  float Independence;     // desired independence between dimensions
  double Confidence;      // desired confidence in prototypes created
  int MagicSamples;       // Ideal number of samples in a cluster.
} CLUSTERCONFIG;

typedef enum { normal, uniform, D_random, DISTRIBUTION_COUNT } DISTRIBUTION;

// Scalar for spherical protos, per-dimension array for elliptical ones.
typedef union {
  float Spherical;
  float* Elliptical;
} FLOATUNION;

typedef struct {
  bool Significant : 1;     // true if prototype is significant
  bool Merged : 1;          // Merged after clustering so do not output
                            // but kept for display purposes. If it has no
                            // samples then it was actually merged.
                            // Otherwise it matched an already significant
                            // cluster.
  unsigned Style : 2;       // spherical, elliptical, or mixed
  unsigned NumSamples : 28; // number of samples in the cluster
  CLUSTER* Cluster;         // ptr to cluster which made prototype
  DISTRIBUTION* Distrib;    // different distribution for each dimension
  float* Mean;              // prototype mean
  float TotalMagnitude;     // total magnitude over all dimensions
  float LogMagnitude;       // log base e of TotalMagnitude
  FLOATUNION Variance;      // prototype variance
  FLOATUNION Magnitude;     // magnitude of density function
  FLOATUNION Weight;        // weight of density function
} PROTOTYPE;

typedef struct {
  int16_t SampleSize;       // number of parameters per sample
  PARAM_DESC* ParamDesc;    // description of each parameter
  int32_t NumberOfSamples;  // total number of samples being clustered
  KDTREE* KDTree;           // for optimal nearest neighbor searching
  CLUSTER* Root;            // ptr to root cluster of cluster tree
  LIST ProtoList;           // list of prototypes
  int32_t NumChar;          // # of characters represented by samples
  // cache of reusable histograms by distribution type and number of buckets.
  BUCKETS* bucket_cache[DISTRIBUTION_COUNT][MAXBUCKETS + 1 - MINBUCKETS];
} CLUSTERER;

typedef struct {
  int32_t NumSamples;     // number of samples in list
  int32_t MaxNumSamples;  // maximum size of list
  SAMPLE* Sample[1];      // array of ptrs to sample data structures
} SAMPLELIST;

// low level cluster tree analysis routines.
// Seeds the search state S with cluster C (or an empty list when C is null)
// for use with NextSample().
#define InitSampleSearch(S, C) \
  (((C) == nullptr) ? (S = NIL_LIST) : (S = push(NIL_LIST, (C))))

/*--------------------------------------------------------------------------
  Public Function Prototypes
--------------------------------------------------------------------------*/
TESS_API
CLUSTERER* MakeClusterer(int16_t SampleSize, const PARAM_DESC ParamDesc[]);

TESS_API
SAMPLE* MakeSample(CLUSTERER* Clusterer, const float* Feature, int32_t CharID);

TESS_API
LIST ClusterSamples(CLUSTERER* Clusterer, CLUSTERCONFIG* Config);

TESS_API
void FreeClusterer(CLUSTERER* Clusterer);

TESS_API
void FreeProtoList(LIST* ProtoList);

void FreePrototype(void* arg);  // PROTOTYPE *Prototype);

CLUSTER* NextSample(LIST* SearchState);

float Mean(PROTOTYPE* Proto, uint16_t Dimension);

float StandardDeviation(PROTOTYPE* Proto, uint16_t Dimension);

TESS_API
int32_t MergeClusters(int16_t N, PARAM_DESC ParamDesc[], int32_t n1, int32_t n2,
                      float m[], float m1[], float m2[]);

} // namespace tesseract

#endif
diff --git a/tesseract/src/classify/clusttool.cpp b/tesseract/src/classify/clusttool.cpp
new file mode 100644
index 00000000..4227a1f0
--- /dev/null
+++ b/tesseract/src/classify/clusttool.cpp
@@ -0,0 +1,319 @@
/******************************************************************************
 ** Filename: clusttool.cpp
 ** Purpose: Misc. tools for use with the clustering routines
 ** Author: Dan Johnson
 **
 ** (c) Copyright Hewlett-Packard Company, 1988.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and + ** limitations under the License. + *****************************************************************************/ + +#define _USE_MATH_DEFINES // for M_PI + +#include "clusttool.h" + +#include <cmath> // for M_PI, std::isnan +#include <locale> // for std::locale::classic +#include <sstream> // for std::stringstream + +namespace tesseract { + +//---------------Global Data Definitions and Declarations-------------------- +#define TOKENSIZE 80 ///< max size of tokens read from an input file +#define QUOTED_TOKENSIZE "79" +#define MAXSAMPLESIZE 65535 ///< max num of dimensions in feature space + +/** + * This routine reads N floats from the specified text file + * and places them into Buffer. If Buffer is nullptr, a buffer + * is created and passed back to the caller. If EOF is + * encountered before any floats can be read, nullptr is + * returned. + * @param fp open text file to read floats from + * @param N number of floats to read + * @param Buffer pointer to buffer to place floats into + * @return Pointer to buffer holding floats or nullptr if EOF + * @note Globals: None + */ +static float *ReadNFloats(TFile *fp, uint16_t N, float Buffer[]) { + const int kMaxLineSize = 1024; + char line[kMaxLineSize]; + if (fp->FGets(line, kMaxLineSize) == nullptr) { + tprintf("Hit EOF in ReadNFloats!\n"); + return nullptr; + } + bool needs_free = false; + + if (Buffer == nullptr) { + Buffer = static_cast<float *>(malloc(N * sizeof(float))); + needs_free = true; + } + + std::stringstream stream(line); + // Use "C" locale (needed for float values Buffer[i]). 
+ stream.imbue(std::locale::classic()); + for (uint16_t i = 0; i < N; i++) { + float f = NAN; + stream >> f; + if (std::isnan(f)) { + tprintf("Read of %u floats failed!\n", N); + if (needs_free) free(Buffer); + return nullptr; + } + Buffer[i] = f; + } + return Buffer; +} + +/** + * This routine writes a text representation of N floats from + * an array to a file. All of the floats are placed on one line. + * @param File open text file to write N floats to + * @param N number of floats to write + * @param Array array of floats to write + */ +static void WriteNFloats(FILE * File, uint16_t N, float Array[]) { + for (int i = 0; i < N; i++) + fprintf(File, " %9.6f", Array[i]); + fprintf(File, "\n"); +} + +/** + * This routine writes to the specified text file a word + * which represents the ProtoStyle. It does not append + * a carriage return to the end. + * @param File open text file to write prototype style to + * @param ProtoStyle prototype style to write + */ +static void WriteProtoStyle(FILE *File, PROTOSTYLE ProtoStyle) { + switch (ProtoStyle) { + case spherical: + fprintf (File, "spherical"); + break; + case elliptical: + fprintf (File, "elliptical"); + break; + case mixed: + fprintf (File, "mixed"); + break; + case automatic: + fprintf (File, "automatic"); + break; + } +} + +/** + * This routine reads a single integer from the specified + * file and checks to ensure that it is between 0 and + * MAXSAMPLESIZE. + * @param fp open text file to read sample size from + * @return Sample size + * @note Globals: None + */ +uint16_t ReadSampleSize(TFile *fp) { + int SampleSize = 0; + + const int kMaxLineSize = 100; + char line[kMaxLineSize]; + ASSERT_HOST(fp->FGets(line, kMaxLineSize) != nullptr); + ASSERT_HOST(sscanf(line, "%d", &SampleSize) == 1); + ASSERT_HOST(SampleSize >= 0 && SampleSize <= MAXSAMPLESIZE); + return SampleSize; +} + +/** + * This routine reads textual descriptions of sets of parameters + * which describe the characteristics of feature dimensions. 
+ * + * @param fp open text file to read N parameter descriptions from + * @param N number of parameter descriptions to read + * @return Pointer to an array of parameter descriptors. + * @note Globals: None + */ +PARAM_DESC *ReadParamDesc(TFile *fp, uint16_t N) { + PARAM_DESC *ParamDesc; + + ParamDesc = static_cast<PARAM_DESC *>(malloc (N * sizeof (PARAM_DESC))); + for (int i = 0; i < N; i++) { + const int kMaxLineSize = TOKENSIZE * 4; + char line[kMaxLineSize]; + ASSERT_HOST(fp->FGets(line, kMaxLineSize) != nullptr); + std::istringstream stream(line); + // Use "C" locale (needed for float values Min, Max). + stream.imbue(std::locale::classic()); + std::string linear_token; + stream >> linear_token; + std::string essential_token; + stream >> essential_token; + stream >> ParamDesc[i].Min; + stream >> ParamDesc[i].Max; + ASSERT_HOST(!stream.fail()); + ParamDesc[i].Circular = (linear_token[0] == 'c'); + ParamDesc[i].NonEssential = (essential_token[0] != 'e'); + ParamDesc[i].Range = ParamDesc[i].Max - ParamDesc[i].Min; + ParamDesc[i].HalfRange = ParamDesc[i].Range / 2; + ParamDesc[i].MidRange = (ParamDesc[i].Max + ParamDesc[i].Min) / 2; + } + return (ParamDesc); +} + +/** + * This routine reads a textual description of a prototype from + * the specified file. 
+ * + * @param fp open text file to read prototype from + * @param N number of dimensions used in prototype + * @return List of prototypes + * @note Globals: None + */ +PROTOTYPE *ReadPrototype(TFile *fp, uint16_t N) { + char sig_token[TOKENSIZE], shape_token[TOKENSIZE]; + PROTOTYPE *Proto; + int SampleCount; + int i; + + const int kMaxLineSize = TOKENSIZE * 4; + char line[kMaxLineSize]; + if (fp->FGets(line, kMaxLineSize) == nullptr || + sscanf(line, "%" QUOTED_TOKENSIZE "s %" QUOTED_TOKENSIZE "s %d", + sig_token, shape_token, &SampleCount) != 3) { + tprintf("Invalid prototype: %s\n", line); + return nullptr; + } + Proto = static_cast<PROTOTYPE *>(malloc(sizeof(PROTOTYPE))); + Proto->Cluster = nullptr; + Proto->Significant = (sig_token[0] == 's'); + + switch (shape_token[0]) { + case 's': + Proto->Style = spherical; + break; + case 'e': + Proto->Style = elliptical; + break; + case 'a': + Proto->Style = automatic; + break; + default: + tprintf("Invalid prototype style specification:%s\n", shape_token); + Proto->Style = elliptical; + } + + ASSERT_HOST(SampleCount >= 0); + Proto->NumSamples = SampleCount; + + Proto->Mean = ReadNFloats(fp, N, nullptr); + ASSERT_HOST(Proto->Mean != nullptr); + + switch (Proto->Style) { + case spherical: + ASSERT_HOST(ReadNFloats(fp, 1, &(Proto->Variance.Spherical)) != nullptr); + Proto->Magnitude.Spherical = + 1.0 / sqrt(2.0 * M_PI * Proto->Variance.Spherical); + Proto->TotalMagnitude = pow(Proto->Magnitude.Spherical, static_cast<float>(N)); + Proto->LogMagnitude = log(static_cast<double>(Proto->TotalMagnitude)); + Proto->Weight.Spherical = 1.0 / Proto->Variance.Spherical; + Proto->Distrib = nullptr; + break; + case elliptical: + Proto->Variance.Elliptical = ReadNFloats(fp, N, nullptr); + ASSERT_HOST(Proto->Variance.Elliptical != nullptr); + Proto->Magnitude.Elliptical = static_cast<float *>(malloc(N * sizeof(float))); + Proto->Weight.Elliptical = static_cast<float *>(malloc(N * sizeof(float))); + Proto->TotalMagnitude = 1.0; + for (i 
= 0; i < N; i++) { + Proto->Magnitude.Elliptical[i] = + 1.0 / sqrt(2.0 * M_PI * Proto->Variance.Elliptical[i]); + Proto->Weight.Elliptical[i] = 1.0 / Proto->Variance.Elliptical[i]; + Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i]; + } + Proto->LogMagnitude = log(static_cast<double>(Proto->TotalMagnitude)); + Proto->Distrib = nullptr; + break; + default: + free(Proto); + tprintf("Invalid prototype style\n"); + return nullptr; + } + return Proto; +} + +/** + * This routine writes an array of dimension descriptors to + * the specified text file. + * @param File open text file to write param descriptors to + * @param N number of param descriptors to write + * @param ParamDesc array of param descriptors to write + */ +void WriteParamDesc(FILE *File, uint16_t N, const PARAM_DESC ParamDesc[]) { + int i; + + for (i = 0; i < N; i++) { + if (ParamDesc[i].Circular) + fprintf (File, "circular "); + else + fprintf (File, "linear "); + + if (ParamDesc[i].NonEssential) + fprintf (File, "non-essential "); + else + fprintf (File, "essential "); + + fprintf (File, "%10.6f %10.6f\n", ParamDesc[i].Min, ParamDesc[i].Max); + } +} + +/** + * This routine writes a textual description of a prototype + * to the specified text file. 
 * @param File open text file to write prototype to
 * @param N number of dimensions in feature space
 * @param Proto prototype to write out
 */
void WritePrototype(FILE *File, uint16_t N, PROTOTYPE *Proto) {
  int i;

  if (Proto->Significant)
    fprintf (File, "significant   ");
  else
    fprintf (File, "insignificant ");
  WriteProtoStyle (File, static_cast<PROTOSTYLE>(Proto->Style));
  fprintf (File, "%6d\n\t", Proto->NumSamples);
  WriteNFloats (File, N, Proto->Mean);
  fprintf (File, "\t");

  // NOTE: the switch has no case for automatic, so such a proto writes no
  // variance section at all.
  switch (Proto->Style) {
    case spherical:
      WriteNFloats (File, 1, &(Proto->Variance.Spherical));
      break;
    case elliptical:
      WriteNFloats (File, N, Proto->Variance.Elliptical);
      break;
    case mixed:
      // Per-dimension distribution tags, then the elliptical variances.
      for (i = 0; i < N; i++)
        switch (Proto->Distrib[i]) {
          case normal:
            fprintf (File, " %9s", "normal");
            break;
          case uniform:
            fprintf (File, " %9s", "uniform");
            break;
          case D_random:
            fprintf (File, " %9s", "random");
            break;
          case DISTRIBUTION_COUNT:
            ASSERT_HOST(!"Distribution count not allowed!");
        }
      fprintf (File, "\n\t");
      WriteNFloats (File, N, Proto->Variance.Elliptical);
  }
}

} // namespace tesseract
diff --git a/tesseract/src/classify/clusttool.h b/tesseract/src/classify/clusttool.h
new file mode 100644
index 00000000..ead65618
--- /dev/null
+++ b/tesseract/src/classify/clusttool.h
@@ -0,0 +1,43 @@
/******************************************************************************
 ** Filename: clusttool.h
 ** Purpose: Definition of clustering utility tools
 ** Author: Dan Johnson
 **
 ** (c) Copyright Hewlett-Packard Company, 1988.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + ******************************************************************************/ + +#ifndef TESSERACT_CLASSIFY_CLUSTTOOL_H_ +#define TESSERACT_CLASSIFY_CLUSTTOOL_H_ + +#include "cluster.h" + +#include "serialis.h" + +#include <cstdio> + +namespace tesseract { + +uint16_t ReadSampleSize(tesseract::TFile *fp); + +PARAM_DESC *ReadParamDesc(tesseract::TFile *fp, uint16_t N); + +PROTOTYPE *ReadPrototype(tesseract::TFile *fp, uint16_t N); + +TESS_API +void WriteParamDesc(FILE *File, uint16_t N, const PARAM_DESC ParamDesc[]); + +TESS_API +void WritePrototype(FILE *File, uint16_t N, PROTOTYPE *Proto); + +} // namespace tesseract + +#endif // TESSERACT_CLASSIFY_CLUSTTOOL_H_ diff --git a/tesseract/src/classify/cutoffs.cpp b/tesseract/src/classify/cutoffs.cpp new file mode 100644 index 00000000..f75788d8 --- /dev/null +++ b/tesseract/src/classify/cutoffs.cpp @@ -0,0 +1,73 @@ +/****************************************************************************** + ** Filename: cutoffs.c + ** Purpose: Routines to manipulate an array of class cutoffs. + ** Author: Dan Johnson + ** + ** (c) Copyright Hewlett-Packard Company, 1988. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. 
+ ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + ******************************************************************************/ +/*---------------------------------------------------------------------------- + Include Files and Type Defines +----------------------------------------------------------------------------*/ + +#include <cstdio> +#include <sstream> // for std::istringstream +#include <string> // for std::string + +#include "classify.h" +#include "helpers.h" +#include "serialis.h" +#include <tesseract/unichar.h> + +#define MAX_CUTOFF 1000 + +namespace tesseract { +/** + * Open file, read in all of the class-id/cutoff pairs + * and insert them into the Cutoffs array. Cutoffs are + * indexed in the array by class id. Unused entries in the + * array are set to an arbitrarily high cutoff value. 
 * @param fp file containing cutoff definitions
 * @param Cutoffs array to put cutoffs into
 */
void Classify::ReadNewCutoffs(TFile* fp, uint16_t* Cutoffs) {
  int Cutoff;

  // When a shape table is present the cutoffs file starts with serialized
  // shapetable cutoffs; a failed read is reported but not fatal.
  if (shape_table_ != nullptr) {
    if (!shapetable_cutoffs_.DeSerialize(fp)) {
      tprintf("Error during read of shapetable pffmtable!\n");
    }
  }
  // Default every class to the arbitrarily-high MAX_CUTOFF; entries read
  // below overwrite only the classes that appear in the file.
  for (int i = 0; i < MAX_NUM_CLASSES; i++)
    Cutoffs[i] = MAX_CUTOFF;

  const int kMaxLineSize = 100;
  char line[kMaxLineSize];
  while (fp->FGets(line, kMaxLineSize) != nullptr) {
    std::string Class;
    CLASS_ID ClassId;
    std::istringstream stream(line);
    // "C" locale keeps number parsing independent of the user's locale.
    stream.imbue(std::locale::classic());
    stream >> Class >> Cutoff;
    // A malformed line terminates the loop (not just that entry).
    if (stream.fail()) {
      break;
    }
    // "NULL" is the legacy token used for the space character's class.
    if (Class.compare("NULL") == 0) {
      ClassId = unicharset.unichar_to_id(" ");
    } else {
      ClassId = unicharset.unichar_to_id(Class.c_str());
    }
    ASSERT_HOST(ClassId >= 0 && ClassId < MAX_NUM_CLASSES);
    Cutoffs[ClassId] = Cutoff;
  }
}

} // namespace tesseract
diff --git a/tesseract/src/classify/featdefs.cpp b/tesseract/src/classify/featdefs.cpp
new file mode 100644
index 00000000..54647431
--- /dev/null
+++ b/tesseract/src/classify/featdefs.cpp
@@ -0,0 +1,280 @@
/******************************************************************************
 ** Filename: featdefs.cpp
 ** Purpose: Definitions of currently defined feature types.
 ** Author: Dan Johnson
 **
 ** (c) Copyright Hewlett-Packard Company, 1988.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
+ ******************************************************************************/ + +#include "featdefs.h" + +#include "picofeat.h" // for PicoFeatureLength +#include "scanutils.h" + +#include <cstring> +#include <cstdio> + +namespace tesseract { + +#define PICO_FEATURE_LENGTH 0.05 + +/*----------------------------------------------------------------------------- + Global Data Definitions and Declarations +-----------------------------------------------------------------------------*/ +const char* const kMicroFeatureType = "mf"; +const char* const kCNFeatureType = "cn"; +const char* const kIntFeatureType = "if"; +const char* const kGeoFeatureType = "tb"; + +// Define all of the parameters for the MicroFeature type. +StartParamDesc(MicroFeatureParams) +DefineParam(0, 0, -0.5, 0.5) +DefineParam(0, 0, -0.25, 0.75) +DefineParam(0, 1, 0.0, 1.0) +DefineParam(1, 0, 0.0, 1.0) +DefineParam (0, 1, -0.5, 0.5) +DefineParam (0, 1, -0.5, 0.5) +EndParamDesc +// Now define the feature type itself (see features.h for parameters). +DefineFeature(MicroFeatureDesc, 5, 1, kMicroFeatureType, MicroFeatureParams) + +// Define all of the parameters for the NormFeat type. +StartParamDesc (CharNormParams) +DefineParam(0, 0, -0.25, 0.75) +DefineParam(0, 1, 0.0, 1.0) +DefineParam(0, 0, 0.0, 1.0) +DefineParam(0, 0, 0.0, 1.0) +EndParamDesc +// Now define the feature type itself (see features.h for parameters). +DefineFeature(CharNormDesc, 4, 0, kCNFeatureType, CharNormParams) + +// Define all of the parameters for the IntFeature type +StartParamDesc(IntFeatParams) +DefineParam(0, 0, 0.0, 255.0) +DefineParam(0, 0, 0.0, 255.0) +DefineParam(1, 0, 0.0, 255.0) +EndParamDesc +// Now define the feature type itself (see features.h for parameters). 
DefineFeature(IntFeatDesc, 2, 1, kIntFeatureType, IntFeatParams)

// Define all of the parameters for the GeoFeature type
StartParamDesc(GeoFeatParams)
DefineParam(0, 0, 0.0, 255.0)
DefineParam(0, 0, 0.0, 255.0)
DefineParam(0, 0, 0.0, 255.0)
EndParamDesc
// Now define the feature type itself (see features.h for parameters).
DefineFeature(GeoFeatDesc, 3, 0, kGeoFeatureType, GeoFeatParams)

// Other features used for training the adaptive classifier, but not used
// during normal training, therefore not in the DescDefs array.

// Define all of the parameters for the PicoFeature type
// define knob that can be used to adjust pico-feature length.
float PicoFeatureLength = PICO_FEATURE_LENGTH;
StartParamDesc(PicoFeatParams)
DefineParam(0, 0, -0.25, 0.75)
DefineParam(1, 0, 0.0, 1.0)
DefineParam(0, 0, -0.5, 0.5)
EndParamDesc
// Now define the feature type itself (see features.h for parameters).
DefineFeature(PicoFeatDesc, 2, 1, "pf", PicoFeatParams)

// Define all of the parameters for the OutlineFeature type.
StartParamDesc(OutlineFeatParams)
DefineParam(0, 0, -0.5, 0.5)
DefineParam(0, 0, -0.25, 0.75)
DefineParam(0, 0, 0.0, 1.0)
DefineParam(1, 0, 0.0, 1.0)
EndParamDesc
// Now define the feature type itself (see features.h for parameters).
// NOTE(review): DefineParam arguments appear to map onto PARAM_DESC's
// (Circular, NonEssential, Min, Max) fields -- confirm in ocrfeatures.h.
DefineFeature(OutlineFeatDesc, 3, 1, "of", OutlineFeatParams)

// MUST be kept in-sync with ExtractorDefs in fxdefs.cpp.
// Table of the feature types used in normal training, in the fixed order
// assumed by serialized data.
static const FEATURE_DESC_STRUCT *DescDefs[NUM_FEATURE_TYPES] = {
  &MicroFeatureDesc,
  &CharNormDesc,
  &IntFeatDesc,
  &GeoFeatDesc
};

/*-----------------------------------------------------------------------------
              Public Code
-----------------------------------------------------------------------------*/
// Copies the static DescDefs table into the caller-supplied definitions.
void InitFeatureDefs(FEATURE_DEFS_STRUCT *featuredefs) {
  featuredefs->NumFeatureTypes = NUM_FEATURE_TYPES;
  for (int i = 0; i < NUM_FEATURE_TYPES; ++i) {
    featuredefs->FeatureDesc[i] = DescDefs[i];
  }
}

/*---------------------------------------------------------------------------*/
/**
 * Release the memory consumed by the specified character
 * description and all of the features in that description.
 *
 * @param CharDesc character description to be deallocated
 *
 * Globals:
 * - none
 */
void FreeCharDescription(CHAR_DESC CharDesc) {
  if (CharDesc) {
    for (size_t i = 0; i < CharDesc->NumFeatureSets; i++)
      FreeFeatureSet (CharDesc->FeatureSets[i]);
    free(CharDesc);
  }
} /* FreeCharDescription */


/*---------------------------------------------------------------------------*/
/**
 * Allocate a new character description, initialize its
 * feature sets to be empty, and return it.
 *
 * Globals:
 * - none
 *
 * @return New character description structure.
 */
CHAR_DESC NewCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs) {
  CHAR_DESC CharDesc;
  // NOTE(review): malloc result is not checked; a failure would be
  // dereferenced on the next line.
  CharDesc = static_cast<CHAR_DESC>(malloc (sizeof (CHAR_DESC_STRUCT)));
  CharDesc->NumFeatureSets = FeatureDefs.NumFeatureTypes;

  for (size_t i = 0; i < CharDesc->NumFeatureSets; i++)
    CharDesc->FeatureSets[i] = nullptr;

  return (CharDesc);
} /* NewCharDescription */

/*---------------------------------------------------------------------------*/
/**
 * Appends a textual representation of CharDesc to str.
 * The format used is to write out the number of feature
 * sets which will be written followed by a representation of
 * each feature set.
 *
 * Each set starts with the short name for that feature followed
 * by a description of the feature set.  Feature sets which are
 * not present are not written.
 *
 * @param FeatureDefs definitions of feature types/extractors
 * @param str string to append CharDesc to
 * @param CharDesc character description to write to File
 */
void WriteCharDescription(const FEATURE_DEFS_STRUCT& FeatureDefs,
                          CHAR_DESC CharDesc, STRING* str) {
  int NumSetsToWrite = 0;

  // First pass: count the non-empty sets so the header is correct.
  for (size_t Type = 0; Type < CharDesc->NumFeatureSets; Type++)
    if (CharDesc->FeatureSets[Type])
      NumSetsToWrite++;

  str->add_str_int(" ", NumSetsToWrite);
  *str += "\n";
  for (size_t Type = 0; Type < CharDesc->NumFeatureSets; Type++) {
    if (CharDesc->FeatureSets[Type]) {
      *str += FeatureDefs.FeatureDesc[Type]->ShortName;
      *str += " ";
      WriteFeatureSet(CharDesc->FeatureSets[Type], str);
    }
  }
} /* WriteCharDescription */

// Return whether all of the fields of the given feature set
// are well defined (not inf or nan).
bool ValidCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs,
                          CHAR_DESC CharDesc) {
  bool anything_written = false;
  bool well_formed = true;
  for (size_t Type = 0; Type < CharDesc->NumFeatureSets; Type++) {
    if (CharDesc->FeatureSets[Type]) {
      for (int i = 0; i < CharDesc->FeatureSets[Type]->NumFeatures; i++) {
        FEATURE feat = CharDesc->FeatureSets[Type]->Features[i];
        for (int p = 0; p < feat->Type->NumParams; p++) {
          if (std::isnan(feat->Params[p]) || std::isinf(feat->Params[p]))
            well_formed = false;
          else
            anything_written = true;
        }
      }
    } else {
      // A missing feature set makes the whole description invalid.
      return false;
    }
  }
  // Valid only if at least one finite parameter exists and none were bad.
  return anything_written && well_formed;
} /* ValidCharDescription */

/*---------------------------------------------------------------------------*/
/**
 * Read a character description from File, and return
 * a data structure containing this information.  The data
 * is formatted as follows:
 * @verbatim
     NumberOfSets
     ShortNameForSet1 Set1
     ShortNameForSet2 Set2
     ...
   @endverbatim
 *
 * Globals:
 * - none
 *
 * @param FeatureDefs definitions of feature types/extractors
 * @param File open text file to read character description from
 * @return Character description read from File.
 */
CHAR_DESC ReadCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs,
                              FILE *File) {
  int NumSetsToRead;
  char ShortName[FEAT_NAME_SIZE];
  CHAR_DESC CharDesc;
  int Type;

  ASSERT_HOST(tfscanf(File, "%d", &NumSetsToRead) == 1);
  ASSERT_HOST(NumSetsToRead >= 0);
  ASSERT_HOST(NumSetsToRead <= FeatureDefs.NumFeatureTypes);

  CharDesc = NewCharDescription(FeatureDefs);
  for (; NumSetsToRead > 0; NumSetsToRead--) {
    // NOTE(review): "%s" has no field width, so a malformed name longer
    // than FEAT_NAME_SIZE-1 could overflow ShortName (assuming tfscanf
    // mirrors fscanf semantics) -- consider adding a width limit.
    tfscanf(File, "%s", ShortName);
    Type = ShortNameToFeatureType(FeatureDefs, ShortName);
    CharDesc->FeatureSets[Type] =
      ReadFeatureSet (File, FeatureDefs.FeatureDesc[Type]);
  }
  return CharDesc;
}

/*---------------------------------------------------------------------------*/
/**
 * Search through all features currently defined and return
 * the feature type for the feature with the specified short
 * name.  Trap an error if the specified name is not found.
 *
 * Globals:
 * - none
 *
 * @param FeatureDefs definitions of feature types/extractors
 * @param ShortName short name of a feature type
 * @return Feature type which corresponds to ShortName.
 */
uint32_t ShortNameToFeatureType(const FEATURE_DEFS_STRUCT &FeatureDefs,
                                const char *ShortName) {
  for (int i = 0; i < FeatureDefs.NumFeatureTypes; i++)
    if (!strcmp ((FeatureDefs.FeatureDesc[i]->ShortName), ShortName))
      return static_cast<uint32_t>(i);
  // Unknown short names are a fatal data error.
  ASSERT_HOST(!"Illegal short name for a feature");
  return 0;
}

} // namespace tesseract
diff --git a/tesseract/src/classify/featdefs.h b/tesseract/src/classify/featdefs.h
new file mode 100644
index 00000000..eb8c66fe
--- /dev/null
+++ b/tesseract/src/classify/featdefs.h
@@ -0,0 +1,87 @@
/******************************************************************************
 ** Filename: featdefs.h
 ** Purpose: Definitions of currently defined feature types.
 ** Author: Dan Johnson
 **
 ** (c) Copyright Hewlett-Packard Company, 1988.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 ******************************************************************************/

#ifndef FEATDEFS_H
#define FEATDEFS_H

#include "ocrfeatures.h"

namespace tesseract {

/* Enumerate the different types of features currently defined. */
#define NUM_FEATURE_TYPES 4
extern TESS_API const char* const kMicroFeatureType;
extern TESS_API const char* const kCNFeatureType;
extern TESS_API const char* const kIntFeatureType;
extern TESS_API const char* const kGeoFeatureType;

/* A character is described by multiple sets of extracted features.
Each + set contains a number of features of a particular type, for example, a + set of bays, or a set of closures, or a set of microfeatures. Each + feature consists of a number of parameters. All features within a + feature set contain the same number of parameters.*/ + +struct CHAR_DESC_STRUCT { + uint32_t NumFeatureSets; + FEATURE_SET FeatureSets[NUM_FEATURE_TYPES]; +}; +using CHAR_DESC = CHAR_DESC_STRUCT *; + +struct FEATURE_DEFS_STRUCT { + int32_t NumFeatureTypes; + const FEATURE_DESC_STRUCT* FeatureDesc[NUM_FEATURE_TYPES]; +}; +using FEATURE_DEFS = FEATURE_DEFS_STRUCT *; + +/*---------------------------------------------------------------------- + Generic functions for manipulating character descriptions +----------------------------------------------------------------------*/ +TESS_API +void InitFeatureDefs(FEATURE_DEFS_STRUCT *featuredefs); + +TESS_API +void FreeCharDescription(CHAR_DESC CharDesc); + +CHAR_DESC NewCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs); + +bool ValidCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, + CHAR_DESC CharDesc); + +void WriteCharDescription(const FEATURE_DEFS_STRUCT& FeatureDefs, + CHAR_DESC CharDesc, STRING* str); + +TESS_API +CHAR_DESC ReadCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, + FILE *File); + +TESS_API +uint32_t ShortNameToFeatureType(const FEATURE_DEFS_STRUCT &FeatureDefs, + const char *ShortName); + +/**---------------------------------------------------------------------------- + Global Data Definitions and Declarations +----------------------------------------------------------------------------**/ +extern const FEATURE_DESC_STRUCT MicroFeatureDesc; +extern TESS_API const FEATURE_DESC_STRUCT PicoFeatDesc; +extern const FEATURE_DESC_STRUCT CharNormDesc; +extern const FEATURE_DESC_STRUCT OutlineFeatDesc; +extern const FEATURE_DESC_STRUCT IntFeatDesc; +extern const FEATURE_DESC_STRUCT GeoFeatDesc; + +} // namespace tesseract + +#endif diff --git a/tesseract/src/classify/float2int.cpp 
b/tesseract/src/classify/float2int.cpp new file mode 100644 index 00000000..1b48779b --- /dev/null +++ b/tesseract/src/classify/float2int.cpp @@ -0,0 +1,109 @@ +/****************************************************************************** + ** Filename: float2int.cpp + ** Purpose: Routines for converting float features to int features + ** Author: Dan Johnson + ** + ** (c) Copyright Hewlett-Packard Company, 1988. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + ******************************************************************************/ + +#include "float2int.h" + +#include "normmatch.h" +#include "mfoutline.h" +#include "classify.h" +#include "picofeat.h" + +#include "helpers.h" + +#define MAX_INT_CHAR_NORM (INT_CHAR_NORM_RANGE - 1) + +/*---------------------------------------------------------------------------*/ +namespace tesseract { + +/** + * For each class in the unicharset, clears the corresponding + * entry in char_norm_array. char_norm_array is indexed by unichar_id. + * + * Globals: + * - none + * + * @param char_norm_array array to be cleared + */ +void Classify::ClearCharNormArray(uint8_t* char_norm_array) { + memset(char_norm_array, 0, sizeof(*char_norm_array) * unicharset.size()); +} /* ClearCharNormArray */ + + +/*---------------------------------------------------------------------------*/ +/** + * For each class in unicharset, computes the match between + * norm_feature and the normalization protos for that class. 
+ * Converts this number to the range from 0 - 255 and stores it + * into char_norm_array. CharNormArray is indexed by unichar_id. + * + * Globals: + * - PreTrainedTemplates current set of built-in templates + * + * @param norm_feature character normalization feature + * @param[out] char_norm_array place to put results of size unicharset.size() + */ +void Classify::ComputeIntCharNormArray(const FEATURE_STRUCT& norm_feature, + uint8_t* char_norm_array) { + for (int i = 0; i < unicharset.size(); i++) { + if (i < PreTrainedTemplates->NumClasses) { + int norm_adjust = static_cast<int>(INT_CHAR_NORM_RANGE * + ComputeNormMatch(i, norm_feature, false)); + char_norm_array[i] = ClipToRange(norm_adjust, 0, MAX_INT_CHAR_NORM); + } else { + // Classes with no templates (eg. ambigs & ligatures) default + // to worst match. + char_norm_array[i] = MAX_INT_CHAR_NORM; + } + } +} /* ComputeIntCharNormArray */ + + +/*---------------------------------------------------------------------------*/ +/** + * This routine converts each floating point pico-feature + * in Features into integer format and saves it into + * IntFeatures. 
+ * + * Globals: + * - none + * + * @param Features floating point pico-features to be converted + * @param[out] IntFeatures array to put converted features into + */ +void Classify::ComputeIntFeatures(FEATURE_SET Features, + INT_FEATURE_ARRAY IntFeatures) { + float YShift; + + if (classify_norm_method == baseline) + YShift = BASELINE_Y_SHIFT; + else + YShift = Y_SHIFT; + + for (int Fid = 0; Fid < Features->NumFeatures; Fid++) { + FEATURE Feature = Features->Features[Fid]; + + IntFeatures[Fid].X = + Bucket8For(Feature->Params[PicoFeatX], X_SHIFT, INT_FEAT_RANGE); + IntFeatures[Fid].Y = + Bucket8For(Feature->Params[PicoFeatY], YShift, INT_FEAT_RANGE); + IntFeatures[Fid].Theta = CircBucketFor(Feature->Params[PicoFeatDir], + ANGLE_SHIFT, INT_FEAT_RANGE); + IntFeatures[Fid].CP_misses = 0; + } +} /* ComputeIntFeatures */ + +} // namespace tesseract diff --git a/tesseract/src/classify/float2int.h b/tesseract/src/classify/float2int.h new file mode 100644 index 00000000..70a05ab6 --- /dev/null +++ b/tesseract/src/classify/float2int.h @@ -0,0 +1,30 @@ +/****************************************************************************** + ** Filename: float2int.h + ** Purpose: Routines for converting float features to int features + ** Author: Dan Johnson + ** + ** (c) Copyright Hewlett-Packard Company, 1988. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
 ******************************************************************************/

#ifndef FLOAT2INT_H
#define FLOAT2INT_H

/*-----------------------------------------------------------------------------
          Include Files and Type Defines
-----------------------------------------------------------------------------*/
#include "intmatcher.h"
#include "ocrfeatures.h"

// Number of quanta used for each integer feature dimension (X, Y, Theta);
// bucketed values lie in [0, INT_FEAT_RANGE - 1].
#define INT_FEAT_RANGE 256
// Vertical shift applied when converting baseline-normalized features
// (used by Classify::ComputeIntFeatures in float2int.cpp).
#define BASELINE_Y_SHIFT (0.25)

#endif

diff --git a/tesseract/src/classify/fpoint.cpp b/tesseract/src/classify/fpoint.cpp
new file mode 100644
index 00000000..333b1fc7
--- /dev/null
+++ b/tesseract/src/classify/fpoint.cpp
/******************************************************************************
 ** Filename: fpoint.cpp
 ** Purpose: Abstract data type for a 2D point (floating point coords)
 ** Author: Dan Johnson
 **
 ** (c) Copyright Hewlett-Packard Company, 1988.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
+ ******************************************************************************/ +/*---------------------------------------------------------------------------- + Include Files and Type Defines +----------------------------------------------------------------------------*/ +#define _USE_MATH_DEFINES // for M_PI +#include "fpoint.h" +#include <cstdio> +#include <cmath> // for M_PI + +/*---------------------------------------------------------------------------- + Public Code +----------------------------------------------------------------------------*/ + +float DistanceBetween(FPOINT A, FPOINT B) { + const double xd = XDelta(A, B); + const double yd = YDelta(A, B); + return sqrt(static_cast<double>(xd * xd + yd * yd)); +} + +/** + * Return the angle from Point1 to Point2 normalized to + * lie in the range 0 to FullScale (where FullScale corresponds + * to 2*pi or 360 degrees). + * @param Point1 points to compute angle between + * @param Point2 points to compute angle between + * @param FullScale value to associate with 2*pi + * @return angle + */ +float NormalizedAngleFrom(FPOINT *Point1, FPOINT *Point2, float FullScale) { + float NumRadsInCircle = 2.0 * M_PI; + + float Angle = AngleFrom (*Point1, *Point2); + if (Angle < 0.0) + Angle += NumRadsInCircle; + Angle *= FullScale / NumRadsInCircle; + if (Angle < 0.0 || Angle >= FullScale) + Angle = 0.0; + return (Angle); +} diff --git a/tesseract/src/classify/fpoint.h b/tesseract/src/classify/fpoint.h new file mode 100644 index 00000000..93f5a20f --- /dev/null +++ b/tesseract/src/classify/fpoint.h @@ -0,0 +1,53 @@ +/****************************************************************************** + ** Filename: fpoint.h + ** Purpose: Abstract data type for 2D points (floating point coords) + ** Author: Dan Johnson + ** + ** (c) Copyright Hewlett-Packard Company, 1988. + ** (c) Copyright Hewlett-Packard Company, 1988. 
/*
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 ******************************************************************************/

#ifndef FPOINT_H
#define FPOINT_H

/**----------------------------------------------------------------------------
          Include Files and Type Defines
----------------------------------------------------------------------------**/
#include <cmath>
#include <cstdio>

/* Data structure to hold 2D points or vectors using floating point. */
typedef struct {
  float x, y;
} FPOINT;
using FVECTOR = FPOINT;

/**----------------------------------------------------------------------------
            Macros
----------------------------------------------------------------------------**/
/* Macros for computing miscellaneous functions of 2 points. */
// Signed component deltas from A to B.
#define XDelta(A, B) ((B).x - (A).x)
#define YDelta(A, B) ((B).y - (A).y)
// Slope of the line A->B; divides by zero when A.x == B.x.
#define SlopeFrom(A, B) (YDelta(A, B) / XDelta(A, B))
// Angle of the vector A->B in radians, in (-pi, pi] as given by atan2.
#define AngleFrom(A, B) (atan2((double)YDelta(A, B), (double)XDelta(A, B)))

// Y value of the line through A and B at the given X.
#define XIntersectionOf(A, B, X) (SlopeFrom(A, B) * ((X)-A.x) + A.y)

/*-------------------------------------------------------------------------
        Public Function Prototypes
---------------------------------------------------------------------------*/

// Euclidean distance between A and B (defined in fpoint.cpp).
float DistanceBetween(FPOINT A, FPOINT B);

// Angle of Point1->Point2 scaled so that 2*pi maps to FullScale.
float NormalizedAngleFrom(FPOINT* Point1, FPOINT* Point2, float FullScale);

#endif
// diff --git a/tesseract/src/classify/intfeaturespace.cpp
b/tesseract/src/classify/intfeaturespace.cpp
new file mode 100644
index 00000000..9ddd9777
--- /dev/null
+++ b/tesseract/src/classify/intfeaturespace.cpp
// Copyright 2010 Google Inc. All Rights Reserved.
// Author: rays@google.com (Ray Smith)
///////////////////////////////////////////////////////////////////////
// File: intfeaturespace.cpp
// Description: Indexed feature space based on INT_FEATURE_STRUCT.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////

#define _USE_MATH_DEFINES // for M_PI
#include "intfeaturespace.h"
#include <cmath> // for M_PI
#include "intfx.h"

namespace tesseract {

// Constructs an empty feature space; Init() must be called before use.
IntFeatureSpace::IntFeatureSpace()
  : x_buckets_(0), y_buckets_(0), theta_buckets_(0) {
}

// Sets the quantization of each dimension. No validation is performed;
// callers are expected to supply non-zero bucket counts.
void IntFeatureSpace::Init(uint8_t xbuckets, uint8_t ybuckets, uint8_t thetabuckets) {
  x_buckets_ = xbuckets;
  y_buckets_ = ybuckets;
  theta_buckets_ = thetabuckets;
}

// Serializes the feature space definition to the given file.
// Returns false on error.
// Writes the three bucket counts as single bytes, in x, y, theta order.
bool IntFeatureSpace::Serialize(FILE* fp) const {
  if (fwrite(&x_buckets_, sizeof(x_buckets_), 1, fp) != 1)
    return false;
  if (fwrite(&y_buckets_, sizeof(y_buckets_), 1, fp) != 1)
    return false;
  if (fwrite(&theta_buckets_, sizeof(theta_buckets_), 1, fp) != 1)
    return false;
  return true;
}

// Returns an INT_FEATURE_STRUCT corresponding to the given index.
// This is the inverse of the Index member.
INT_FEATURE_STRUCT IntFeatureSpace::PositionFromIndex(int index) const {
  // Index was packed as (x * y_buckets_ + y) * theta_buckets_ + theta,
  // so decode the bucket coordinates in the reverse order.
  return PositionFromBuckets(index / (y_buckets_ * theta_buckets_),
                             index / theta_buckets_ % y_buckets_,
                             index % theta_buckets_);
}

// Bulk calls to Index. Maps the given array of features to a vector of
// int32_t indices in the same order as the input.
void IntFeatureSpace::IndexFeatures(const INT_FEATURE_STRUCT* features,
                                    int num_features,
                                    GenericVector<int>* mapped_features) const {
  mapped_features->truncate(0);
  for (int f = 0; f < num_features; ++f)
    mapped_features->push_back(Index(features[f]));
}

// Bulk calls to Index. Maps the given array of features to a vector of
// sorted int32_t indices.
void IntFeatureSpace::IndexAndSortFeatures(
    const INT_FEATURE_STRUCT* features, int num_features,
    GenericVector<int>* sorted_features) const {
  sorted_features->truncate(0);
  for (int f = 0; f < num_features; ++f)
    sorted_features->push_back(Index(features[f]));
  sorted_features->sort();
}

// Returns a feature space index for the given x,y position in a display
// window, or -1 if the feature is a miss.
// NOTE(review): debugging helper - prints diagnostics via tprintf.
int IntFeatureSpace::XYToFeatureIndex(int x, int y) const {
  // Round the x,y position to a feature. Search for a valid theta.
  INT_FEATURE_STRUCT feature(x, y, 0);
  int index = -1;
  for (int theta = 0; theta <= UINT8_MAX && index < 0; ++theta) {
    feature.Theta = theta;
    index = Index(feature);
  }
  if (index < 0) {
    tprintf("(%d,%d) does not exist in feature space!\n", x, y);
    return -1;
  }
  feature = PositionFromIndex(index);
  tprintf("Click at (%d, %d) ->(%d, %d), ->(%d, %d)\n",
          x, y, feature.X, feature.Y, x - feature.X, y - feature.Y);
  // Get the relative position of x,y from the rounded feature.
  x -= feature.X;
  y -= feature.Y;
  if (x != 0 || y != 0) {
    // Derive a theta from the offset of the click from the rounded
    // position, then re-index with that direction.
    double angle = atan2(static_cast<double>(y), static_cast<double>(x)) + M_PI;
    angle *= kIntFeatureExtent / (2.0 * M_PI);
    feature.Theta = static_cast<uint8_t>(angle + 0.5);
    index = Index(feature);
    if (index < 0) {
      tprintf("Feature failed to map to a valid index:");
      feature.print();
      return -1;
    }
    feature = PositionFromIndex(index);
  }
  feature.print();
  return index;
}

// Returns an INT_FEATURE_STRUCT corresponding to the given bucket coords.
INT_FEATURE_STRUCT IntFeatureSpace::PositionFromBuckets(int x,
                                                        int y,
                                                        int theta) const {
  // Map each bucket back to the centre of its cell in [0, kIntFeatureExtent).
  INT_FEATURE_STRUCT pos(
      (x * kIntFeatureExtent + kIntFeatureExtent / 2) / x_buckets_,
      (y * kIntFeatureExtent + kIntFeatureExtent / 2) / y_buckets_,
      DivRounded(theta * kIntFeatureExtent, theta_buckets_));
  return pos;
}

} // namespace tesseract.

diff --git a/tesseract/src/classify/intfeaturespace.h b/tesseract/src/classify/intfeaturespace.h
new file mode 100644
index 00000000..3f21e4d3
--- /dev/null
+++ b/tesseract/src/classify/intfeaturespace.h
// Copyright 2010 Google Inc. All Rights Reserved.
// Author: rays@google.com (Ray Smith)
///////////////////////////////////////////////////////////////////////
// File: intfeaturespace.h
// Description: Indexed feature space based on INT_FEATURE_STRUCT.
// Created: Wed Mar 24 10:55:30 PDT 2010
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////

#ifndef TESSERACT_CLASSIFY_INTFEATURESPACE_H_
#define TESSERACT_CLASSIFY_INTFEATURESPACE_H_

#include "genericvector.h"
#include "intproto.h"

// Extent of x,y,theta in the input feature space. [0,255].
const int kIntFeatureExtent = 256;
// Extent of x,y,theta dimensions in the quantized feature space.
const int kBoostXYBuckets = 16;
const int kBoostDirBuckets = 16;

namespace tesseract {

class IndexMap;

// Down-sampling quantization of the INT_FEATURE_STRUCT feature space and
// conversion to a single scalar index value, used as a binary feature space.
class TESS_API IntFeatureSpace {
 public:
  IntFeatureSpace();
  // Default copy constructors and assignment OK!

  // Setup the feature space with the given dimensions.
  void Init(uint8_t xbuckets, uint8_t ybuckets, uint8_t thetabuckets);

  // Serializes the feature space definition to the given file.
  // Returns false on error.
  bool Serialize(FILE* fp) const;

  // Returns the total size of the feature space.
  int Size() const {
    return static_cast<int>(x_buckets_) * y_buckets_ * theta_buckets_;
  }
  // Returns an INT_FEATURE_STRUCT corresponding to the given index.
  // This is the inverse of the Index member.
  INT_FEATURE_STRUCT PositionFromIndex(int index) const;

  // Returns a 1-dimensional index corresponding to the given feature value.
  // Range is [0, Size()-1]. Inverse of PositionFromIndex member.
  // Packing is x-major: (x_bucket * y_buckets_ + y_bucket) * theta_buckets_
  // + theta_bucket.
  int Index(const INT_FEATURE_STRUCT& f) const {
    return (XBucket(f.X) * y_buckets_ + YBucket(f.Y)) * theta_buckets_ +
        ThetaBucket(f.Theta);
  }
  // Bulk calls to Index. Maps the given array of features to a vector of
  // int32_t indices in the same order as the input.
  void IndexFeatures(const INT_FEATURE_STRUCT* features, int num_features,
                     GenericVector<int>* mapped_features) const;
  // Bulk calls to Index. Maps the given array of features to a vector of
  // sorted int32_t indices.
  void IndexAndSortFeatures(const INT_FEATURE_STRUCT* features,
                            int num_features,
                            GenericVector<int>* sorted_features) const;
  // Returns a feature space index for the given x,y position in a display
  // window, or -1 if the feature is a miss.
  int XYToFeatureIndex(int x, int y) const;

 protected:
  // Converters to generate indices for individual feature dimensions.
  // X and Y are clipped into their valid bucket ranges.
  int XBucket(int x) const {
    int bucket = x * x_buckets_ / kIntFeatureExtent;
    return ClipToRange(bucket, 0, static_cast<int>(x_buckets_) - 1);
  }
  int YBucket(int y) const {
    int bucket = y * y_buckets_ / kIntFeatureExtent;
    return ClipToRange(bucket, 0, static_cast<int>(y_buckets_) - 1);
  }
  // Use DivRounded for theta so that exactly vertical and horizontal are in
  // the middle of a bucket. The Modulo takes care of the wrap-around.
  int ThetaBucket(int theta) const {
    int bucket = DivRounded(theta * theta_buckets_, kIntFeatureExtent);
    return Modulo(bucket, theta_buckets_);
  }
  // Returns an INT_FEATURE_STRUCT corresponding to the given buckets.
  INT_FEATURE_STRUCT PositionFromBuckets(int x, int y, int theta) const;

  // Feature space definition - serialized.
  uint8_t x_buckets_;
  uint8_t y_buckets_;
  uint8_t theta_buckets_;
};

} // namespace tesseract.

#endif // TESSERACT_CLASSIFY_INTFEATURESPACE_H_

diff --git a/tesseract/src/classify/intfx.cpp b/tesseract/src/classify/intfx.cpp
new file mode 100644
index 00000000..062b0f1e
--- /dev/null
+++ b/tesseract/src/classify/intfx.cpp
/******************************************************************************
 ** Filename: intfx.c
 ** Purpose: Integer character normalization & feature extraction
 ** Author: Robert Moss, rays@google.com (Ray Smith)
 **
 ** (c) Copyright Hewlett-Packard Company, 1988.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + *****************************************************************************/ +/**---------------------------------------------------------------------------- + Include Files and Type Defines +----------------------------------------------------------------------------**/ + +#define _USE_MATH_DEFINES // for M_PI + +#include "intfx.h" + +#include "classify.h" +#include "intmatcher.h" +#include "linlsq.h" +#include "normalis.h" +#include "statistc.h" +#include "trainingsample.h" + +#include "helpers.h" + +#include "allheaders.h" + +#include <cmath> // for M_PI +#include <mutex> // for std::mutex + +namespace tesseract { + +/**---------------------------------------------------------------------------- + Global Data Definitions and Declarations +----------------------------------------------------------------------------**/ +// Look up table for cos and sin to turn the intfx feature angle to a vector. +// Protected by atan_table_mutex. +// The entries are in binary degrees where a full circle is 256 binary degrees. +static float cos_table[INT_CHAR_NORM_RANGE]; +static float sin_table[INT_CHAR_NORM_RANGE]; + +/**---------------------------------------------------------------------------- + Public Code +----------------------------------------------------------------------------**/ + +void InitIntegerFX() { + // Guards write access to AtanTable so we don't create it more than once. 
+ static std::mutex atan_table_mutex; + static bool atan_table_init = false; + std::lock_guard<std::mutex> guard(atan_table_mutex); + if (!atan_table_init) { + for (int i = 0; i < INT_CHAR_NORM_RANGE; ++i) { + cos_table[i] = cos(i * 2 * M_PI / INT_CHAR_NORM_RANGE + M_PI); + sin_table[i] = sin(i * 2 * M_PI / INT_CHAR_NORM_RANGE + M_PI); + } + atan_table_init = true; + } +} + +// Returns a vector representing the direction of a feature with the given +// theta direction in an INT_FEATURE_STRUCT. +FCOORD FeatureDirection(uint8_t theta) { + return FCOORD(cos_table[theta], sin_table[theta]); +} + +// Generates a TrainingSample from a TBLOB. Extracts features and sets +// the bounding box, so classifiers that operate on the image can work. +// TODO(rays) Make BlobToTrainingSample a member of Classify now that +// the FlexFx and FeatureDescription code have been removed and LearnBlob +// is now a member of Classify. +TrainingSample* BlobToTrainingSample( + const TBLOB& blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT* fx_info, + std::vector<INT_FEATURE_STRUCT>* bl_features) { + std::vector<INT_FEATURE_STRUCT> cn_features; + Classify::ExtractFeatures(blob, nonlinear_norm, bl_features, + &cn_features, fx_info, nullptr); + // TODO(rays) Use blob->PreciseBoundingBox() instead. + TBOX box = blob.bounding_box(); + TrainingSample* sample = nullptr; + int num_features = fx_info->NumCN; + if (num_features > 0) { + sample = TrainingSample::CopyFromFeatures(*fx_info, box, &cn_features[0], + num_features); + } + if (sample != nullptr) { + // Set the bounding box (in original image coordinates) in the sample. 
+ TPOINT topleft, botright; + topleft.x = box.left(); + topleft.y = box.top(); + botright.x = box.right(); + botright.y = box.bottom(); + TPOINT original_topleft, original_botright; + blob.denorm().DenormTransform(nullptr, topleft, &original_topleft); + blob.denorm().DenormTransform(nullptr, botright, &original_botright); + sample->set_bounding_box(TBOX(original_topleft.x, original_botright.y, + original_botright.x, original_topleft.y)); + } + return sample; +} + +// Computes the DENORMS for bl(baseline) and cn(character) normalization +// during feature extraction. The input denorm describes the current state +// of the blob, which is usually a baseline-normalized word. +// The Transforms setup are as follows: +// Baseline Normalized (bl) Output: +// We center the grapheme by aligning the x-coordinate of its centroid with +// x=128 and leaving the already-baseline-normalized y as-is. +// +// Character Normalized (cn) Output: +// We align the grapheme's centroid at the origin and scale it +// asymmetrically in x and y so that the 2nd moments are a standard value +// (51.2) ie the result is vaguely square. +// If classify_nonlinear_norm is true: +// A non-linear normalization is setup that attempts to evenly distribute +// edges across x and y. +// +// Some of the fields of fx_info are also setup: +// Length: Total length of outline. +// Rx: Rounded y second moment. (Reversed by convention.) +// Ry: rounded x second moment. +// Xmean: Rounded x center of mass of the blob. +// Ymean: Rounded y center of mass of the blob. +void Classify::SetupBLCNDenorms(const TBLOB& blob, bool nonlinear_norm, + DENORM* bl_denorm, DENORM* cn_denorm, + INT_FX_RESULT_STRUCT* fx_info) { + // Compute 1st and 2nd moments of the original outline. 
+ FCOORD center, second_moments; + int length = blob.ComputeMoments(¢er, &second_moments); + if (fx_info != nullptr) { + fx_info->Length = length; + fx_info->Rx = IntCastRounded(second_moments.y()); + fx_info->Ry = IntCastRounded(second_moments.x()); + + fx_info->Xmean = IntCastRounded(center.x()); + fx_info->Ymean = IntCastRounded(center.y()); + } + // Setup the denorm for Baseline normalization. + bl_denorm->SetupNormalization(nullptr, nullptr, &blob.denorm(), center.x(), 128.0f, + 1.0f, 1.0f, 128.0f, 128.0f); + // Setup the denorm for character normalization. + if (nonlinear_norm) { + GenericVector<GenericVector<int> > x_coords; + GenericVector<GenericVector<int> > y_coords; + TBOX box; + blob.GetPreciseBoundingBox(&box); + box.pad(1, 1); + blob.GetEdgeCoords(box, &x_coords, &y_coords); + cn_denorm->SetupNonLinear(&blob.denorm(), box, UINT8_MAX, UINT8_MAX, + 0.0f, 0.0f, x_coords, y_coords); + } else { + cn_denorm->SetupNormalization(nullptr, nullptr, &blob.denorm(), + center.x(), center.y(), + 51.2f / second_moments.x(), + 51.2f / second_moments.y(), + 128.0f, 128.0f); + } +} + +// Helper normalizes the direction, assuming that it is at the given +// unnormed_pos, using the given denorm, starting at the root_denorm. +static uint8_t NormalizeDirection(uint8_t dir, const FCOORD& unnormed_pos, + const DENORM& denorm, + const DENORM* root_denorm) { + // Convert direction to a vector. + FCOORD unnormed_end; + unnormed_end.from_direction(dir); + unnormed_end += unnormed_pos; + FCOORD normed_pos, normed_end; + denorm.NormTransform(root_denorm, unnormed_pos, &normed_pos); + denorm.NormTransform(root_denorm, unnormed_end, &normed_end); + normed_end -= normed_pos; + return normed_end.to_direction(); +} + +// Helper returns the mean direction vector from the given stats. Use the +// mean direction from dirs if there is information available, otherwise, use +// the fit_vector from point_diffs. 
static FCOORD MeanDirectionVector(const LLSQ& point_diffs, const LLSQ& dirs,
                                  const FCOORD& start_pt,
                                  const FCOORD& end_pt) {
  FCOORD fit_vector;
  if (dirs.count() > 0) {
    // There were directions, so use them. To avoid wrap-around problems, we
    // have 2 accumulators in dirs: x for normal directions and y for
    // directions offset by 128. We will use the one with the least variance.
    FCOORD mean_pt = dirs.mean_point();
    double mean_dir = 0.0;
    if (dirs.x_variance() <= dirs.y_variance()) {
      mean_dir = mean_pt.x();
    } else {
      // Undo the 128 offset applied to the y accumulator.
      mean_dir = mean_pt.y() + 128;
    }
    fit_vector.from_direction(Modulo(IntCastRounded(mean_dir), 256));
  } else {
    // There were no directions, so we rely on the vector_fit to the points.
    // Since the vector_fit is 180 degrees ambiguous, we align with the
    // supplied feature_dir by making the scalar product non-negative.
    FCOORD feature_dir(end_pt - start_pt);
    fit_vector = point_diffs.vector_fit();
    if (fit_vector.x() == 0.0f && fit_vector.y() == 0.0f) {
      // There was only a single point. Use feature_dir directly.
      fit_vector = feature_dir;
    } else {
      // Sometimes the least mean squares fit is wrong, due to the small sample
      // of points and scaling. Use a 90 degree rotated vector if that matches
      // feature_dir better.
      FCOORD fit_vector2 = !fit_vector;
      // The fit_vector is 180 degrees ambiguous, so resolve the ambiguity by
      // insisting that the scalar product with the feature_dir should be +ve.
      if (fit_vector % feature_dir < 0.0)
        fit_vector = -fit_vector;
      if (fit_vector2 % feature_dir < 0.0)
        fit_vector2 = -fit_vector2;
      // Even though fit_vector2 has a higher mean squared error, it might be
      // a better fit, so use it if the dot product with feature_dir is bigger.
      if (fit_vector2 % feature_dir > fit_vector % feature_dir)
        fit_vector = fit_vector2;
    }
  }
  return fit_vector;
}

// Helper computes one or more features corresponding to the given points.
// Emitted features are on the line defined by:
// start_pt + lambda * (end_pt - start_pt) for scalar lambda.
// Features are spaced at feature_length intervals.
// Returns the number of features appended to *features (0 if the segment
// is degenerate or shorter than half a feature_length).
static int ComputeFeatures(const FCOORD& start_pt, const FCOORD& end_pt,
                           double feature_length,
                           std::vector<INT_FEATURE_STRUCT>* features) {
  FCOORD feature_vector(end_pt - start_pt);
  if (feature_vector.x() == 0.0f && feature_vector.y() == 0.0f) return 0;
  // Compute theta for the feature based on its direction.
  uint8_t theta = feature_vector.to_direction();
  // Compute the number of features and lambda_step.
  double target_length = feature_vector.length();
  int num_features = IntCastRounded(target_length / feature_length);
  if (num_features == 0) return 0;
  // Divide the length evenly into num_features pieces.
  // Starting lambda at half a step centres each feature in its interval.
  double lambda_step = 1.0 / num_features;
  double lambda = lambda_step / 2.0;
  for (int f = 0; f < num_features; ++f, lambda += lambda_step) {
    FCOORD feature_pt(start_pt);
    feature_pt += feature_vector * lambda;
    INT_FEATURE_STRUCT feature(feature_pt, theta);
    features->push_back(feature);
  }
  return num_features;
}

// Gathers outline points and their directions from start_index into dirs by
// stepping along the outline and normalizing the coordinates until the
// required feature_length has been collected or end_index is reached.
// On input pos must point to the position corresponding to start_index and on
// return pos is updated to the current raw position, and pos_normed is set to
// the normed version of pos.
// Since directions wrap-around, they need special treatment to get the mean.
// Provided the cluster of directions doesn't straddle the wrap-around point,
// the simple mean works. If they do, then, unless the directions are wildly
// varying, the cluster rotated by 180 degrees will not straddle the wrap-
// around point, so mean(dir + 180 degrees) - 180 degrees will work.
// Since
// LLSQ conveniently stores the mean of 2 variables, we use it to store
// dir and dir+128 (128 is 180 degrees) and then use the resulting mean
// with the least variance.
// Returns the index at which collection stopped (one past the last point
// consumed, or the first point left for the next segment).
static int GatherPoints(const C_OUTLINE* outline, double feature_length,
                        const DENORM& denorm, const DENORM* root_denorm,
                        int start_index, int end_index,
                        ICOORD* pos, FCOORD* pos_normed,
                        LLSQ* points, LLSQ* dirs) {
  int step_length = outline->pathlength();
  ICOORD step = outline->step(start_index % step_length);
  // Prev_normed is the start point of this collection and will be set on the
  // first iteration, and on later iterations used to determine the length
  // that has been collected.
  FCOORD prev_normed;
  points->clear();
  dirs->clear();
  int num_points = 0;
  int index;
  // index may exceed step_length (wrap-around), so every outline accessor
  // below is indexed modulo step_length.
  for (index = start_index; index <= end_index; ++index, *pos += step) {
    step = outline->step(index % step_length);
    int edge_weight = outline->edge_strength_at_index(index % step_length);
    if (edge_weight == 0) {
      // This point has conflicting gradient and step direction, so ignore it.
      continue;
    }
    // Get the sub-pixel precise location and normalize.
    FCOORD f_pos = outline->sub_pixel_pos_at_index(*pos, index % step_length);
    denorm.NormTransform(root_denorm, f_pos, pos_normed);
    if (num_points == 0) {
      // The start of this segment.
      prev_normed = *pos_normed;
    } else {
      FCOORD offset = *pos_normed - prev_normed;
      float length = offset.length();
      if (length > feature_length) {
        // We have gone far enough from the start. We will use this point in
        // the next set so return what we have so far.
        return index;
      }
    }
    points->add(pos_normed->x(), pos_normed->y(), edge_weight);
    int direction = outline->direction_at_index(index % step_length);
    if (direction >= 0) {
      direction = NormalizeDirection(direction, f_pos, denorm, root_denorm);
      // Use both the direction and direction +128 so we are not trying to
      // take the mean of something straddling the wrap-around point.
      dirs->add(direction, Modulo(direction + 128, 256));
    }
    ++num_points;
  }
  return index;
}

// Extracts Tesseract features and appends them to the features vector.
// Startpt to lastpt, inclusive, MUST have the same src_outline member,
// which may be nullptr. The vector from lastpt to its next is included in
// the feature extraction. Hidden edges should be excluded by the caller.
// If force_poly is true, the features will be extracted from the polygonal
// approximation even if more accurate data is available.
static void ExtractFeaturesFromRun(
    const EDGEPT* startpt, const EDGEPT* lastpt,
    const DENORM& denorm, double feature_length, bool force_poly,
    std::vector<INT_FEATURE_STRUCT>* features) {
  const EDGEPT* endpt = lastpt->next;
  const C_OUTLINE* outline = startpt->src_outline;
  if (outline != nullptr && !force_poly) {
    // Detailed information is available. We have to normalize only from
    // the root_denorm to denorm.
    const DENORM* root_denorm = denorm.RootDenorm();
    int total_features = 0;
    // Get the features from the outline.
    int step_length = outline->pathlength();
    int start_index = startpt->start_step;
    // pos is the integer coordinates of the binary image steps.
    ICOORD pos = outline->position_at_index(start_index);
    // We use an end_index that allows us to use a positive increment, but that
    // may be beyond the bounds of the outline steps due to wrap-around, so
    // we use % step_length everywhere, except for start_index.
    int end_index = lastpt->start_step + lastpt->step_count;
    if (end_index <= start_index)
      end_index += step_length;
    LLSQ prev_points;
    LLSQ prev_dirs;
    FCOORD prev_normed_pos = outline->sub_pixel_pos_at_index(pos, start_index);
    denorm.NormTransform(root_denorm, prev_normed_pos, &prev_normed_pos);
    LLSQ points;
    LLSQ dirs;
    FCOORD normed_pos(0.0f, 0.0f);
    int index = GatherPoints(outline, feature_length, denorm, root_denorm,
                             start_index, end_index, &pos, &normed_pos,
                             &points, &dirs);
    while (index <= end_index) {
      // At each iteration we nominally have 3 accumulated sets of points and
      // dirs: prev_points/dirs, points/dirs, next_points/dirs and sum them
      // into sum_points/dirs, but we don't necessarily get any features out,
      // so if that is the case, we keep accumulating instead of rotating the
      // accumulators.
      LLSQ next_points;
      LLSQ next_dirs;
      FCOORD next_normed_pos(0.0f, 0.0f);
      index = GatherPoints(outline, feature_length, denorm, root_denorm,
                           index, end_index, &pos, &next_normed_pos,
                           &next_points, &next_dirs);
      LLSQ sum_points(prev_points);
      // TODO(rays) find out why it is better to use just dirs and next_dirs
      // in sum_dirs, instead of using prev_dirs as well.
      LLSQ sum_dirs(dirs);
      sum_points.add(points);
      sum_points.add(next_points);
      sum_dirs.add(next_dirs);
      bool made_features = false;
      // If we have some points, we can try making some features.
      if (sum_points.count() > 0) {
        // We have gone far enough from the start. Make a feature and restart.
        FCOORD fit_pt = sum_points.mean_point();
        FCOORD fit_vector = MeanDirectionVector(sum_points, sum_dirs,
                                                prev_normed_pos, normed_pos);
        // The segment to which we fit features is the line passing through
        // fit_pt in direction of fit_vector that starts nearest to
        // prev_normed_pos and ends nearest to normed_pos.
        FCOORD start_pos = prev_normed_pos.nearest_pt_on_line(fit_pt,
                                                              fit_vector);
        FCOORD end_pos = normed_pos.nearest_pt_on_line(fit_pt, fit_vector);
        // Possible correction to match the adjacent polygon segment.
        if (total_features == 0 && startpt != endpt) {
          FCOORD poly_pos(startpt->pos.x, startpt->pos.y);
          denorm.LocalNormTransform(poly_pos, &start_pos);
        }
        if (index > end_index && startpt != endpt) {
          FCOORD poly_pos(endpt->pos.x, endpt->pos.y);
          denorm.LocalNormTransform(poly_pos, &end_pos);
        }
        int num_features = ComputeFeatures(start_pos, end_pos, feature_length,
                                           features);
        if (num_features > 0) {
          // We made some features so shuffle the accumulators.
          prev_points = points;
          prev_dirs = dirs;
          prev_normed_pos = normed_pos;
          points = next_points;
          dirs = next_dirs;
          made_features = true;
          total_features += num_features;
        }
        // The end of the next set becomes the end next time around.
        normed_pos = next_normed_pos;
      }
      if (!made_features) {
        // We didn't make any features, so keep the prev accumulators and
        // add the next ones into the current.
        points.add(next_points);
        dirs.add(next_dirs);
      }
    }
  } else {
    // There is no outline, so we are forced to use the polygonal approximation.
    const EDGEPT* pt = startpt;
    do {
      FCOORD start_pos(pt->pos.x, pt->pos.y);
      FCOORD end_pos(pt->next->pos.x, pt->next->pos.y);
      denorm.LocalNormTransform(start_pos, &start_pos);
      denorm.LocalNormTransform(end_pos, &end_pos);
      ComputeFeatures(start_pos, end_pos, feature_length, features);
    } while ((pt = pt->next) != endpt);
  }
}

// Extracts sets of 3-D features of length kStandardFeatureLength (=12.8), as
// (x,y) position and angle as measured counterclockwise from the vector
// <-1, 0>, from blob using two normalizations defined by bl_denorm and
// cn_denorm. See SetupBLCNDenorms for definitions.
// If outline_cn_counts is not nullptr, on return it contains the cumulative
// number of cn features generated for each outline in the blob (in order).
// Thus after the first outline, there were (*outline_cn_counts)[0] features,
// after the second outline, there were (*outline_cn_counts)[1] features etc.
void Classify::ExtractFeatures(const TBLOB& blob,
                               bool nonlinear_norm,
                               std::vector<INT_FEATURE_STRUCT>* bl_features,
                               std::vector<INT_FEATURE_STRUCT>* cn_features,
                               INT_FX_RESULT_STRUCT* results,
                               GenericVector<int>* outline_cn_counts) {
  DENORM bl_denorm, cn_denorm;
  tesseract::Classify::SetupBLCNDenorms(blob, nonlinear_norm,
                                        &bl_denorm, &cn_denorm, results);
  if (outline_cn_counts != nullptr)
    outline_cn_counts->truncate(0);
  // Iterate the outlines.
  for (TESSLINE* ol = blob.outlines; ol != nullptr; ol = ol->next) {
    // Iterate the polygon.
    EDGEPT* loop_pt = ol->FindBestStartPt();
    EDGEPT* pt = loop_pt;
    if (pt == nullptr) continue;
    do {
      // continue here advances to the loop condition, i.e. skips to pt->next.
      if (pt->IsHidden()) continue;
      // Find a run of equal src_outline.
      EDGEPT* last_pt = pt;
      do {
        last_pt = last_pt->next;
      } while (last_pt != loop_pt && !last_pt->IsHidden() &&
               last_pt->src_outline == pt->src_outline);
      last_pt = last_pt->prev;
      // Until the adaptive classifier can be weaned off polygon segments,
      // we have to force extraction from the polygon for the bl_features.
      ExtractFeaturesFromRun(pt, last_pt, bl_denorm, kStandardFeatureLength,
                             true, bl_features);
      ExtractFeaturesFromRun(pt, last_pt, cn_denorm, kStandardFeatureLength,
                             false, cn_features);
      pt = last_pt;
    } while ((pt = pt->next) != loop_pt);
    if (outline_cn_counts != nullptr)
      outline_cn_counts->push_back(cn_features->size());
  }
  // Fill in the summary statistics for the caller.
  results->NumBL = bl_features->size();
  results->NumCN = cn_features->size();
  results->YBottom = blob.bounding_box().bottom();
  results->YTop = blob.bounding_box().top();
  results->Width = blob.bounding_box().width();
}

}  // namespace tesseract
diff --git a/tesseract/src/classify/intfx.h b/tesseract/src/classify/intfx.h
new file mode 100644
index 00000000..f4f8fd1a
--- /dev/null
+++ b/tesseract/src/classify/intfx.h
/******************************************************************************
 ** Filename:    intfx.h
 ** Purpose:     Interface to high level integer feature extractor.
 ** Author:      Robert Moss
 ** History:     Tue May 21 15:51:57 MDT 1991, RWM, Created.
 **
 ** (c) Copyright Hewlett-Packard Company, 1988.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 ******************************************************************************/
#ifndef INTFX_H
#define INTFX_H

#include "blobs.h"
#include "intproto.h"
#include "normalis.h"

#include <cmath>

namespace tesseract {

class DENORM;

class TrainingSample;

// Summary statistics produced by integer feature extraction of one blob.
struct INT_FX_RESULT_STRUCT {
  int32_t Length;          // total length of all outlines
  int16_t Xmean, Ymean;    // center of mass of all outlines
  int16_t Rx, Ry;          // radius of gyration
  int16_t NumBL, NumCN;    // number of features extracted
  int16_t Width;           // Width of blob in BLN coords.
  uint8_t YBottom;         // Bottom of blob in BLN coords.
  uint8_t YTop;            // Top of blob in BLN coords.
};

// The standard feature length (= 12.8).
const double kStandardFeatureLength = 64.0 / 5;

/**----------------------------------------------------------------------------
          Public Function Prototypes
----------------------------------------------------------------------------**/
TESS_API
void InitIntegerFX();

// Returns a vector representing the direction of a feature with the given
// theta direction in an INT_FEATURE_STRUCT.
TESS_API
FCOORD FeatureDirection(uint8_t theta);

// Generates a TrainingSample from a TBLOB. Extracts features and sets
// the bounding box, so classifiers that operate on the image can work.
// TODO(rays) BlobToTrainingSample must remain a global function until
// the FlexFx and FeatureDescription code can be removed and LearnBlob
// made a member of Classify.
TrainingSample* BlobToTrainingSample(
    const TBLOB& blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT* fx_info,
    std::vector<INT_FEATURE_STRUCT>* bl_features);

}  // namespace tesseract

#endif
diff --git a/tesseract/src/classify/intmatcher.cpp b/tesseract/src/classify/intmatcher.cpp
new file mode 100644
index 00000000..b78c700f
--- /dev/null
+++ b/tesseract/src/classify/intmatcher.cpp
/******************************************************************************
 ** Filename:    intmatcher.cpp
 ** Purpose:     Generic high level classification routines.
 ** Author:      Robert Moss
 ** (c) Copyright Hewlett-Packard Company, 1988.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 ******************************************************************************/

// Include automatically generated configuration file if running autoconf.
#ifdef HAVE_CONFIG_H
#include "config_auto.h"
#endif

#include "intmatcher.h"

#include "fontinfo.h"
#include "intproto.h"
#include "scrollview.h"
#include "float2int.h"
#include "classify.h"
#include "shapetable.h"

#include "helpers.h"

#include <cassert>
#include <cmath>

namespace tesseract {

/*----------------------------------------------------------------------------
                    Global Data Definitions and Declarations
----------------------------------------------------------------------------*/
// Parameters of the sigmoid used to convert similarity to evidence in the
// similarity_evidence_table_ that is used to convert distance metric to an
// 8 bit evidence value in the secondary matcher. (See IntMatcher::Init).
const float IntegerMatcher::kSEExponentialMultiplier = 0.0f;
const float IntegerMatcher::kSimilarityCenter = 0.0075f;

// offset_table[n] is the bit index of the least significant set bit of n,
// or 255 when n == 0 (no bit set).
static const uint8_t offset_table[] = {
  255, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3,
  0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4,
  0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3,
  0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5,
  0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3,
  0, 1, 0, 2, 0, 1, 0, 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4,
  0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3,
  0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6,
  0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3,
  0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4,
  0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
};

// next_table[n] is n with its least significant set bit cleared, so that
// repeated offset_table/next_table lookups enumerate the set bits of a byte.
static const uint8_t next_table[] = {
  0, 0, 0, 0x2, 0, 0x4, 0x4, 0x6, 0, 0x8, 0x8, 0x0a, 0x08, 0x0c, 0x0c, 0x0e,
  0, 0x10, 0x10, 0x12, 0x10, 0x14, 0x14, 0x16, 0x10, 0x18, 0x18, 0x1a,
  0x18, 0x1c, 0x1c, 0x1e, 0, 0x20, 0x20, 0x22, 0x20, 0x24, 0x24, 0x26,
  0x20, 0x28, 0x28, 0x2a, 0x28, 0x2c, 0x2c, 0x2e, 0x20, 0x30, 0x30, 0x32,
  0x30, 0x34, 0x34, 0x36, 0x30, 0x38, 0x38, 0x3a, 0x38, 0x3c, 0x3c, 0x3e,
  0, 0x40, 0x40, 0x42, 0x40, 0x44, 0x44, 0x46, 0x40, 0x48, 0x48, 0x4a,
  0x48, 0x4c, 0x4c, 0x4e, 0x40, 0x50, 0x50, 0x52, 0x50, 0x54, 0x54, 0x56,
  0x50, 0x58, 0x58, 0x5a, 0x58, 0x5c, 0x5c, 0x5e, 0x40, 0x60, 0x60, 0x62,
  0x60, 0x64, 0x64, 0x66, 0x60, 0x68, 0x68, 0x6a, 0x68, 0x6c, 0x6c, 0x6e,
  0x60, 0x70, 0x70, 0x72, 0x70, 0x74, 0x74, 0x76, 0x70, 0x78, 0x78, 0x7a,
  0x78, 0x7c, 0x7c, 0x7e, 0, 0x80, 0x80, 0x82, 0x80, 0x84, 0x84, 0x86,
  0x80, 0x88, 0x88, 0x8a, 0x88, 0x8c, 0x8c, 0x8e, 0x80, 0x90, 0x90, 0x92,
  0x90, 0x94, 0x94, 0x96, 0x90, 0x98, 0x98, 0x9a, 0x98, 0x9c, 0x9c, 0x9e,
  0x80, 0xa0, 0xa0, 0xa2, 0xa0, 0xa4, 0xa4, 0xa6, 0xa0, 0xa8, 0xa8, 0xaa,
  0xa8, 0xac, 0xac, 0xae, 0xa0, 0xb0, 0xb0, 0xb2, 0xb0, 0xb4, 0xb4, 0xb6,
  0xb0, 0xb8, 0xb8, 0xba, 0xb8, 0xbc, 0xbc, 0xbe, 0x80, 0xc0, 0xc0, 0xc2,
  0xc0, 0xc4, 0xc4, 0xc6, 0xc0, 0xc8, 0xc8, 0xca, 0xc8, 0xcc, 0xcc, 0xce,
  0xc0, 0xd0, 0xd0, 0xd2, 0xd0, 0xd4, 0xd4, 0xd6, 0xd0, 0xd8, 0xd8, 0xda,
  0xd8, 0xdc, 0xdc, 0xde, 0xc0, 0xe0, 0xe0, 0xe2, 0xe0, 0xe4, 0xe4, 0xe6,
  0xe0, 0xe8, 0xe8, 0xea, 0xe8, 0xec, 0xec, 0xee, 0xe0, 0xf0, 0xf0, 0xf2,
  0xf0, 0xf4, 0xf4, 0xf6, 0xf0, 0xf8, 0xf8, 0xfa, 0xf8, 0xfc, 0xfc, 0xfe
};

// See http://b/19318793 (#6) for a complete discussion.

/**
 * Sort Key array in ascending order using heap sort
 * algorithm. Also sort Index array that is tied to
 * the key array.
 * @param n  Number of elements to sort
 * @param ra Key array [1..n]   (1-indexed: element 0 is unused)
 * @param rb Index array [1..n] (permuted in lockstep with ra)
 */
static void
HeapSort (int n, int ra[], int rb[]) {
  int i, rra, rrb;
  int l, j, ir;

  l = (n >> 1) + 1;
  ir = n;
  // Classic two-phase heapsort: while l > 1 we are still building the heap;
  // afterwards we repeatedly extract the max into the shrinking tail.
  for (;;) {
    if (l > 1) {
      rra = ra[--l];
      rrb = rb[l];
    }
    else {
      rra = ra[ir];
      rrb = rb[ir];
      ra[ir] = ra[1];
      rb[ir] = rb[1];
      if (--ir == 1) {
        ra[1] = rra;
        rb[1] = rrb;
        return;
      }
    }
    // Sift rra/rrb down from position l to its proper place in the heap.
    i = l;
    j = l << 1;
    while (j <= ir) {
      if (j < ir && ra[j] < ra[j + 1])
        ++j;
      if (rra < ra[j]) {
        ra[i] = ra[j];
        rb[i] = rb[j];
        j += (i = j);
      }
      else
        j = ir + 1;
    }
    ra[i] = rra;
    rb[i] = rrb;
  }
}

// Encapsulation of the intermediate data and computations made by the class
// pruner. The class pruner implements a simple linear classifier on binary
// features by heavily quantizing the feature space, and applying
// NUM_BITS_PER_CLASS (2)-bit weights to the features. Lack of resolution in
// weights is compensated by a non-constant bias that is dependent on the
// number of features present.
class ClassPruner {
 public:
  ClassPruner(int max_classes) {
    // The unrolled loop in ComputeScores means that the array sizes need to
    // be rounded up so that the array is big enough to accommodate the extra
    // entries accessed by the unrolling. Each pruner word is of size
    // BITS_PER_WERD and each entry is NUM_BITS_PER_CLASS, so there are
    // BITS_PER_WERD / NUM_BITS_PER_CLASS entries.
    // See ComputeScores.
    max_classes_ = max_classes;
    rounded_classes_ = RoundUp(
        max_classes, WERDS_PER_CP_VECTOR * BITS_PER_WERD / NUM_BITS_PER_CLASS);
    // NOTE(review): raw owning arrays with no deleted copy ctor/assignment;
    // copying a ClassPruner would double-delete — confirm it is never copied.
    class_count_ = new int[rounded_classes_];
    norm_count_ = new int[rounded_classes_];
    sort_key_ = new int[rounded_classes_ + 1];
    sort_index_ = new int[rounded_classes_ + 1];
    for (int i = 0; i < rounded_classes_; i++) {
      class_count_[i] = 0;
    }
    pruning_threshold_ = 0;
    num_features_ = 0;
    num_classes_ = 0;
  }

  ~ClassPruner() {
    delete []class_count_;
    delete []norm_count_;
    delete []sort_key_;
    delete []sort_index_;
  }

  /// Computes the scores for every class in the character set, by summing the
  /// weights for each feature and stores the sums internally in class_count_.
  void ComputeScores(const INT_TEMPLATES_STRUCT* int_templates,
                     int num_features, const INT_FEATURE_STRUCT* features) {
    num_features_ = num_features;
    int num_pruners = int_templates->NumClassPruners;
    for (int f = 0; f < num_features; ++f) {
      const INT_FEATURE_STRUCT* feature = &features[f];
      // Quantize the feature to NUM_CP_BUCKETS*NUM_CP_BUCKETS*NUM_CP_BUCKETS.
      int x = feature->X * NUM_CP_BUCKETS >> 8;
      int y = feature->Y * NUM_CP_BUCKETS >> 8;
      int theta = feature->Theta * NUM_CP_BUCKETS >> 8;
      int class_id = 0;
      // Each CLASS_PRUNER_STRUCT only covers CLASSES_PER_CP(32) classes, so
      // we need a collection of them, indexed by pruner_set.
      for (int pruner_set = 0; pruner_set < num_pruners; ++pruner_set) {
        // Look up quantized feature in a 3-D array, an array of weights for
        // each class.
        const uint32_t* pruner_word_ptr =
            int_templates->ClassPruners[pruner_set]->p[x][y][theta];
        for (int word = 0; word < WERDS_PER_CP_VECTOR; ++word) {
          uint32_t pruner_word = *pruner_word_ptr++;
          // This inner loop is unrolled to speed up the ClassPruner.
          // Currently gcc would not unroll it unless it is set to O3
          // level of optimization or -funroll-loops is specified.
          /*
          uint32_t class_mask = (1 << NUM_BITS_PER_CLASS) - 1;
          for (int bit = 0; bit < BITS_PER_WERD/NUM_BITS_PER_CLASS; bit++) {
            class_count_[class_id++] += pruner_word & class_mask;
            pruner_word >>= NUM_BITS_PER_CLASS;
          }
          */
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
          pruner_word >>= NUM_BITS_PER_CLASS;
          class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
        }
      }
    }
  }

  /// Adjusts the scores according to the number of expected features.
  /// Used
  /// in lieu of a constant bias, this penalizes classes that expect more
  /// features than there are present. Thus an actual c will score higher for c
  /// than e, even though almost all the features match e as well as c, because
  /// e expects more features to be present.
  void AdjustForExpectedNumFeatures(const uint16_t* expected_num_features,
                                    int cutoff_strength) {
    for (int class_id = 0; class_id < max_classes_; ++class_id) {
      if (num_features_ < expected_num_features[class_id]) {
        // Scale the penalty by the deficit relative to the cutoff strength.
        int deficit = expected_num_features[class_id] - num_features_;
        class_count_[class_id] -= class_count_[class_id] * deficit /
          (num_features_ * cutoff_strength + deficit);
      }
    }
  }

  /// Zeros the scores for classes disabled in the unicharset.
  /// Implements the black-list to recognize a subset of the character set.
  void DisableDisabledClasses(const UNICHARSET& unicharset) {
    for (int class_id = 0; class_id < max_classes_; ++class_id) {
      if (!unicharset.get_enabled(class_id))
        class_count_[class_id] = 0;  // This char is disabled!
    }
  }

  /** Zeros the scores of fragments. */
  void DisableFragments(const UNICHARSET& unicharset) {
    for (int class_id = 0; class_id < max_classes_; ++class_id) {
      // Do not include character fragments in the class pruner
      // results if disable_character_fragments is true.
      if (unicharset.get_fragment(class_id)) {
        class_count_[class_id] = 0;
      }
    }
  }

  /// Normalizes the counts for xheight, putting the normalized result in
  /// norm_count_. Applies a simple subtractive penalty for incorrect vertical
  /// position provided by the normalization_factors array, indexed by
  /// character class, and scaled by the norm_multiplier.
  void NormalizeForXheight(int norm_multiplier,
                           const uint8_t* normalization_factors) {
    for (int class_id = 0; class_id < max_classes_; class_id++) {
      norm_count_[class_id] = class_count_[class_id] -
          ((norm_multiplier * normalization_factors[class_id]) >> 8);
    }
  }

  /** The nop normalization copies the class_count_ array to norm_count_. */
  void NoNormalization() {
    for (int class_id = 0; class_id < max_classes_; class_id++) {
      norm_count_[class_id] = class_count_[class_id];
    }
  }

  /// Prunes the classes using <the maximum count> * pruning_factor/256 as a
  /// threshold for keeping classes. If max_of_non_fragments, then ignore
  /// fragments in computing the maximum count.
  void PruneAndSort(int pruning_factor, int keep_this,
                    bool max_of_non_fragments, const UNICHARSET& unicharset) {
    int max_count = 0;
    for (int c = 0; c < max_classes_; ++c) {
      if (norm_count_[c] > max_count &&
          // This additional check is added in order to ensure that
          // the classifier will return at least one non-fragmented
          // character match.
          // TODO(daria): verify that this helps accuracy and does not
          // hurt performance.
          (!max_of_non_fragments || !unicharset.get_fragment(c))) {
        max_count = norm_count_[c];
      }
    }
    // Prune Classes.
    pruning_threshold_ = (max_count * pruning_factor) >> 8;
    // Select Classes: floor the threshold at 1 so all-zero scores drop out.
    if (pruning_threshold_ < 1)
      pruning_threshold_ = 1;
    num_classes_ = 0;
    // Note: sort_key_/sort_index_ are filled 1-indexed, as HeapSort expects.
    for (int class_id = 0; class_id < max_classes_; class_id++) {
      if (norm_count_[class_id] >= pruning_threshold_ ||
          class_id == keep_this) {
        ++num_classes_;
        sort_index_[num_classes_] = class_id;
        sort_key_[num_classes_] = norm_count_[class_id];
      }
    }

    // Sort Classes using Heapsort Algorithm.
    if (num_classes_ > 1)
      HeapSort(num_classes_, sort_key_, sort_index_);
  }

  /** Prints debug info on the class pruner matches for the pruned classes only.
   */
  void DebugMatch(const Classify& classify,
                  const INT_TEMPLATES_STRUCT* int_templates,
                  const INT_FEATURE_STRUCT* features) const {
    int num_pruners = int_templates->NumClassPruners;
    int max_num_classes = int_templates->NumClasses;
    for (int f = 0; f < num_features_; ++f) {
      const INT_FEATURE_STRUCT* feature = &features[f];
      tprintf("F=%3d(%d,%d,%d),", f, feature->X, feature->Y, feature->Theta);
      // Quantize the feature to NUM_CP_BUCKETS*NUM_CP_BUCKETS*NUM_CP_BUCKETS,
      // mirroring the quantization used in ComputeScores.
      int x = feature->X * NUM_CP_BUCKETS >> 8;
      int y = feature->Y * NUM_CP_BUCKETS >> 8;
      int theta = feature->Theta * NUM_CP_BUCKETS >> 8;
      int class_id = 0;
      for (int pruner_set = 0; pruner_set < num_pruners; ++pruner_set) {
        // Look up quantized feature in a 3-D array, an array of weights for
        // each class.
        const uint32_t* pruner_word_ptr =
            int_templates->ClassPruners[pruner_set]->p[x][y][theta];
        for (int word = 0; word < WERDS_PER_CP_VECTOR; ++word) {
          uint32_t pruner_word = *pruner_word_ptr++;
          // Print only classes that survived pruning.
          for (int word_class = 0; word_class < 16 &&
               class_id < max_num_classes; ++word_class, ++class_id) {
            if (norm_count_[class_id] >= pruning_threshold_) {
              tprintf(" %s=%d,",
                      classify.ClassIDToDebugStr(int_templates,
                                                 class_id, 0).c_str(),
                      pruner_word & CLASS_PRUNER_CLASS_MASK);
            }
            pruner_word >>= NUM_BITS_PER_CLASS;
          }
        }
        tprintf("\n");
      }
    }
  }

  /** Prints a summary of the pruner result.
   */
  void SummarizeResult(const Classify& classify,
                       const INT_TEMPLATES_STRUCT* int_templates,
                       const uint16_t* expected_num_features,
                       int norm_multiplier,
                       const uint8_t* normalization_factors) const {
    tprintf("CP:%d classes, %d features:\n", num_classes_, num_features_);
    // sort_index_/sort_key_ are 1-indexed and ascending, so walk backwards
    // to print best-first.
    for (int i = 0; i < num_classes_; ++i) {
      int class_id = sort_index_[num_classes_ - i];
      STRING class_string = classify.ClassIDToDebugStr(int_templates,
                                                       class_id, 0);
      tprintf("%s:Initial=%d, E=%d, Xht-adj=%d, N=%d, Rat=%.2f\n",
              class_string.c_str(),
              class_count_[class_id],
              expected_num_features[class_id],
              (norm_multiplier * normalization_factors[class_id]) >> 8,
              sort_key_[num_classes_ - i],
              100.0 - 100.0 * sort_key_[num_classes_ - i] /
                (CLASS_PRUNER_CLASS_MASK * num_features_));
    }
  }

  /// Copies the pruned, sorted classes into the output results and returns
  /// the number of classes.
  int SetupResults(std::vector<CP_RESULT_STRUCT>* results) const {
    results->resize(num_classes_);
    for (int c = 0; c < num_classes_; ++c) {
      (*results)[c].Class = sort_index_[num_classes_ - c];
      // Rating maps the score into [0,1]: 0 = perfect, 1 = no evidence.
      (*results)[c].Rating = 1.0f - sort_key_[num_classes_ - c] /
        (static_cast<float>(CLASS_PRUNER_CLASS_MASK) * num_features_);
    }
    return num_classes_;
  }

 private:
  /** Array[rounded_classes_] of initial counts for each class. */
  int *class_count_;
  /// Array[rounded_classes_] of modified counts for each class after
  /// normalizing for expected number of features, disabled classes, fragments,
  /// and xheights.
  int *norm_count_;
  /** Array[rounded_classes_ +1] of pruned counts that gets sorted */
  int *sort_key_;
  /** Array[rounded_classes_ +1] of classes corresponding to sort_key_. */
  int *sort_index_;
  /** Number of classes in this class pruner. */
  int max_classes_;
  /** Rounded up number of classes used for array sizes. */
  int rounded_classes_;
  /** Threshold count applied to prune classes. */
  int pruning_threshold_;
  /** The number of features used to compute the scores. */
  int num_features_;
  /** Final number of pruned classes. */
  int num_classes_;
};

/*----------------------------------------------------------------------------
              Public Code
----------------------------------------------------------------------------*/
/**
 * Runs the class pruner from int_templates on the given features, returning
 * the number of classes output in results.
 * @param int_templates          Class pruner tables
 * @param num_features           Number of features in blob
 * @param features               Array of features
 * @param normalization_factors  Array of fudge factors from blob
 *                               normalization process (by CLASS_INDEX)
 * @param expected_num_features  Array of expected number of features
 *                               for each class (by CLASS_INDEX)
 * @param results                Sorted Array of pruned classes. Must be an
 *                               array of size at least
 *                               int_templates->NumClasses.
 * @param keep_this              Class id to keep even if it would be pruned.
 */
int Classify::PruneClasses(const INT_TEMPLATES_STRUCT* int_templates,
                           int num_features, int keep_this,
                           const INT_FEATURE_STRUCT* features,
                           const uint8_t* normalization_factors,
                           const uint16_t* expected_num_features,
                           std::vector<CP_RESULT_STRUCT>* results) {
  ClassPruner pruner(int_templates->NumClasses);
  // Compute initial match scores for all classes.
  pruner.ComputeScores(int_templates, num_features, features);
  // Adjust match scores for number of expected features.
  pruner.AdjustForExpectedNumFeatures(expected_num_features,
                                      classify_cp_cutoff_strength);
  // Apply disabled classes in unicharset - only works without a shape_table.
  if (shape_table_ == nullptr)
    pruner.DisableDisabledClasses(unicharset);
  // If fragments are disabled, remove them, also only without a shape table.
  if (disable_character_fragments && shape_table_ == nullptr)
    pruner.DisableFragments(unicharset);

  // If we have good x-heights, apply the given normalization factors.
+ if (normalization_factors != nullptr) { + pruner.NormalizeForXheight(classify_class_pruner_multiplier, + normalization_factors); + } else { + pruner.NoNormalization(); + } + // Do the actual pruning and sort the short-list. + pruner.PruneAndSort(classify_class_pruner_threshold, keep_this, + shape_table_ == nullptr, unicharset); + + if (classify_debug_level > 2) { + pruner.DebugMatch(*this, int_templates, features); + } + if (classify_debug_level > 1) { + pruner.SummarizeResult(*this, int_templates, expected_num_features, + classify_class_pruner_multiplier, + normalization_factors); + } + // Convert to the expected output format. + return pruner.SetupResults(results); +} + +/** + * IntegerMatcher returns the best configuration and rating + * for a single class. The class matched against is determined + * by the uniqueness of the ClassTemplate parameter. The + * best rating and its associated configuration are returned. + * + * Globals: + * - local_matcher_multiplier_ Normalization factor multiplier + * param ClassTemplate Prototypes & tables for a class + * param NumFeatures Number of features in blob + * param Features Array of features + * param NormalizationFactor Fudge factor from blob normalization process + * param Result Class rating & configuration: (0.0 -> 1.0), 0=bad, 1=good + * param Debug Debugger flag: 1=debugger on + */ +void IntegerMatcher::Match(INT_CLASS ClassTemplate, + BIT_VECTOR ProtoMask, + BIT_VECTOR ConfigMask, + int16_t NumFeatures, + const INT_FEATURE_STRUCT* Features, + UnicharRating* Result, + int AdaptFeatureThreshold, + int Debug, + bool SeparateDebugWindows) { + auto *tables = new ScratchEvidence(); + int Feature; + + if (MatchDebuggingOn (Debug)) + tprintf ("Integer Matcher -------------------------------------------\n"); + + tables->Clear(ClassTemplate); + Result->feature_misses = 0; + + for (Feature = 0; Feature < NumFeatures; Feature++) { + int csum = UpdateTablesForFeature(ClassTemplate, ProtoMask, ConfigMask, + Feature, 
&Features[Feature], + tables, Debug); + // Count features that were missed over all configs. + if (csum == 0) + ++Result->feature_misses; + } + +#ifndef GRAPHICS_DISABLED + if (PrintProtoMatchesOn(Debug) || PrintMatchSummaryOn(Debug)) { + DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables, + NumFeatures, Debug); + } + + if (DisplayProtoMatchesOn(Debug)) { + DisplayProtoDebugInfo(ClassTemplate, ConfigMask, + *tables, SeparateDebugWindows); + } + + if (DisplayFeatureMatchesOn(Debug)) { + DisplayFeatureDebugInfo(ClassTemplate, ProtoMask, ConfigMask, NumFeatures, + Features, AdaptFeatureThreshold, Debug, + SeparateDebugWindows); + } +#endif + + tables->UpdateSumOfProtoEvidences(ClassTemplate, ConfigMask); + tables->NormalizeSums(ClassTemplate, NumFeatures); + + FindBestMatch(ClassTemplate, *tables, Result); + +#ifndef GRAPHICS_DISABLED + if (PrintMatchSummaryOn(Debug)) + Result->Print(); + + if (MatchDebuggingOn(Debug)) + tprintf("Match Complete --------------------------------------------\n"); +#endif + + delete tables; +} + +/** + * FindGoodProtos finds all protos whose normalized proto-evidence + * exceed AdaptProtoThreshold. The list is ordered by increasing + * proto id number. + * + * Globals: + * - local_matcher_multiplier_ Normalization factor multiplier + * param ClassTemplate Prototypes & tables for a class + * param ProtoMask AND Mask for proto word + * param ConfigMask AND Mask for config word + * param NumFeatures Number of features in blob + * param Features Array of features + * param ProtoArray Array of good protos + * param AdaptProtoThreshold Threshold for good protos + * param Debug Debugger flag: 1=debugger on + * @return Number of good protos in ProtoArray. 
 */
int IntegerMatcher::FindGoodProtos(
    INT_CLASS ClassTemplate,
    BIT_VECTOR ProtoMask,
    BIT_VECTOR ConfigMask,
    int16_t NumFeatures,
    INT_FEATURE_ARRAY Features,
    PROTO_ID *ProtoArray,
    int AdaptProtoThreshold,
    int Debug) {
  auto *tables = new ScratchEvidence();
  int NumGoodProtos = 0;

  /* DEBUG opening heading */
  if (MatchDebuggingOn (Debug))
    tprintf
      ("Find Good Protos -------------------------------------------\n");

  tables->Clear(ClassTemplate);

  // Accumulate evidence for every feature into the scratch tables.
  for (int Feature = 0; Feature < NumFeatures; Feature++)
    UpdateTablesForFeature(
        ClassTemplate, ProtoMask, ConfigMask, Feature, &(Features[Feature]),
        tables, Debug);

#ifndef GRAPHICS_DISABLED
  if (PrintProtoMatchesOn (Debug) || PrintMatchSummaryOn (Debug))
    DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables,
                           NumFeatures, Debug);
#endif

  /* Average Proto Evidences & Find Good Protos */
  for (int proto = 0; proto < ClassTemplate->NumProtos; proto++) {
    /* Compute Average for Actual Proto */
    int Temp = 0;
    // Sum is capped at MAX_PROTO_INDEX entries, yet the divisor below is the
    // full ProtoLengths[proto].
    // NOTE(review): also assumes ProtoLengths[proto] > 0 — a zero length
    // would divide by zero; confirm templates guarantee non-zero lengths.
    for (uint8_t i = 0;
         i < MAX_PROTO_INDEX && i < ClassTemplate->ProtoLengths[proto]; i++)
      Temp += tables->proto_evidence_[proto][i];

    Temp /= ClassTemplate->ProtoLengths[proto];

    /* Find Good Protos */
    if (Temp >= AdaptProtoThreshold) {
      // Append proto id; output stays sorted because proto ids ascend.
      *ProtoArray = proto;
      ProtoArray++;
      NumGoodProtos++;
    }
  }

  if (MatchDebuggingOn (Debug))
    tprintf ("Match Complete --------------------------------------------\n");
  delete tables;

  return NumGoodProtos;
}

/**
 * FindBadFeatures finds all features with maximum feature-evidence <
 * AdaptFeatureThresh. The list is ordered by increasing feature number.
 * @param ClassTemplate Prototypes & tables for a class
 * @param ProtoMask AND Mask for proto word
 * @param ConfigMask AND Mask for config word
 * @param NumFeatures Number of features in blob
 * @param Features Array of features
 * @param FeatureArray Array of bad features
 * @param AdaptFeatureThreshold Threshold for bad features
 * @param Debug Debugger flag: 1=debugger on
 * @return Number of bad features in FeatureArray.
 */
int IntegerMatcher::FindBadFeatures(
    INT_CLASS ClassTemplate,
    BIT_VECTOR ProtoMask,
    BIT_VECTOR ConfigMask,
    int16_t NumFeatures,
    INT_FEATURE_ARRAY Features,
    FEATURE_ID *FeatureArray,
    int AdaptFeatureThreshold,
    int Debug) {
  auto *tables = new ScratchEvidence();
  int NumBadFeatures = 0;

  /* DEBUG opening heading */
  if (MatchDebuggingOn(Debug))
    tprintf("Find Bad Features -------------------------------------------\n");

  tables->Clear(ClassTemplate);

  for (int Feature = 0; Feature < NumFeatures; Feature++) {
    // feature_evidence_ is cleared inside UpdateTablesForFeature, so it
    // holds this feature's per-config evidence only.
    UpdateTablesForFeature(
        ClassTemplate, ProtoMask, ConfigMask, Feature, &Features[Feature],
        tables, Debug);

    /* Find Best Evidence for Current Feature */
    int best = 0;
    assert(ClassTemplate->NumConfigs < MAX_NUM_CONFIGS);
    for (int i = 0; i < MAX_NUM_CONFIGS && i < ClassTemplate->NumConfigs; i++)
      if (tables->feature_evidence_[i] > best)
        best = tables->feature_evidence_[i];

    /* Find Bad Features */
    if (best < AdaptFeatureThreshold) {
      // Append feature number; output is sorted by construction.
      *FeatureArray = Feature;
      FeatureArray++;
      NumBadFeatures++;
    }
  }

#ifndef GRAPHICS_DISABLED
  if (PrintProtoMatchesOn(Debug) || PrintMatchSummaryOn(Debug))
    DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables,
                           NumFeatures, Debug);
#endif

  if (MatchDebuggingOn(Debug))
    tprintf("Match Complete --------------------------------------------\n");

  delete tables;
  return NumBadFeatures;
}


/// Constructor: precomputes the similarity-to-evidence lookup table and the
/// shift/mask constants used by UpdateTablesForFeature.
IntegerMatcher::IntegerMatcher(tesseract::IntParam *classify_debug_level)
  : classify_debug_level_(classify_debug_level)
{
  /* Initialize table for evidence to similarity lookup */
  for (int i = 0; i < SE_TABLE_SIZE; i++) {
    // Expand the table index back to a 27-bit similarity value, then map it
    // through a bell-shaped curve onto a 0..255 evidence byte.
    uint32_t IntSimilarity = i << (27 - SE_TABLE_BITS);
    double Similarity = (static_cast<double>(IntSimilarity)) / 65536.0 / 65536.0;
    double evidence = Similarity / kSimilarityCenter;
    evidence = 255.0 / (evidence * evidence + 1.0);

    if (kSEExponentialMultiplier > 0.0) {
      // Optional exponential damping of the tail of the curve.
      double scale = 1.0 - exp(-kSEExponentialMultiplier) *
        exp(kSEExponentialMultiplier * (static_cast<double>(i) / SE_TABLE_SIZE));
      evidence *= ClipToRange(scale, 0.0, 1.0);
    }

    similarity_evidence_table_[i] = static_cast<uint8_t>(evidence + 0.5);
  }

  /* Initialize evidence computation variables */
  evidence_table_mask_ =
    ((1 << kEvidenceTableBits) - 1) << (9 - kEvidenceTableBits);
  mult_trunc_shift_bits_ = (14 - kIntEvidenceTruncBits);
  table_trunc_shift_bits_ = (27 - SE_TABLE_BITS - (mult_trunc_shift_bits_ << 1));
  evidence_mult_mask_ = ((1 << kIntEvidenceTruncBits) - 1);
}

/*----------------------------------------------------------------------------
              Private Code
----------------------------------------------------------------------------*/
/// Zeros the per-config feature-evidence sums and the per-proto evidence
/// arrays for the number of configs/protos actually used by the class.
void ScratchEvidence::Clear(const INT_CLASS class_template) {
  memset(sum_feature_evidence_, 0,
         class_template->NumConfigs * sizeof(sum_feature_evidence_[0]));
  memset(proto_evidence_, 0,
         class_template->NumProtos * sizeof(proto_evidence_[0]));
}

/// Zeros only the per-config evidence for the current feature.
void ScratchEvidence::ClearFeatureEvidence(const INT_CLASS class_template) {
  memset(feature_evidence_, 0,
         class_template->NumConfigs * sizeof(feature_evidence_[0]));
}

/**
 * Print debugging information for Configurations
 */
static void IMDebugConfiguration(int FeatureNum, uint16_t ActualProtoNum,
                                 uint8_t Evidence, uint32_t ConfigWord) {
  tprintf ("F = %3d, P = %3d, E = %3d, Configs = ",
           FeatureNum, static_cast<int>(ActualProtoNum), static_cast<int>(Evidence));
  // Print the config membership bits, LSB first.
  while (ConfigWord) {
    if (ConfigWord & 1)
      tprintf ("1");
    else
      tprintf ("0");
    ConfigWord >>= 1;
  }
  tprintf ("\n");
}

/**
 * Print debugging information for Configurations
 */
static void IMDebugConfigurationSum(int FeatureNum, uint8_t *FeatureEvidence,
                                    int32_t ConfigCount) {
  tprintf("F=%3d, C=", FeatureNum);
  for (int ConfigNum = 0; ConfigNum < ConfigCount; ConfigNum++) {
    tprintf("%4d", FeatureEvidence[ConfigNum]);
  }
  tprintf("\n");
}

/**
 * For the given feature: prune protos, compute evidence,
 * update Feature Evidence, Proto Evidence, and Sum of Feature
 * Evidence tables.
 * @param ClassTemplate Prototypes & tables for a class
 * @param FeatureNum Current feature number (for DEBUG only)
 * @param Feature Pointer to a feature struct
 * @param tables Evidence tables
 * @param Debug Debugger flag: 1=debugger on
 * @return sum of feature evidence tables
 */
int IntegerMatcher::UpdateTablesForFeature(
    INT_CLASS ClassTemplate,
    BIT_VECTOR ProtoMask,
    BIT_VECTOR ConfigMask,
    int FeatureNum,
    const INT_FEATURE_STRUCT* Feature,
    ScratchEvidence *tables,
    int Debug) {
  uint32_t ConfigWord;
  uint32_t ProtoWord;
  uint32_t ProtoNum;
  uint32_t ActualProtoNum;
  uint8_t proto_byte;
  int32_t proto_word_offset;
  int32_t proto_offset;
  PROTO_SET ProtoSet;
  uint32_t *ProtoPrunerPtr;
  INT_PROTO Proto;
  int ProtoSetIndex;
  uint8_t Evidence;
  uint32_t XFeatureAddress;
  uint32_t YFeatureAddress;
  uint32_t ThetaFeatureAddress;

  tables->ClearFeatureEvidence(ClassTemplate);

  /* Precompute Feature Address offset for Proto Pruning */
  // The proto pruner is indexed by quantized X, Y and Theta buckets laid out
  // consecutively; each bucket entry is two 32-bit words wide.
  XFeatureAddress = ((Feature->X >> 2) << 1);
  YFeatureAddress = (NUM_PP_BUCKETS << 1) + ((Feature->Y >> 2) << 1);
  ThetaFeatureAddress = (NUM_PP_BUCKETS << 2) + ((Feature->Theta >> 2) << 1);

  for (ProtoSetIndex = 0, ActualProtoNum = 0;
       ProtoSetIndex < ClassTemplate->NumProtoSets; ProtoSetIndex++) {
    ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex];
    ProtoPrunerPtr = reinterpret_cast<uint32_t *>((*ProtoSet).ProtoPruner);
    // Each iteration covers one 32-proto word (half a proto set).
    for (ProtoNum = 0; ProtoNum < PROTOS_PER_PROTO_SET;
         ProtoNum += (PROTOS_PER_PROTO_SET >> 1), ActualProtoNum +=
         (PROTOS_PER_PROTO_SET >> 1), ProtoMask++, ProtoPrunerPtr++) {
      /* Prune Protos of current Proto Set */
      // A proto survives only if its bit is set in the X, Y and Theta pruner
      // words AND in the caller-supplied proto mask.
      ProtoWord = *(ProtoPrunerPtr + XFeatureAddress);
      ProtoWord &= *(ProtoPrunerPtr + YFeatureAddress);
      ProtoWord &= *(ProtoPrunerPtr + ThetaFeatureAddress);
      ProtoWord &= *ProtoMask;

      if (ProtoWord != 0) {
        // Byte-at-a-time bit scan using the offset_table/next_table lookup
        // tables to enumerate set bits.
        proto_byte = ProtoWord & 0xff;
        ProtoWord >>= 8;
        proto_word_offset = 0;
        while (ProtoWord != 0 || proto_byte != 0) {
          while (proto_byte == 0) {
            proto_byte = ProtoWord & 0xff;
            ProtoWord >>= 8;
            proto_word_offset += 8;
          }
          proto_offset = offset_table[proto_byte] + proto_word_offset;
          proto_byte = next_table[proto_byte];
          Proto = &(ProtoSet->Protos[ProtoNum + proto_offset]);
          ConfigWord = Proto->Configs[0];
          // Fixed-point distance of the feature from the proto line (A3) and
          // angular difference (M3).
          int32_t A3 = (((Proto->A * (Feature->X - 128)) * 2)
            - (Proto->B * (Feature->Y - 128)) + (Proto->C * 512));
          int32_t M3 = ((static_cast<int8_t>(Feature->Theta - Proto->Angle)) *
            kIntThetaFudge) * 2;

          // Cheap absolute value (off by one, tolerated), then truncate and
          // clamp both terms to the evidence multiplier range.
          if (A3 < 0)
            A3 = ~A3;
          if (M3 < 0)
            M3 = ~M3;
          A3 >>= mult_trunc_shift_bits_;
          M3 >>= mult_trunc_shift_bits_;
          if (static_cast<uint32_t>(A3) > evidence_mult_mask_)
            A3 = evidence_mult_mask_;
          if (static_cast<uint32_t>(M3) > evidence_mult_mask_)
            M3 = evidence_mult_mask_;

          // Squared distance indexes the precomputed similarity table.
          uint32_t A4 = (A3 * A3) + (M3 * M3);
          A4 >>= table_trunc_shift_bits_;
          if (A4 > evidence_table_mask_)
            Evidence = 0;
          else
            Evidence = similarity_evidence_table_[A4];

          if (PrintFeatureMatchesOn (Debug))
            IMDebugConfiguration (FeatureNum,
                                  ActualProtoNum + proto_offset,
                                  Evidence, ConfigWord);

          ConfigWord &= *ConfigMask;

          // For each config this proto belongs to, keep the max evidence
          // seen for the current feature (same bit-scan technique).
          uint8_t feature_evidence_index = 0;
          uint8_t config_byte = 0;
          while (ConfigWord != 0 || config_byte != 0) {
            while (config_byte == 0) {
              config_byte = ConfigWord & 0xff;
              ConfigWord >>= 8;
              feature_evidence_index += 8;
            }
            const uint8_t config_offset =
              offset_table[config_byte] + feature_evidence_index - 8;
            config_byte =
                next_table[config_byte];
            if (Evidence > tables->feature_evidence_[config_offset])
              tables->feature_evidence_[config_offset] = Evidence;
          }

          uint8_t ProtoIndex =
            ClassTemplate->ProtoLengths[ActualProtoNum + proto_offset];
          if (ProtoIndex > MAX_PROTO_INDEX) {
            // Avoid buffer overflow.
            // TODO: A better fix is still open.
            ProtoIndex = MAX_PROTO_INDEX;
          }
          uint8_t* UINT8Pointer =
            &(tables->proto_evidence_[ActualProtoNum + proto_offset][0]);
          // Insertion pass: keep the proto's top ProtoIndex evidence values
          // in descending order, bubbling the new value into place.
          for (; Evidence > 0 && ProtoIndex > 0; ProtoIndex--, UINT8Pointer++) {
            if (Evidence > *UINT8Pointer) {
              uint8_t Temp = *UINT8Pointer;
              *UINT8Pointer = Evidence;
              Evidence = Temp;
            }
          }
        }
      }
    }
  }

  if (PrintFeatureMatchesOn(Debug)) {
    IMDebugConfigurationSum(FeatureNum, tables->feature_evidence_,
                            ClassTemplate->NumConfigs);
  }

  // Fold this feature's per-config evidence into the running sums and
  // return the total over all configs (0 means the feature missed entirely).
  int* IntPointer = tables->sum_feature_evidence_;
  uint8_t* UINT8Pointer = tables->feature_evidence_;
  int SumOverConfigs = 0;
  for (int ConfigNum = ClassTemplate->NumConfigs; ConfigNum > 0; ConfigNum--) {
    int evidence = *UINT8Pointer++;
    SumOverConfigs += evidence;
    *IntPointer++ += evidence;
  }
  return SumOverConfigs;
}

/**
 * Print debugging information for Configurations
 */
#ifndef GRAPHICS_DISABLED
void IntegerMatcher::DebugFeatureProtoError(
    INT_CLASS ClassTemplate,
    BIT_VECTOR ProtoMask,
    BIT_VECTOR ConfigMask,
    const ScratchEvidence& tables,
    int16_t NumFeatures,
    int Debug) {
  float ProtoConfigs[MAX_NUM_CONFIGS];
  int ConfigNum;
  uint32_t ConfigWord;
  int ProtoSetIndex;
  uint16_t ProtoNum;
  uint8_t ProtoWordNum;
  PROTO_SET ProtoSet;
  uint16_t ActualProtoNum;

  if (PrintMatchSummaryOn(Debug)) {
    tprintf("Configuration Mask:\n");
    for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++)
      tprintf("%1d", (((*ConfigMask) >> ConfigNum) & 1));
    tprintf("\n");

    // Feature error = 100 * (1 - mean normalized evidence per feature).
    tprintf("Feature Error for Configurations:\n");
    for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++) {
      tprintf(
          " %5.1f",
          100.0 * (1.0 - static_cast<float>(tables.sum_feature_evidence_[ConfigNum])
                   / NumFeatures / 256.0));
    }
    tprintf("\n\n\n");
  }

  if (PrintMatchSummaryOn (Debug)) {
    tprintf ("Proto Mask:\n");
    for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets;
         ProtoSetIndex++) {
      ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
      // Two mask words per proto set, printed bit by bit.
      for (ProtoWordNum = 0; ProtoWordNum < 2;
           ProtoWordNum++, ProtoMask++) {
        ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
        for (ProtoNum = 0;
             ((ProtoNum < (PROTOS_PER_PROTO_SET >> 1))
              && (ActualProtoNum < ClassTemplate->NumProtos));
             ProtoNum++, ActualProtoNum++)
          tprintf ("%1d", (((*ProtoMask) >> ProtoNum) & 1));
        tprintf ("\n");
      }
    }
    tprintf ("\n");
  }

  for (int i = 0; i < ClassTemplate->NumConfigs; i++)
    ProtoConfigs[i] = 0;

  if (PrintProtoMatchesOn (Debug)) {
    tprintf ("Proto Evidence:\n");
    for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets;
         ProtoSetIndex++) {
      ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex];
      ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
      for (ProtoNum = 0;
           ((ProtoNum < PROTOS_PER_PROTO_SET) &&
            (ActualProtoNum < ClassTemplate->NumProtos));
           ProtoNum++, ActualProtoNum++) {
        tprintf ("P %3d =", ActualProtoNum);
        int temp = 0;
        for (uint8_t j = 0; j < ClassTemplate->ProtoLengths[ActualProtoNum]; j++) {
          uint8_t data = tables.proto_evidence_[ActualProtoNum][j];
          tprintf(" %d", data);
          temp += data;
        }

        tprintf(" = %6.4f%%\n",
                temp / 256.0 / ClassTemplate->ProtoLengths[ActualProtoNum]);

        // Accumulate this proto's total evidence into every config that
        // contains it.
        ConfigWord = ProtoSet->Protos[ProtoNum].Configs[0];
        ConfigNum = 0;
        while (ConfigWord) {
          tprintf ("%5d", ConfigWord & 1 ?
                            temp : 0);
          if (ConfigWord & 1)
            ProtoConfigs[ConfigNum] += temp;
          ConfigNum++;
          ConfigWord >>= 1;
        }
        tprintf("\n");
      }
    }
  }

  if (PrintMatchSummaryOn (Debug)) {
    tprintf ("Proto Error for Configurations:\n");
    for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++)
      tprintf (" %5.1f",
               100.0 * (1.0 -
                        ProtoConfigs[ConfigNum] /
                        ClassTemplate->ConfigLengths[ConfigNum] / 256.0));
    tprintf ("\n\n");
  }

  if (PrintProtoMatchesOn (Debug)) {
    tprintf ("Proto Sum for Configurations:\n");
    for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++)
      tprintf (" %4.1f", ProtoConfigs[ConfigNum] / 256.0);
    tprintf ("\n\n");

    tprintf ("Proto Length for Configurations:\n");
    for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++)
      tprintf (" %4.1f",
               static_cast<float>(ClassTemplate->ConfigLengths[ConfigNum]));
    tprintf ("\n\n");
  }

}

/// Renders every proto that belongs to a config in ConfigMask, shaded by
/// its average accumulated evidence (debug windows only).
void IntegerMatcher::DisplayProtoDebugInfo(
    INT_CLASS ClassTemplate,
    BIT_VECTOR ConfigMask,
    const ScratchEvidence& tables,
    bool SeparateDebugWindows) {
  uint16_t ProtoNum;
  uint16_t ActualProtoNum;
  PROTO_SET ProtoSet;
  int ProtoSetIndex;

  InitIntMatchWindowIfReqd();
  if (SeparateDebugWindows) {
    InitFeatureDisplayWindowIfReqd();
    InitProtoDisplayWindowIfReqd();
  }

  for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets;
       ProtoSetIndex++) {
    ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex];
    ActualProtoNum = ProtoSetIndex * PROTOS_PER_PROTO_SET;
    for (ProtoNum = 0;
         ((ProtoNum < PROTOS_PER_PROTO_SET) &&
          (ActualProtoNum < ClassTemplate->NumProtos));
         ProtoNum++, ActualProtoNum++) {
      /* Compute Average for Actual Proto */
      int temp = 0;
      for (uint8_t i = 0; i < ClassTemplate->ProtoLengths[ActualProtoNum]; i++)
        temp += tables.proto_evidence_[ActualProtoNum][i];

      temp /= ClassTemplate->ProtoLengths[ActualProtoNum];

      // Only display protos that are in at least one unmasked config.
      if ((ProtoSet->Protos[ProtoNum]).Configs[0] & (*ConfigMask)) {
        DisplayIntProto(ClassTemplate, ActualProtoNum, temp / 255.0);
      }
    }
  }
}


/// Renders each feature shaded by its best per-config evidence, or as a
/// binary good/bad color when evidence clipping is enabled (debug only).
void IntegerMatcher::DisplayFeatureDebugInfo(
    INT_CLASS ClassTemplate,
    BIT_VECTOR ProtoMask,
    BIT_VECTOR ConfigMask,
    int16_t NumFeatures,
    const INT_FEATURE_STRUCT* Features,
    int AdaptFeatureThreshold,
    int Debug,
    bool SeparateDebugWindows) {
  auto *tables = new ScratchEvidence();

  tables->Clear(ClassTemplate);

  InitIntMatchWindowIfReqd();
  if (SeparateDebugWindows) {
    InitFeatureDisplayWindowIfReqd();
    InitProtoDisplayWindowIfReqd();
  }

  for (int Feature = 0; Feature < NumFeatures; Feature++) {
    UpdateTablesForFeature(
        ClassTemplate, ProtoMask, ConfigMask, Feature, &Features[Feature],
        tables, 0);

    /* Find Best Evidence for Current Feature */
    int best = 0;
    assert(ClassTemplate->NumConfigs < MAX_NUM_CONFIGS);
    for (int i = 0; i < MAX_NUM_CONFIGS && i < ClassTemplate->NumConfigs; i++)
      if (tables->feature_evidence_[i] > best)
        best = tables->feature_evidence_[i];

    /* Update display for current feature */
    if (ClipMatchEvidenceOn(Debug)) {
      if (best < AdaptFeatureThreshold)
        DisplayIntFeature(&Features[Feature], 0.0);
      else
        DisplayIntFeature(&Features[Feature], 1.0);
    } else {
      DisplayIntFeature(&Features[Feature], best / 255.0);
    }
  }

  delete tables;
}
#endif

/**
 * Add sum of Proto Evidences into Sum Of Feature Evidence Array
 */
void ScratchEvidence::UpdateSumOfProtoEvidences(
    INT_CLASS ClassTemplate, BIT_VECTOR ConfigMask) {

  int *IntPointer;
  uint32_t ConfigWord;
  int ProtoSetIndex;
  uint16_t ProtoNum;
  PROTO_SET ProtoSet;
  int NumProtos;
  uint16_t ActualProtoNum;

  NumProtos = ClassTemplate->NumProtos;

  for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets;
       ProtoSetIndex++) {
    ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex];
    ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
    for (ProtoNum = 0;
         ((ProtoNum < PROTOS_PER_PROTO_SET) && (ActualProtoNum < NumProtos));
         ProtoNum++, ActualProtoNum++) {
      // Total evidence for this proto, capped at MAX_PROTO_INDEX entries.
      int temp = 0;
      for (uint8_t i = 0; i < MAX_PROTO_INDEX &&
           i < ClassTemplate->ProtoLengths[ActualProtoNum]; i++)
        temp += proto_evidence_[ActualProtoNum] [i];

      // Add it into every unmasked config that contains the proto.
      ConfigWord = ProtoSet->Protos[ProtoNum].Configs[0];
      ConfigWord &= *ConfigMask;
      IntPointer = sum_feature_evidence_;
      while (ConfigWord) {
        if (ConfigWord & 1)
          *IntPointer += temp;
        IntPointer++;
        ConfigWord >>= 1;
      }
    }
  }
}

/**
 * Normalize Sum of Proto and Feature Evidence by dividing by the sum of
 * the Feature Lengths and the Proto Lengths for each configuration.
 */
void ScratchEvidence::NormalizeSums(
    INT_CLASS ClassTemplate, int16_t NumFeatures) {

  assert(ClassTemplate->NumConfigs < MAX_NUM_CONFIGS);
  for (int i = 0; i < MAX_NUM_CONFIGS && i < ClassTemplate->NumConfigs; i++) {
    // <<8 keeps precision: sums are in units of 256ths of full evidence.
    sum_feature_evidence_[i] = (sum_feature_evidence_[i] << 8) /
      (NumFeatures + ClassTemplate->ConfigLengths[i]);
  }
}

/**
 * Find the best match for the current class and update the Result
 * with the configuration and match rating.
 * @return The best normalized sum of evidences
 */
int IntegerMatcher::FindBestMatch(
    INT_CLASS class_template,
    const ScratchEvidence &tables,
    UnicharRating* result) {
  int best_match = 0;
  result->config = 0;
  result->fonts.clear();
  result->fonts.reserve(class_template->NumConfigs);

  /* Find best match */
  assert(class_template->NumConfigs < MAX_NUM_CONFIGS);
  for (int c = 0; c < MAX_NUM_CONFIGS && c < class_template->NumConfigs; ++c) {
    int rating = tables.sum_feature_evidence_[c];
    if (*classify_debug_level_ > 2)
      tprintf("Config %d, rating=%d\n", c, rating);
    if (rating > best_match) {
      result->config = c;
      best_match = rating;
    }
    // Every config is recorded as a font candidate with its raw rating.
    result->fonts.push_back(ScoredFont(c, rating));
  }

  // Compute confidence on a Probability scale.
  result->rating = best_match / 65536.0f;

  return best_match;
}

/**
 * Applies the CN normalization factor to the given rating and returns
 * the modified rating.
 */
float IntegerMatcher::ApplyCNCorrection(float rating, int blob_length,
                                        int normalization_factor,
                                        int matcher_multiplier) {
  // Weighted average of the matcher rating (weight blob_length) and the
  // normalization penalty (weight matcher_multiplier); 1.0f if both
  // weights are zero.
  int divisor = blob_length + matcher_multiplier;
  return divisor == 0 ? 1.0f : (rating * blob_length +
      matcher_multiplier * normalization_factor / 256.0f) / divisor;
}

}  // namespace tesseract

// ===== Next file in this diff listing: src/classify/intmatcher.h =====
/******************************************************************************
 ** Filename:    intmatcher.h
 ** Purpose:     Interface to high level generic classifier routines.
 ** Author:      Robert Moss
 **
 ** (c) Copyright Hewlett-Packard Company, 1988.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 ******************************************************************************/
#ifndef INTMATCHER_H
#define INTMATCHER_H

#include "params.h"
#include "intproto.h"

namespace tesseract {

// Character fragments could be present in the trained templaes
// but turned on/off on the language-by-language basis or depending
// on particular properties of the corpus (e.g. when we expect the
// images to have low exposure).
extern BOOL_VAR_H(disable_character_fragments, false,
                  "Do not include character fragments in the"
                  " results of the classifier");

extern INT_VAR_H(classify_integer_matcher_multiplier, 10,
                 "Integer Matcher Multiplier  0-255:   ");

struct UnicharRating;

/// One class pruner output entry: a candidate class and its rating
/// (0.0 = perfect match, 1.0 = worst).
struct CP_RESULT_STRUCT {
  CP_RESULT_STRUCT() : Rating(0.0f), Class(0) {}

  float Rating;
  CLASS_ID Class;
};


/**----------------------------------------------------------------------------
          Public Function Prototypes
----------------------------------------------------------------------------**/

#define  SE_TABLE_BITS    9
#define  SE_TABLE_SIZE  512

/// Per-match scratch space: evidence accumulated for one class while
/// matching one blob's features.
struct ScratchEvidence {
  // Best evidence per config for the feature currently being processed.
  uint8_t feature_evidence_[MAX_NUM_CONFIGS];
  // Running evidence totals per config over all features.
  int sum_feature_evidence_[MAX_NUM_CONFIGS];
  // Top evidence values per proto, kept in descending order.
  uint8_t proto_evidence_[MAX_NUM_PROTOS][MAX_PROTO_INDEX];

  void Clear(const INT_CLASS class_template);
  void ClearFeatureEvidence(const INT_CLASS class_template);
  void NormalizeSums(INT_CLASS ClassTemplate, int16_t NumFeatures);
  void UpdateSumOfProtoEvidences(
    INT_CLASS ClassTemplate, BIT_VECTOR ConfigMask);
};


/// Integer feature matcher: scores one class template against a blob's
/// integer features using precomputed similarity/evidence tables.
class IntegerMatcher {
 public:
  // Integer Matcher Theta Fudge (0-255).
  static const int kIntThetaFudge = 128;
  // Bits in Similarity to Evidence Lookup (8-9).
  static const int kEvidenceTableBits = 9;
  // Integer Evidence Truncation Bits (8-14).
  static const int kIntEvidenceTruncBits = 14;
  // Similarity to Evidence Table Exponential Multiplier.
  static const float kSEExponentialMultiplier;
  // Center of Similarity Curve.
  static const float kSimilarityCenter;

  IntegerMatcher(tesseract::IntParam *classify_debug_level);

  void Match(INT_CLASS ClassTemplate,
             BIT_VECTOR ProtoMask,
             BIT_VECTOR ConfigMask,
             int16_t NumFeatures,
             const INT_FEATURE_STRUCT* Features,
             tesseract::UnicharRating* Result,
             int AdaptFeatureThreshold,
             int Debug,
             bool SeparateDebugWindows);

  // Applies the CN normalization factor to the given rating and returns
  // the modified rating.
  float ApplyCNCorrection(float rating, int blob_length,
                          int normalization_factor, int matcher_multiplier);

  int FindGoodProtos(INT_CLASS ClassTemplate,
                     BIT_VECTOR ProtoMask,
                     BIT_VECTOR ConfigMask,
                     int16_t NumFeatures,
                     INT_FEATURE_ARRAY Features,
                     PROTO_ID *ProtoArray,
                     int AdaptProtoThreshold,
                     int Debug);

  int FindBadFeatures(INT_CLASS ClassTemplate,
                      BIT_VECTOR ProtoMask,
                      BIT_VECTOR ConfigMask,
                      int16_t NumFeatures,
                      INT_FEATURE_ARRAY Features,
                      FEATURE_ID *FeatureArray,
                      int AdaptFeatureThreshold,
                      int Debug);

 private:
  int UpdateTablesForFeature(
      INT_CLASS ClassTemplate,
      BIT_VECTOR ProtoMask,
      BIT_VECTOR ConfigMask,
      int FeatureNum,
      const INT_FEATURE_STRUCT* Feature,
      ScratchEvidence *evidence,
      int Debug);

  int FindBestMatch(INT_CLASS ClassTemplate,
                    const ScratchEvidence &tables,
                    tesseract::UnicharRating* Result);

#ifndef GRAPHICS_DISABLED
  void DebugFeatureProtoError(
      INT_CLASS ClassTemplate,
      BIT_VECTOR ProtoMask,
      BIT_VECTOR ConfigMask,
      const ScratchEvidence &tables,
      int16_t NumFeatures,
      int Debug);

  void DisplayProtoDebugInfo(
      INT_CLASS ClassTemplate,
      BIT_VECTOR ConfigMask,
      const ScratchEvidence &tables,
      bool SeparateDebugWindows);

  void DisplayFeatureDebugInfo(
      INT_CLASS ClassTemplate,
      BIT_VECTOR ProtoMask,
      BIT_VECTOR ConfigMask,
      int16_t NumFeatures,
      const INT_FEATURE_STRUCT* Features,
      int AdaptFeatureThreshold,
      int Debug,
      bool SeparateDebugWindows);
#endif

 private:
  tesseract::IntParam *classify_debug_level_;
  // Precomputed lookup from quantized squared distance to evidence byte.
  uint8_t similarity_evidence_table_[SE_TABLE_SIZE];
  // Masks/shifts derived in the constructor from the k* constants above.
  uint32_t evidence_table_mask_;
  uint32_t mult_trunc_shift_bits_;
  uint32_t table_trunc_shift_bits_;
  uint32_t evidence_mult_mask_;
};

}  // namespace tesseract

#endif

// ===== Next file in this diff listing: src/classify/intproto.cpp =====
+/****************************************************************************** + ** Filename: intproto.c + ** Purpose: Definition of data structures for integer protos. + ** Author: Dan Johnson + ** + ** (c) Copyright Hewlett-Packard Company, 1988. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + ******************************************************************************/ +/*----------------------------------------------------------------------------- + Include Files and Type Defines +-----------------------------------------------------------------------------*/ + +#define _USE_MATH_DEFINES // for M_PI + +// Include automatically generated configuration file if running autoconf. 
+#ifdef HAVE_CONFIG_H +#include "config_auto.h" +#endif + +#include "intproto.h" + +#include "classify.h" +#include "fontinfo.h" +#include "mfoutline.h" +#include "picofeat.h" +#include "points.h" +#include "shapetable.h" +#include "svmnode.h" + +#include "helpers.h" + +#include <algorithm> +#include <cmath> // for M_PI, std::floor +#include <cstdio> +#include <cassert> + +namespace tesseract { + +/* match debug display constants*/ +#define PROTO_PRUNER_SCALE (4.0) + +#define INT_DESCENDER (0.0 * INT_CHAR_NORM_RANGE) +#define INT_BASELINE (0.25 * INT_CHAR_NORM_RANGE) +#define INT_XHEIGHT (0.75 * INT_CHAR_NORM_RANGE) +#define INT_CAPHEIGHT (1.0 * INT_CHAR_NORM_RANGE) + +#define INT_XCENTER (0.5 * INT_CHAR_NORM_RANGE) +#define INT_YCENTER (0.5 * INT_CHAR_NORM_RANGE) +#define INT_XRADIUS (0.2 * INT_CHAR_NORM_RANGE) +#define INT_YRADIUS (0.2 * INT_CHAR_NORM_RANGE) +#define INT_MIN_X 0 +#define INT_MIN_Y 0 +#define INT_MAX_X INT_CHAR_NORM_RANGE +#define INT_MAX_Y INT_CHAR_NORM_RANGE + +/** define pad used to snap near horiz/vertical protos to horiz/vertical */ +#define HV_TOLERANCE (0.0025) /* approx 0.9 degrees */ + +typedef enum +{ StartSwitch, EndSwitch, LastSwitch } +SWITCH_TYPE; +#define MAX_NUM_SWITCHES 3 + +typedef struct +{ + SWITCH_TYPE Type; + int8_t X, Y; + int16_t YInit; + int16_t Delta; +} +FILL_SWITCH; + +typedef struct +{ + uint8_t NextSwitch; + uint8_t AngleStart, AngleEnd; + int8_t X; + int16_t YStart, YEnd; + int16_t StartDelta, EndDelta; + FILL_SWITCH Switch[MAX_NUM_SWITCHES]; +} +TABLE_FILLER; + +typedef struct +{ + int8_t X; + int8_t YStart, YEnd; + uint8_t AngleStart, AngleEnd; +} +FILL_SPEC; + + +/* constants for conversion from old inttemp format */ +#define OLD_MAX_NUM_CONFIGS 32 +#define OLD_WERDS_PER_CONFIG_VEC ((OLD_MAX_NUM_CONFIGS + BITS_PER_WERD - 1) /\ + BITS_PER_WERD) + +/*----------------------------------------------------------------------------- + Macros +-----------------------------------------------------------------------------*/ 
+/** macro for performing circular increments of bucket indices */ +#define CircularIncrement(i,r) (((i) < (r) - 1)?((i)++):((i) = 0)) + +/** macro for mapping floats to ints without bounds checking */ +#define MapParam(P,O,N) (std::floor(((P) + (O)) * (N))) + +/*--------------------------------------------------------------------------- + Private Function Prototypes +----------------------------------------------------------------------------*/ +float BucketStart(int Bucket, float Offset, int NumBuckets); + +float BucketEnd(int Bucket, float Offset, int NumBuckets); + +void DoFill(FILL_SPEC *FillSpec, + CLASS_PRUNER_STRUCT* Pruner, + uint32_t ClassMask, + uint32_t ClassCount, + uint32_t WordIndex); + +bool FillerDone(TABLE_FILLER* Filler); + +void FillPPCircularBits(uint32_t + ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR], + int Bit, float Center, float Spread, bool debug); + +void FillPPLinearBits(uint32_t ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR], + int Bit, float Center, float Spread, bool debug); + +void GetCPPadsForLevel(int Level, + float *EndPad, + float *SidePad, + float *AnglePad); + +ScrollView::Color GetMatchColorFor(float Evidence); + +void GetNextFill(TABLE_FILLER *Filler, FILL_SPEC *Fill); + +void InitTableFiller(float EndPad, + float SidePad, + float AnglePad, + PROTO Proto, + TABLE_FILLER *Filler); + +#ifndef GRAPHICS_DISABLED +void RenderIntFeature(ScrollView *window, const INT_FEATURE_STRUCT* Feature, + ScrollView::Color color); + +void RenderIntProto(ScrollView *window, + INT_CLASS Class, + PROTO_ID ProtoId, + ScrollView::Color color); +#endif // !GRAPHICS_DISABLED + +/*----------------------------------------------------------------------------- + Global Data Definitions and Declarations +-----------------------------------------------------------------------------*/ + +#ifndef GRAPHICS_DISABLED +/* global display lists used to display proto and feature match information*/ +static ScrollView* IntMatchWindow = nullptr; +static 
ScrollView* FeatureDisplayWindow = nullptr; +static ScrollView* ProtoDisplayWindow = nullptr; +#endif + +/*----------------------------------------------------------------------------- + Variables +-----------------------------------------------------------------------------*/ + +/* control knobs */ +static INT_VAR(classify_num_cp_levels, 3, "Number of Class Pruner Levels"); +static double_VAR(classify_cp_angle_pad_loose, 45.0, + "Class Pruner Angle Pad Loose"); +static double_VAR(classify_cp_angle_pad_medium, 20.0, + "Class Pruner Angle Pad Medium"); +static double_VAR(classify_cp_angle_pad_tight, 10.0, + "CLass Pruner Angle Pad Tight"); +static double_VAR(classify_cp_end_pad_loose, 0.5, "Class Pruner End Pad Loose"); +static double_VAR(classify_cp_end_pad_medium, 0.5, "Class Pruner End Pad Medium"); +static double_VAR(classify_cp_end_pad_tight, 0.5, "Class Pruner End Pad Tight"); +static double_VAR(classify_cp_side_pad_loose, 2.5, "Class Pruner Side Pad Loose"); +static double_VAR(classify_cp_side_pad_medium, 1.2, "Class Pruner Side Pad Medium"); +static double_VAR(classify_cp_side_pad_tight, 0.6, "Class Pruner Side Pad Tight"); +static double_VAR(classify_pp_angle_pad, 45.0, "Proto Pruner Angle Pad"); +static double_VAR(classify_pp_end_pad, 0.5, "Proto Prune End Pad"); +static double_VAR(classify_pp_side_pad, 2.5, "Proto Pruner Side Pad"); + +/** + * This routine truncates Param to lie within the range + * of Min-Max inclusive. + * + * @param Param parameter value to be truncated + * @param Min, Max parameter limits (inclusive) + * + * @return Truncated parameter. 
+ */ +static int TruncateParam(float Param, int Min, int Max) { + int result; + if (Param < Min) { + result = Min; + } else if (Param > Max) { + result = Max; + } else { + result = static_cast<int>(std::floor(Param)); + } + return result; +} + +/*----------------------------------------------------------------------------- + Public Code +-----------------------------------------------------------------------------*/ +/// Builds a feature from an FCOORD for position with all the necessary +/// clipping and rounding. +INT_FEATURE_STRUCT::INT_FEATURE_STRUCT(const FCOORD& pos, uint8_t theta) + : X(ClipToRange<int16_t>(static_cast<int16_t>(pos.x() + 0.5), 0, 255)), + Y(ClipToRange<int16_t>(static_cast<int16_t>(pos.y() + 0.5), 0, 255)), + Theta(theta), + CP_misses(0) { +} +/** Builds a feature from ints with all the necessary clipping and casting. */ +INT_FEATURE_STRUCT::INT_FEATURE_STRUCT(int x, int y, int theta) + : X(static_cast<uint8_t>(ClipToRange<int>(x, 0, UINT8_MAX))), + Y(static_cast<uint8_t>(ClipToRange<int>(y, 0, UINT8_MAX))), + Theta(static_cast<uint8_t>(ClipToRange<int>(theta, 0, UINT8_MAX))), + CP_misses(0) { +} + +/** + * This routine adds a new class structure to a set of + * templates. Classes have to be added to Templates in + * the order of increasing ClassIds. 
+ * + * @param Templates templates to add new class to + * @param ClassId class id to associate new class with + * @param Class class data structure to add to templates + * + * Globals: none + */ +void AddIntClass(INT_TEMPLATES Templates, CLASS_ID ClassId, INT_CLASS Class) { + int Pruner; + + assert (LegalClassId (ClassId)); + if (ClassId != Templates->NumClasses) { + fprintf(stderr, "Please make sure that classes are added to templates"); + fprintf(stderr, " in increasing order of ClassIds\n"); + exit(1); + } + ClassForClassId (Templates, ClassId) = Class; + Templates->NumClasses++; + + if (Templates->NumClasses > MaxNumClassesIn (Templates)) { + Pruner = Templates->NumClassPruners++; + Templates->ClassPruners[Pruner] = new CLASS_PRUNER_STRUCT; + memset(Templates->ClassPruners[Pruner], 0, sizeof(CLASS_PRUNER_STRUCT)); + } +} /* AddIntClass */ + + +/** + * This routine returns the index of the next free config + * in Class. + * + * @param Class class to add new configuration to + * + * Globals: none + * + * @return Index of next free config. + */ +int AddIntConfig(INT_CLASS Class) { + int Index; + + assert(Class->NumConfigs < MAX_NUM_CONFIGS); + + Index = Class->NumConfigs++; + Class->ConfigLengths[Index] = 0; + return Index; +} /* AddIntConfig */ + + +/** + * This routine allocates the next free proto in Class and + * returns its index. + * + * @param Class class to add new proto to + * + * Globals: none + * + * @return Proto index of new proto. 
 */
int AddIntProto(INT_CLASS Class) {
  int Index;
  int ProtoSetId;
  PROTO_SET ProtoSet;
  INT_PROTO Proto;
  uint32_t *Word;

  // Refuse to grow past the hard per-class proto limit.
  if (Class->NumProtos >= MAX_NUM_PROTOS)
    return (NO_PROTO);

  Index = Class->NumProtos++;

  // If the new proto does not fit in the proto sets allocated so far,
  // allocate one more set and grow the proto-length array to match.
  if (Class->NumProtos > MaxNumIntProtosIn(Class)) {
    ProtoSetId = Class->NumProtoSets++;

    ProtoSet = static_cast<PROTO_SET>(malloc(sizeof(PROTO_SET_STRUCT)));
    Class->ProtoSets[ProtoSetId] = ProtoSet;
    memset(ProtoSet, 0, sizeof(*ProtoSet));

    /* reallocate space for the proto lengths and install in class */
    // NOTE(review): the realloc result is not checked for nullptr before
    // use; on allocation failure this would dereference nullptr below.
    Class->ProtoLengths =
      static_cast<uint8_t *>(realloc(Class->ProtoLengths,
                                     MaxNumIntProtosIn(Class) * sizeof(uint8_t)));
    // Zero only the newly added tail of the length array.
    memset(&Class->ProtoLengths[Index], 0,
           sizeof(*Class->ProtoLengths) * (MaxNumIntProtosIn(Class) - Index));
  }

  /* initialize proto so its length is zero and it isn't in any configs */
  Class->ProtoLengths[Index] = 0;
  Proto = ProtoForProtoId (Class, Index);
  // Clear the proto's config membership bit vector word by word.
  for (Word = Proto->Configs;
       Word < Proto->Configs + WERDS_PER_CONFIG_VEC; *Word++ = 0);

  return (Index);
}

/**
 * This routine adds Proto to the class pruning tables
 * for the specified class in Templates.
 *
 * Globals:
 * - classify_num_cp_levels number of levels used in the class pruner
 * @param Proto floating-pt proto to add to class pruner
 * @param ClassId class id corresponding to Proto
 * @param Templates set of templates containing class pruner
 */
void AddProtoToClassPruner (PROTO Proto, CLASS_ID ClassId,
                            INT_TEMPLATES Templates)
// MAX_LEVEL is used only to build the full (all-bits) class mask below.
#define MAX_LEVEL  2
{
  CLASS_PRUNER_STRUCT* Pruner;
  uint32_t ClassMask;
  uint32_t ClassCount;
  uint32_t WordIndex;
  int Level;
  float EndPad, SidePad, AnglePad;
  TABLE_FILLER TableFiller;
  FILL_SPEC FillSpec;

  Pruner = CPrunerFor (Templates, ClassId);
  WordIndex = CPrunerWordIndexFor (ClassId);
  ClassMask = CPrunerMaskFor (MAX_LEVEL, ClassId);

  // Fill from the loosest (highest) level down to the tightest; each
  // level uses progressively smaller pads around the proto.
  for (Level = classify_num_cp_levels - 1; Level >= 0; Level--) {
    GetCPPadsForLevel(Level, &EndPad, &SidePad, &AnglePad);
    // Despite the name, ClassCount holds the level value encoded in the
    // class's bit positions (via CPrunerMaskFor), compared in DoFill.
    ClassCount = CPrunerMaskFor (Level, ClassId);
    InitTableFiller(EndPad, SidePad, AnglePad, Proto, &TableFiller);

    // Sweep the filler across the table, one x-slice at a time.
    while (!FillerDone (&TableFiller)) {
      GetNextFill(&TableFiller, &FillSpec);
      DoFill(&FillSpec, Pruner, ClassMask, ClassCount, WordIndex);
    }
  }
} /* AddProtoToClassPruner */

/**
 * This routine updates the proto pruner lookup tables
 * for Class to include a new proto identified by ProtoId
 * and described by Proto.
 * @param Proto floating-pt proto to be added to proto pruner
 * @param ProtoId id of proto
 * @param Class integer class that contains desired proto pruner
 * @param debug debug flag
 * @note Globals: none
 */
void AddProtoToProtoPruner(PROTO Proto, int ProtoId,
                           INT_CLASS Class, bool debug) {
  float Angle, X, Y, Length;
  float Pad;
  int Index;
  PROTO_SET ProtoSet;

  // Report the violation before the assert so release builds (where
  // assert is compiled out) still leave a trace in the log.
  if (ProtoId >= Class->NumProtos)
    tprintf("AddProtoToProtoPruner:assert failed: %d < %d",
            ProtoId, Class->NumProtos);
  assert(ProtoId < Class->NumProtos);

  Index = IndexForProto (ProtoId);
  ProtoSet = Class->ProtoSets[SetForProto (ProtoId)];

  Angle = Proto->Angle;
#ifndef _WIN32
  assert(!std::isnan(Angle));
#endif

  // Angle is circular (wraps at 1.0), so it uses the circular fill.
  FillPPCircularBits (ProtoSet->ProtoPruner[PRUNER_ANGLE], Index,
                      Angle + ANGLE_SHIFT, classify_pp_angle_pad / 360.0,
                      debug);

  // Convert to radians for the cos/sin projections below.
  Angle *= 2.0 * M_PI;
  Length = Proto->Length;

  // X pad: the larger of the proto's projected half-length (plus end pad)
  // and the projected side pad, both along the x axis.
  X = Proto->X + X_SHIFT;
  Pad = std::max(fabs (cos (Angle)) * (Length / 2.0 +
                                       classify_pp_end_pad *
                                       GetPicoFeatureLength ()),
                 fabs (sin (Angle)) * (classify_pp_side_pad *
                                       GetPicoFeatureLength ()));

  FillPPLinearBits(ProtoSet->ProtoPruner[PRUNER_X], Index, X, Pad, debug);

  // Y pad: same construction with sin/cos swapped for the y axis.
  Y = Proto->Y + Y_SHIFT;
  Pad = std::max(fabs (sin (Angle)) * (Length / 2.0 +
                                       classify_pp_end_pad *
                                       GetPicoFeatureLength ()),
                 fabs (cos (Angle)) * (classify_pp_side_pad *
                                       GetPicoFeatureLength ()));

  FillPPLinearBits(ProtoSet->ProtoPruner[PRUNER_Y], Index, Y, Pad, debug);
} /* AddProtoToProtoPruner */

/**
 * Returns a quantized bucket for the given param shifted by offset,
 * notionally (param + offset) * num_buckets, but clipped and casted to the
 * appropriate type.
+ */ +uint8_t Bucket8For(float param, float offset, int num_buckets) { + int bucket = IntCastRounded(MapParam(param, offset, num_buckets)); + return static_cast<uint8_t>(ClipToRange<int>(bucket, 0, num_buckets - 1)); +} +uint16_t Bucket16For(float param, float offset, int num_buckets) { + int bucket = IntCastRounded(MapParam(param, offset, num_buckets)); + return static_cast<uint16_t>(ClipToRange<int>(bucket, 0, num_buckets - 1)); +} + +/** + * Returns a quantized bucket for the given circular param shifted by offset, + * notionally (param + offset) * num_buckets, but modded and casted to the + * appropriate type. + */ +uint8_t CircBucketFor(float param, float offset, int num_buckets) { + int bucket = IntCastRounded(MapParam(param, offset, num_buckets)); + return static_cast<uint8_t>(Modulo(bucket, num_buckets)); +} /* CircBucketFor */ + + +#ifndef GRAPHICS_DISABLED +/** + * This routine clears the global feature and proto + * display lists. + * + * Globals: + * - FeatureShapes display list for features + * - ProtoShapes display list for protos + */ +void UpdateMatchDisplay() { + if (IntMatchWindow != nullptr) + IntMatchWindow->Update(); +} /* ClearMatchDisplay */ +#endif + +/** + * This operation updates the config vectors of all protos + * in Class to indicate that the protos with 1's in Config + * belong to a new configuration identified by ConfigId. + * It is assumed that the length of the Config bit vector is + * equal to the number of protos in Class. 
 * @param Config config to be added to class
 * @param ConfigId id to be used for new config
 * @param Class class to add new config to
 */
void ConvertConfig(BIT_VECTOR Config, int ConfigId, INT_CLASS Class) {
  int ProtoId;
  INT_PROTO Proto;
  int TotalLength;

  // For every proto selected by Config, set the ConfigId bit in that
  // proto's membership vector, accumulating the config's total length.
  for (ProtoId = 0, TotalLength = 0;
       ProtoId < Class->NumProtos; ProtoId++) {
    if (test_bit(Config, ProtoId)) {
      Proto = ProtoForProtoId(Class, ProtoId);
      SET_BIT(Proto->Configs, ConfigId);
      TotalLength += Class->ProtoLengths[ProtoId];
    }
  }
  Class->ConfigLengths[ConfigId] = TotalLength;
} /* ConvertConfig */

/**
 * This routine converts Proto to integer format and
 * installs it as ProtoId in Class.
 * @param Proto floating-pt proto to be converted to integer format
 * @param ProtoId id of proto
 * @param Class integer class to add converted proto to
 */
void Classify::ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class) {
  assert(ProtoId < Class->NumProtos);

  INT_PROTO P = ProtoForProtoId(Class, ProtoId);

  // Scale the line coefficients into the signed/unsigned 8-bit ranges
  // used by the integer matcher: A in [-128,127], B in [0,255] (negated),
  // C in [-128,127].
  float Param = Proto->A * 128;
  P->A = TruncateParam(Param, -128, 127);

  Param = -Proto->B * 256;
  P->B = TruncateParam(Param, 0, 255);

  Param = Proto->C * 128;
  P->C = TruncateParam(Param, -128, 127);

  // Angle maps [0,1) onto the 8-bit circle; anything out of range
  // collapses to 0.
  Param = Proto->Angle * 256;
  if (Param < 0 || Param >= 256)
    P->Angle = 0;
  else
    P->Angle = static_cast<uint8_t>(Param);

  /* round proto length to nearest integer number of pico-features */
  Param = (Proto->Length / GetPicoFeatureLength()) + 0.5;
  Class->ProtoLengths[ProtoId] = TruncateParam(Param, 1, 255);
  if (classify_learning_debug_level >= 2)
    tprintf("Converted ffeat to (A=%d,B=%d,C=%d,L=%d)",
            P->A, P->B, P->C, Class->ProtoLengths[ProtoId]);
} /* ConvertProto */

/**
 * This routine converts from the old floating point format
 * to the new integer format.
 * @param FloatProtos prototypes in old floating pt format
 * @param target_unicharset the UNICHARSET to use
 * @return New set of training templates in integer format.
 * @note Globals: none
 */
INT_TEMPLATES Classify::CreateIntTemplates(CLASSES FloatProtos,
                                           const UNICHARSET&
                                           target_unicharset) {
  INT_TEMPLATES IntTemplates;
  CLASS_TYPE FClass;
  INT_CLASS IClass;
  int ClassId;
  int ProtoId;
  int ConfigId;

  IntTemplates = NewIntTemplates();

  for (ClassId = 0; ClassId < target_unicharset.size(); ClassId++) {
    FClass = &(FloatProtos[ClassId]);
    // Every non-space unichar is expected to have training data.
    if (FClass->NumProtos == 0 && FClass->NumConfigs == 0 &&
        strcmp(target_unicharset.id_to_unichar(ClassId), " ") != 0) {
      tprintf("Warning: no protos/configs for %s in CreateIntTemplates()\n",
              target_unicharset.id_to_unichar(ClassId));
    }
    assert(UnusedClassIdIn(IntTemplates, ClassId));
    IClass = NewIntClass(FClass->NumProtos, FClass->NumConfigs);
    // Build the class's font set and intern it in fontset_table_.
    FontSet fs;
    fs.size = FClass->font_set.size();
    fs.configs = new int[fs.size];
    for (int i = 0; i < fs.size; ++i) {
      fs.configs[i] = FClass->font_set.get(i);
    }
    if (this->fontset_table_.contains(fs)) {
      // Already interned: reuse the id and free our temporary copy.
      IClass->font_set_id = this->fontset_table_.get_id(fs);
      delete[] fs.configs;
    } else {
      // push_back takes ownership of fs.configs.
      IClass->font_set_id = this->fontset_table_.push_back(fs);
    }
    AddIntClass(IntTemplates, ClassId, IClass);

    // Convert each proto and register it with both pruners.
    for (ProtoId = 0; ProtoId < FClass->NumProtos; ProtoId++) {
      AddIntProto(IClass);
      ConvertProto(ProtoIn(FClass, ProtoId), ProtoId, IClass);
      AddProtoToProtoPruner(ProtoIn(FClass, ProtoId), ProtoId, IClass,
                            classify_learning_debug_level >= 2);
      AddProtoToClassPruner(ProtoIn(FClass, ProtoId), ClassId, IntTemplates);
    }

    for (ConfigId = 0; ConfigId < FClass->NumConfigs; ConfigId++) {
      AddIntConfig(IClass);
      ConvertConfig(FClass->Configurations[ConfigId], ConfigId, IClass);
    }
  }
  return (IntTemplates);
} /* CreateIntTemplates */

#ifndef GRAPHICS_DISABLED
/**
 * This routine renders the specified feature into a
 * global display list.
+ * + * Globals: + * - FeatureShapes global display list for features + * @param Feature pico-feature to be displayed + * @param Evidence best evidence for this feature (0-1) + */ +void DisplayIntFeature(const INT_FEATURE_STRUCT *Feature, float Evidence) { + ScrollView::Color color = GetMatchColorFor(Evidence); + RenderIntFeature(IntMatchWindow, Feature, color); + if (FeatureDisplayWindow) { + RenderIntFeature(FeatureDisplayWindow, Feature, color); + } +} /* DisplayIntFeature */ + +/** + * This routine renders the specified proto into a + * global display list. + * + * Globals: + * - ProtoShapes global display list for protos + * @param Class class to take proto from + * @param ProtoId id of proto in Class to be displayed + * @param Evidence total evidence for proto (0-1) + */ +void DisplayIntProto(INT_CLASS Class, PROTO_ID ProtoId, float Evidence) { + ScrollView::Color color = GetMatchColorFor(Evidence); + RenderIntProto(IntMatchWindow, Class, ProtoId, color); + if (ProtoDisplayWindow) { + RenderIntProto(ProtoDisplayWindow, Class, ProtoId, color); + } +} /* DisplayIntProto */ +#endif + +/** + * This routine creates a new integer class data structure + * and returns it. Sufficient space is allocated + * to handle the specified number of protos and configs. + * @param MaxNumProtos number of protos to allocate space for + * @param MaxNumConfigs number of configs to allocate space for + * @return New class created. 
+ * @note Globals: none + */ +INT_CLASS NewIntClass(int MaxNumProtos, int MaxNumConfigs) { + INT_CLASS Class; + PROTO_SET ProtoSet; + int i; + + assert(MaxNumConfigs <= MAX_NUM_CONFIGS); + + Class = static_cast<INT_CLASS>(malloc(sizeof(INT_CLASS_STRUCT))); + Class->NumProtoSets = ((MaxNumProtos + PROTOS_PER_PROTO_SET - 1) / + PROTOS_PER_PROTO_SET); + + assert(Class->NumProtoSets <= MAX_NUM_PROTO_SETS); + + Class->NumProtos = 0; + Class->NumConfigs = 0; + + for (i = 0; i < Class->NumProtoSets; i++) { + /* allocate space for a proto set, install in class, and initialize */ + ProtoSet = static_cast<PROTO_SET>(malloc(sizeof(PROTO_SET_STRUCT))); + memset(ProtoSet, 0, sizeof(*ProtoSet)); + Class->ProtoSets[i] = ProtoSet; + + /* allocate space for the proto lengths and install in class */ + } + if (MaxNumIntProtosIn (Class) > 0) { + Class->ProtoLengths = + static_cast<uint8_t *>(malloc(MaxNumIntProtosIn (Class) * sizeof (uint8_t))); + memset(Class->ProtoLengths, 0, + MaxNumIntProtosIn(Class) * sizeof(*Class->ProtoLengths)); + } else { + Class->ProtoLengths = nullptr; + } + memset(Class->ConfigLengths, 0, sizeof(Class->ConfigLengths)); + + return (Class); + +} /* NewIntClass */ + +static void free_int_class(INT_CLASS int_class) { + int i; + + for (i = 0; i < int_class->NumProtoSets; i++) { + free (int_class->ProtoSets[i]); + } + if (int_class->ProtoLengths != nullptr) { + free (int_class->ProtoLengths); + } + free(int_class); +} + +/** + * This routine allocates a new set of integer templates + * initialized to hold 0 classes. + * @return The integer templates created. 
 * @note Globals: none
 */
INT_TEMPLATES NewIntTemplates() {
  INT_TEMPLATES T;
  int i;

  T = static_cast<INT_TEMPLATES>(malloc (sizeof (INT_TEMPLATES_STRUCT)));
  T->NumClasses = 0;
  T->NumClassPruners = 0;

  // Every class slot starts out empty.
  for (i = 0; i < MAX_NUM_CLASSES; i++)
    ClassForClassId (T, i) = nullptr;

  return (T);
} /* NewIntTemplates */


/*---------------------------------------------------------------------------*/
// Frees all classes, all class pruners, and the template struct itself.
void free_int_templates(INT_TEMPLATES templates) {
  int i;

  for (i = 0; i < templates->NumClasses; i++)
    free_int_class(templates->Class[i]);
  for (i = 0; i < templates->NumClassPruners; i++)
    delete templates->ClassPruners[i];
  free(templates);
}

/**
 * This routine reads a set of integer templates from
 * File.  File must already be open and must be in the
 * correct binary format.
 * @param fp open file to read templates from
 * @return Pointer to integer templates read from File.
 * @note Globals: none
 */
INT_TEMPLATES Classify::ReadIntTemplates(TFile *fp) {
  int i, j, w, x, y, z;
  int unicharset_size;
  int version_id = 0;  // 0 = unversioned (oldest) format
  INT_TEMPLATES Templates;
  CLASS_PRUNER_STRUCT* Pruner;
  INT_CLASS Class;
  uint8_t *Lengths;
  PROTO_SET ProtoSet;

  /* variables for conversion from older inttemp formats */
  int b, bit_number, last_cp_bit_number, new_b, new_i, new_w;
  CLASS_ID class_id, max_class_id;
  auto *IndexFor = new int16_t[MAX_NUM_CLASSES];
  auto *ClassIdFor = new CLASS_ID[MAX_NUM_CLASSES];
  auto **TempClassPruner =
      new CLASS_PRUNER_STRUCT*[MAX_NUM_CLASS_PRUNERS];
  uint32_t SetBitsForMask =           // word with NUM_BITS_PER_CLASS
      (1 << NUM_BITS_PER_CLASS) - 1;  // set starting at bit 0
  uint32_t Mask, NewMask, ClassBits;
  int MaxNumConfigs = MAX_NUM_CONFIGS;
  int WerdsPerConfigVec = WERDS_PER_CONFIG_VEC;

  /* first read the high level template struct */
  Templates = NewIntTemplates();
  // Read Templates in parts for 64 bit compatibility.
  if (fp->FReadEndian(&unicharset_size, sizeof(unicharset_size), 1) != 1)
    tprintf("Bad read of inttemp!\n");
  if (fp->FReadEndian(&Templates->NumClasses, sizeof(Templates->NumClasses),
                      1) != 1 ||
      fp->FReadEndian(&Templates->NumClassPruners,
                      sizeof(Templates->NumClassPruners), 1) != 1)
    tprintf("Bad read of inttemp!\n");
  if (Templates->NumClasses < 0) {
    // This file has a version id!
    // A negative class count encodes the format version; the real count
    // follows immediately after.
    version_id = -Templates->NumClasses;
    if (fp->FReadEndian(&Templates->NumClasses, sizeof(Templates->NumClasses),
                        1) != 1)
      tprintf("Bad read of inttemp!\n");
  }

  // Pre-version-3 files used the smaller old config-vector layout.
  if (version_id < 3) {
    MaxNumConfigs = OLD_MAX_NUM_CONFIGS;
    WerdsPerConfigVec = OLD_WERDS_PER_CONFIG_VEC;
  }

  // Pre-version-2 files carried explicit index<->class-id mapping tables.
  if (version_id < 2) {
    if (fp->FReadEndian(IndexFor, sizeof(IndexFor[0]), unicharset_size) !=
        unicharset_size) {
      tprintf("Bad read of inttemp!\n");
    }
    if (fp->FReadEndian(ClassIdFor, sizeof(ClassIdFor[0]),
                        Templates->NumClasses) != Templates->NumClasses) {
      tprintf("Bad read of inttemp!\n");
    }
  }

  /* then read in the class pruners */
  const int kNumBuckets =
      NUM_CP_BUCKETS * NUM_CP_BUCKETS * NUM_CP_BUCKETS * WERDS_PER_CP_VECTOR;
  for (i = 0; i < Templates->NumClassPruners; i++) {
    Pruner = new CLASS_PRUNER_STRUCT;
    if (fp->FReadEndian(Pruner, sizeof(Pruner->p[0][0][0][0]), kNumBuckets) !=
        kNumBuckets) {
      tprintf("Bad read of inttemp!\n");
    }
    if (version_id < 2) {
      // Old format: hold the pruners aside for re-indexing below.
      TempClassPruner[i] = Pruner;
    } else {
      Templates->ClassPruners[i] = Pruner;
    }
  }

  /* fix class pruners if they came from an old version of inttemp */
  if (version_id < 2) {
    // Allocate enough class pruners to cover all the class ids.
    max_class_id = 0;
    for (i = 0; i < Templates->NumClasses; i++)
      if (ClassIdFor[i] > max_class_id)
        max_class_id = ClassIdFor[i];
    for (i = 0; i <= CPrunerIdFor(max_class_id); i++) {
      Templates->ClassPruners[i] = new CLASS_PRUNER_STRUCT;
      memset(Templates->ClassPruners[i], 0, sizeof(CLASS_PRUNER_STRUCT));
    }
    // Convert class pruners from the old format (indexed by class index)
    // to the new format (indexed by class id).
    last_cp_bit_number = NUM_BITS_PER_CLASS * Templates->NumClasses - 1;
    for (i = 0; i < Templates->NumClassPruners; i++) {
      for (x = 0; x < NUM_CP_BUCKETS; x++)
        for (y = 0; y < NUM_CP_BUCKETS; y++)
          for (z = 0; z < NUM_CP_BUCKETS; z++)
            for (w = 0; w < WERDS_PER_CP_VECTOR; w++) {
              if (TempClassPruner[i]->p[x][y][z][w] == 0)
                continue;
              for (b = 0; b < BITS_PER_WERD; b += NUM_BITS_PER_CLASS) {
                bit_number = i * BITS_PER_CP_VECTOR + w * BITS_PER_WERD + b;
                if (bit_number > last_cp_bit_number)
                  break; // the rest of the bits in this word are not used
                class_id = ClassIdFor[bit_number / NUM_BITS_PER_CLASS];
                // Single out NUM_BITS_PER_CLASS bits relating to class_id.
                Mask = SetBitsForMask << b;
                ClassBits = TempClassPruner[i]->p[x][y][z][w] & Mask;
                // Move these bits to the new position in which they should
                // appear (indexed corresponding to the class_id).
                new_i = CPrunerIdFor(class_id);
                new_w = CPrunerWordIndexFor(class_id);
                new_b = CPrunerBitIndexFor(class_id) * NUM_BITS_PER_CLASS;
                if (new_b > b) {
                  ClassBits <<= (new_b - b);
                } else {
                  ClassBits >>= (b - new_b);
                }
                // Copy bits relating to class_id to the correct position
                // in Templates->ClassPruner.
                NewMask = SetBitsForMask << new_b;
                Templates->ClassPruners[new_i]->p[x][y][z][new_w] &= ~NewMask;
                Templates->ClassPruners[new_i]->p[x][y][z][new_w] |= ClassBits;
              }
            }
    }
    for (i = 0; i < Templates->NumClassPruners; i++) {
      delete TempClassPruner[i];
    }
  }

  /* then read in each class */
  for (i = 0; i < Templates->NumClasses; i++) {
    /* first read in the high level struct for the class */
    Class = static_cast<INT_CLASS>(malloc (sizeof (INT_CLASS_STRUCT)));
    if (fp->FReadEndian(&Class->NumProtos, sizeof(Class->NumProtos), 1) != 1 ||
        fp->FRead(&Class->NumProtoSets, sizeof(Class->NumProtoSets), 1) != 1 ||
        fp->FRead(&Class->NumConfigs, sizeof(Class->NumConfigs), 1) != 1)
      tprintf("Bad read of inttemp!\n");
    if (version_id == 0) {
      // Only version 0 writes 5 pointless pointers to the file.
      for (j = 0; j < 5; ++j) {
        int32_t junk;
        if (fp->FRead(&junk, sizeof(junk), 1) != 1)
          tprintf("Bad read of inttemp!\n");
      }
    }
    // Version 4+ writes only the configs actually used; older versions
    // always wrote the full fixed-size array.
    int num_configs = version_id < 4 ? MaxNumConfigs : Class->NumConfigs;
    ASSERT_HOST(num_configs <= MaxNumConfigs);
    if (fp->FReadEndian(Class->ConfigLengths, sizeof(uint16_t), num_configs) !=
        num_configs) {
      tprintf("Bad read of inttemp!\n");
    }
    if (version_id < 2) {
      ClassForClassId (Templates, ClassIdFor[i]) = Class;
    } else {
      ClassForClassId (Templates, i) = Class;
    }

    /* then read in the proto lengths */
    Lengths = nullptr;
    if (MaxNumIntProtosIn (Class) > 0) {
      Lengths = static_cast<uint8_t *>(malloc(sizeof(uint8_t) * MaxNumIntProtosIn(Class)));
      if (fp->FRead(Lengths, sizeof(uint8_t), MaxNumIntProtosIn(Class)) !=
          MaxNumIntProtosIn(Class))
        tprintf("Bad read of inttemp!\n");
    }
    Class->ProtoLengths = Lengths;

    /* then read in the proto sets */
    for (j = 0; j < Class->NumProtoSets; j++) {
      ProtoSet = static_cast<PROTO_SET>(malloc(sizeof(PROTO_SET_STRUCT)));
      int num_buckets = NUM_PP_PARAMS * NUM_PP_BUCKETS * WERDS_PER_PP_VECTOR;
      if (fp->FReadEndian(&ProtoSet->ProtoPruner,
                          sizeof(ProtoSet->ProtoPruner[0][0][0]),
                          num_buckets) != num_buckets)
        tprintf("Bad read of inttemp!\n");
      for (x = 0; x < PROTOS_PER_PROTO_SET; x++) {
        if (fp->FRead(&ProtoSet->Protos[x].A, sizeof(ProtoSet->Protos[x].A),
                      1) != 1 ||
            fp->FRead(&ProtoSet->Protos[x].B, sizeof(ProtoSet->Protos[x].B),
                      1) != 1 ||
            fp->FRead(&ProtoSet->Protos[x].C, sizeof(ProtoSet->Protos[x].C),
                      1) != 1 ||
            fp->FRead(&ProtoSet->Protos[x].Angle,
                      sizeof(ProtoSet->Protos[x].Angle), 1) != 1)
          tprintf("Bad read of inttemp!\n");
        if (fp->FReadEndian(&ProtoSet->Protos[x].Configs,
                            sizeof(ProtoSet->Protos[x].Configs[0]),
                            WerdsPerConfigVec) != WerdsPerConfigVec)
          tprintf("Bad read of inttemp!\n");
      }
      Class->ProtoSets[j] = ProtoSet;
    }
    if (version_id < 4) {
      Class->font_set_id = -1;
    } else {
      fp->FReadEndian(&Class->font_set_id, sizeof(Class->font_set_id), 1);
    }
  }

  if (version_id < 2) {
    /* add an empty nullptr class with class id 0 */
    assert(UnusedClassIdIn (Templates, 0));
    ClassForClassId (Templates, 0) = NewIntClass (1, 1);
    ClassForClassId (Templates, 0)->font_set_id = -1;
    Templates->NumClasses++;
    /* make sure the classes are contiguous */
    for (i = 0; i < MAX_NUM_CLASSES; i++) {
      if (i < Templates->NumClasses) {
        if (ClassForClassId (Templates, i) == nullptr) {
          fprintf(stderr, "Non-contiguous class ids in inttemp\n");
          exit(1);
        }
      } else {
        if (ClassForClassId (Templates, i) != nullptr) {
          fprintf(stderr, "Class id %d exceeds NumClassesIn (Templates) %d\n",
                  i, Templates->NumClasses);
          exit(1);
        }
      }
    }
  }
  // Version 4 added the font info/set tables; version 5 added spacing.
  if (version_id >= 4) {
    using namespace std::placeholders;  // for _1, _2
    this->fontinfo_table_.read(fp, std::bind(read_info, _1, _2));
    if (version_id >= 5) {
      this->fontinfo_table_.read(fp,
                                 std::bind(read_spacing_info, _1, _2));
    }
    this->fontset_table_.read(fp, std::bind(read_set, _1, _2));
  }

  // Clean up.
  delete[] IndexFor;
  delete[] ClassIdFor;
  delete[] TempClassPruner;

  return (Templates);
} /* ReadIntTemplates */


#ifndef GRAPHICS_DISABLED
/**
 * This routine sends the shapes in the global display
 * lists to the match debugger window.
 *
 * Globals:
 * - FeatureShapes display list containing feature matches
 * - ProtoShapes display list containing proto matches
 */
void Classify::ShowMatchDisplay() {
  InitIntMatchWindowIfReqd();
  if (ProtoDisplayWindow) {
    ProtoDisplayWindow->Clear();
  }
  if (FeatureDisplayWindow) {
    FeatureDisplayWindow->Clear();
  }
  ClearFeatureSpaceWindow(
      static_cast<NORM_METHOD>(static_cast<int>(classify_norm_method)),
      IntMatchWindow);
  IntMatchWindow->ZoomToRectangle(INT_MIN_X, INT_MIN_Y,
                                  INT_MAX_X, INT_MAX_Y);
  if (ProtoDisplayWindow) {
    ProtoDisplayWindow->ZoomToRectangle(INT_MIN_X, INT_MIN_Y,
                                        INT_MAX_X, INT_MAX_Y);
  }
  if (FeatureDisplayWindow) {
    FeatureDisplayWindow->ZoomToRectangle(INT_MIN_X, INT_MIN_Y,
                                          INT_MAX_X, INT_MAX_Y);
  }
} /* ShowMatchDisplay */

/// Clears the given window and draws the featurespace guides for the
/// appropriate normalization method.
void ClearFeatureSpaceWindow(NORM_METHOD norm_method, ScrollView* window) {
  window->Clear();

  window->Pen(ScrollView::GREY);
  // Draw the feature space limit rectangle.
  window->Rectangle(0, 0, INT_MAX_X, INT_MAX_Y);
  if (norm_method == baseline) {
    // Baseline normalization: draw the four horizontal reference lines.
    window->SetCursor(0, INT_DESCENDER);
    window->DrawTo(INT_MAX_X, INT_DESCENDER);
    window->SetCursor(0, INT_BASELINE);
    window->DrawTo(INT_MAX_X, INT_BASELINE);
    window->SetCursor(0, INT_XHEIGHT);
    window->DrawTo(INT_MAX_X, INT_XHEIGHT);
    window->SetCursor(0, INT_CAPHEIGHT);
    window->DrawTo(INT_MAX_X, INT_CAPHEIGHT);
  } else {
    // Character normalization: draw the centered reference box.
    window->Rectangle(INT_XCENTER - INT_XRADIUS, INT_YCENTER - INT_YRADIUS,
                      INT_XCENTER + INT_XRADIUS, INT_YCENTER + INT_YRADIUS);
  }
}
#endif

/**
 * This routine writes Templates to File.  The format
 * is an efficient binary format.
 File must already be open
 * for writing.
 * @param File open file to write templates to
 * @param Templates templates to save into File
 * @param target_unicharset the UNICHARSET to use
 */
void Classify::WriteIntTemplates(FILE *File, INT_TEMPLATES Templates,
                                 const UNICHARSET& target_unicharset) {
  int i, j;
  INT_CLASS Class;
  int unicharset_size = target_unicharset.size();
  int version_id = -5;  // When negated by the reader -1 becomes +1 etc.

  if (Templates->NumClasses != unicharset_size) {
    tprintf("Warning: executing WriteIntTemplates() with %d classes in"
            " Templates, while target_unicharset size is %d\n",
            Templates->NumClasses, unicharset_size);
  }

  /* first write the high level template struct */
  // The write order here is the read order in ReadIntTemplates; do not
  // reorder these fwrite calls.
  fwrite(&unicharset_size, sizeof(unicharset_size), 1, File);
  fwrite(&version_id, sizeof(version_id), 1, File);
  fwrite(&Templates->NumClassPruners, sizeof(Templates->NumClassPruners),
         1, File);
  fwrite(&Templates->NumClasses, sizeof(Templates->NumClasses), 1, File);

  /* then write out the class pruners */
  for (i = 0; i < Templates->NumClassPruners; i++)
    fwrite(Templates->ClassPruners[i],
           sizeof(CLASS_PRUNER_STRUCT), 1, File);

  /* then write out each class */
  for (i = 0; i < Templates->NumClasses; i++) {
    Class = Templates->Class[i];

    /* first write out the high level struct for the class */
    fwrite(&Class->NumProtos, sizeof(Class->NumProtos), 1, File);
    fwrite(&Class->NumProtoSets, sizeof(Class->NumProtoSets), 1, File);
    // The class's config count must agree with its interned font set.
    ASSERT_HOST(Class->NumConfigs == this->fontset_table_.get(Class->font_set_id).size);
    fwrite(&Class->NumConfigs, sizeof(Class->NumConfigs), 1, File);
    // Version 4+ writes only the configs in use (see ReadIntTemplates).
    for (j = 0; j < Class->NumConfigs; ++j) {
      fwrite(&Class->ConfigLengths[j], sizeof(uint16_t), 1, File);
    }

    /* then write out the proto lengths */
    if (MaxNumIntProtosIn (Class) > 0) {
      fwrite(Class->ProtoLengths, sizeof(uint8_t),
             MaxNumIntProtosIn(Class), File);
    }

    /* then write out the proto sets */
    for (j = 0; j < Class->NumProtoSets; j++)
      fwrite(Class->ProtoSets[j], sizeof(PROTO_SET_STRUCT), 1, File);

    /* then write the fonts info */
    fwrite(&Class->font_set_id, sizeof(int), 1, File);
  }

  /* Write the fonts info tables */
  using namespace std::placeholders;  // for _1, _2
  this->fontinfo_table_.write(File, std::bind(write_info, _1, _2));
  this->fontinfo_table_.write(File,
                              std::bind(write_spacing_info, _1, _2));
  this->fontset_table_.write(File, std::bind(write_set, _1, _2));
} /* WriteIntTemplates */

/*-----------------------------------------------------------------------------
              Private Code
-----------------------------------------------------------------------------*/
/**
 * This routine returns the parameter value which
 * corresponds to the beginning of the specified bucket.
 * The bucket number should have been generated using the
 * BucketFor() function with parameters Offset and NumBuckets.
 * @param Bucket bucket whose start is to be computed
 * @param Offset offset used to map params to buckets
 * @param NumBuckets total number of buckets
 * @return Param value corresponding to start position of Bucket.
 * @note Globals: none
 */
float BucketStart(int Bucket, float Offset, int NumBuckets) {
  // Inverse of the bucket mapping: undo the scale, then the offset.
  return ((static_cast<float>(Bucket) / NumBuckets) - Offset);

} /* BucketStart */

/**
 * This routine returns the parameter value which
 * corresponds to the end of the specified bucket.
 * The bucket number should have been generated using the
 * BucketFor() function with parameters Offset and NumBuckets.
 * @param Bucket bucket whose end is to be computed
 * @param Offset offset used to map params to buckets
 * @param NumBuckets total number of buckets
 * @return Param value corresponding to end position of Bucket.
 * @note Globals: none
 */
float BucketEnd(int Bucket, float Offset, int NumBuckets) {
  // End of bucket N is the start of bucket N+1.
  return ((static_cast<float>(Bucket + 1) / NumBuckets) - Offset);
} /* BucketEnd */

/**
 * This routine fills in the section of a class pruner
 * corresponding to a single x value for a single proto of
 * a class.
 * @param FillSpec specifies which bits to fill in pruner
 * @param Pruner class pruner to be filled
 * @param ClassMask indicates which bits to change in each word
 * @param ClassCount indicates what to change bits to
 * @param WordIndex indicates which word to change
 */
void DoFill(FILL_SPEC *FillSpec,
            CLASS_PRUNER_STRUCT* Pruner,
            uint32_t ClassMask,
            uint32_t ClassCount,
            uint32_t WordIndex) {
  int X, Y, Angle;
  uint32_t OldWord;

  // Clamp the x bucket into table bounds.
  X = FillSpec->X;
  if (X < 0)
    X = 0;
  if (X >= NUM_CP_BUCKETS)
    X = NUM_CP_BUCKETS - 1;

  // Clamp the y range in place (FillSpec is mutated deliberately).
  if (FillSpec->YStart < 0)
    FillSpec->YStart = 0;
  if (FillSpec->YEnd >= NUM_CP_BUCKETS)
    FillSpec->YEnd = NUM_CP_BUCKETS - 1;

  // The angle dimension is circular: walk from AngleStart, wrapping,
  // until AngleEnd has been processed (inclusive).
  for (Y = FillSpec->YStart; Y <= FillSpec->YEnd; Y++)
    for (Angle = FillSpec->AngleStart; ;
         CircularIncrement(Angle, NUM_CP_BUCKETS)) {
      OldWord = Pruner->p[X][Y][Angle][WordIndex];
      // Only raise the stored level, never lower it.
      if (ClassCount > (OldWord & ClassMask)) {
        OldWord &= ~ClassMask;
        OldWord |= ClassCount;
        Pruner->p[X][Y][Angle][WordIndex] = OldWord;
      }
      if (Angle == FillSpec->AngleEnd)
        break;
    }
} /* DoFill */

/**
 * Return true if the specified table filler is done, i.e.
 * if it has no more lines to fill.
 * @param Filler table filler to check if done
 * @return true if no more lines to fill, false otherwise.
 * @note Globals: none
 */
bool FillerDone(TABLE_FILLER* Filler) {
  FILL_SWITCH *Next;

  Next = &(Filler->Switch[Filler->NextSwitch]);

  // Done when the sweep has passed the final switch point.
  return Filler->X > Next->X && Next->Type == LastSwitch;

} /* FillerDone */

/**
 * This routine sets Bit in each bit vector whose
 * bucket lies within the range Center +- Spread.  The fill
 * is done for a circular dimension, i.e.
bucket 0 is adjacent + * to the last bucket. It is assumed that Center and Spread + * are expressed in a circular coordinate system whose range + * is 0 to 1. + * @param ParamTable table of bit vectors, one per param bucket + * @param Bit bit position in vectors to be filled + * @param Center center of filled area + * @param Spread spread of filled area + * @param debug debug flag + */ +void FillPPCircularBits(uint32_t ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR], + int Bit, float Center, float Spread, bool debug) { + int i, FirstBucket, LastBucket; + + if (Spread > 0.5) + Spread = 0.5; + + FirstBucket = static_cast<int>(std::floor((Center - Spread) * NUM_PP_BUCKETS)); + if (FirstBucket < 0) + FirstBucket += NUM_PP_BUCKETS; + + LastBucket = static_cast<int>(std::floor((Center + Spread) * NUM_PP_BUCKETS)); + if (LastBucket >= NUM_PP_BUCKETS) + LastBucket -= NUM_PP_BUCKETS; + if (debug) tprintf("Circular fill from %d to %d", FirstBucket, LastBucket); + for (i = FirstBucket; true; CircularIncrement (i, NUM_PP_BUCKETS)) { + SET_BIT (ParamTable[i], Bit); + + /* exit loop after we have set the bit for the last bucket */ + if (i == LastBucket) + break; + } + +} /* FillPPCircularBits */ + +/** + * This routine sets Bit in each bit vector whose + * bucket lies within the range Center +- Spread. The fill + * is done for a linear dimension, i.e. there is no wrap-around + * for this dimension. It is assumed that Center and Spread + * are expressed in a linear coordinate system whose range + * is approximately 0 to 1. Values outside this range will + * be clipped. 
/* @param ParamTable table of bit vectors, one per param bucket
   @param Bit bit number being filled
   @param Center center of filled area
   @param Spread spread of filled area
   @param debug debug flag */
void FillPPLinearBits(uint32_t ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR],
                      int Bit, float Center, float Spread, bool debug) {
  int i, FirstBucket, LastBucket;

  // Unlike the circular fill, out-of-range buckets are clipped, not wrapped.
  FirstBucket = static_cast<int>(std::floor((Center - Spread) * NUM_PP_BUCKETS));
  if (FirstBucket < 0)
    FirstBucket = 0;

  LastBucket = static_cast<int>(std::floor((Center + Spread) * NUM_PP_BUCKETS));
  if (LastBucket >= NUM_PP_BUCKETS)
    LastBucket = NUM_PP_BUCKETS - 1;

  if (debug) tprintf("Linear fill from %d to %d", FirstBucket, LastBucket);
  for (i = FirstBucket; i <= LastBucket; i++)
    SET_BIT (ParamTable[i], Bit);

} /* FillPPLinearBits */


/*---------------------------------------------------------------------------*/
#ifndef GRAPHICS_DISABLED
/**
 * This routine prompts the user with Prompt and waits
 * for the user to enter something in the debug window.
 * @param Prompt prompt to print while waiting for input from window
 * @param adaptive_on set when the adaptive templates should be debugged
 * @param pretrained_on set when the static templates should be debugged
 * @param shape_id output shape index (-1 when not debugging by shape)
 * @return Character entered in the debug window.
 * @note Globals: none
 */
CLASS_ID Classify::GetClassToDebug(const char *Prompt, bool* adaptive_on,
                                   bool* pretrained_on, int* shape_id) {
  tprintf("%s\n", Prompt);
  SVEvent* ev;
  SVEventType ev_type;
  int unichar_id = INVALID_UNICHAR_ID;
  // Wait until a click or popup event.
  do {
    ev = IntMatchWindow->AwaitEvent(SVET_ANY);
    ev_type = ev->type;
    if (ev_type == SVET_POPUP) {
      if (ev->command_id == IDA_SHAPE_INDEX) {
        // Debug by shape-table index: parameter holds the shape number.
        if (shape_table_ != nullptr) {
          *shape_id = atoi(ev->parameter);
          *adaptive_on = false;
          *pretrained_on = true;
          if (*shape_id >= 0 && *shape_id < shape_table_->NumShapes()) {
            int font_id;
            shape_table_->GetFirstUnicharAndFont(*shape_id, &unichar_id,
                                                 &font_id);
            tprintf("Shape %d, first unichar=%d, font=%d\n",
                    *shape_id, unichar_id, font_id);
            return unichar_id;
          }
          tprintf("Shape index '%s' not found in shape table\n", ev->parameter);
        } else {
          tprintf("No shape table loaded!\n");
        }
      } else {
        // Debug by character: parameter holds the unichar text.
        if (unicharset.contains_unichar(ev->parameter)) {
          unichar_id = unicharset.unichar_to_id(ev->parameter);
          if (ev->command_id == IDA_ADAPTIVE) {
            *adaptive_on = true;
            *pretrained_on = false;
            *shape_id = -1;
          } else if (ev->command_id == IDA_STATIC) {
            *adaptive_on = false;
            *pretrained_on = true;
          } else {
            // IDA_BOTH: debug adaptive and static templates together.
            *adaptive_on = true;
            *pretrained_on = true;
          }
          if (ev->command_id == IDA_ADAPTIVE || shape_table_ == nullptr) {
            // No shape table involved: return directly.
            *shape_id = -1;
            return unichar_id;
          }
          // List every shape containing this unichar before looping for
          // the selecting click.
          for (int s = 0; s < shape_table_->NumShapes(); ++s) {
            if (shape_table_->GetShape(s).ContainsUnichar(unichar_id)) {
              tprintf("%s\n", shape_table_->DebugStr(s).c_str());
            }
          }
        } else {
          // NOTE(review): message lacks a trailing '\n' unlike the others —
          // confirm intended.
          tprintf("Char class '%s' not found in unicharset",
                  ev->parameter);
        }
      }
    }
    delete ev;
  } while (ev_type != SVET_CLICK);
  return 0;
} /* GetClassToDebug */

#endif

/**
 * This routine copies the appropriate global pad variables
 * into EndPad, SidePad, and AnglePad.  This is a kludge used
 * to get around the fact that global control variables cannot
 * be arrays.  If the specified level is illegal, the tightest
 * possible pads are returned.
+ * @param Level "tightness" level to return pads for + * @param EndPad place to put end pad for Level + * @param SidePad place to put side pad for Level + * @param AnglePad place to put angle pad for Level + */ +void GetCPPadsForLevel(int Level, + float *EndPad, + float *SidePad, + float *AnglePad) { + switch (Level) { + case 0: + *EndPad = classify_cp_end_pad_loose * GetPicoFeatureLength (); + *SidePad = classify_cp_side_pad_loose * GetPicoFeatureLength (); + *AnglePad = classify_cp_angle_pad_loose / 360.0; + break; + + case 1: + *EndPad = classify_cp_end_pad_medium * GetPicoFeatureLength (); + *SidePad = classify_cp_side_pad_medium * GetPicoFeatureLength (); + *AnglePad = classify_cp_angle_pad_medium / 360.0; + break; + + case 2: + *EndPad = classify_cp_end_pad_tight * GetPicoFeatureLength (); + *SidePad = classify_cp_side_pad_tight * GetPicoFeatureLength (); + *AnglePad = classify_cp_angle_pad_tight / 360.0; + break; + + default: + *EndPad = classify_cp_end_pad_tight * GetPicoFeatureLength (); + *SidePad = classify_cp_side_pad_tight * GetPicoFeatureLength (); + *AnglePad = classify_cp_angle_pad_tight / 360.0; + break; + } + if (*AnglePad > 0.5) + *AnglePad = 0.5; + +} /* GetCPPadsForLevel */ + +/** + * @param Evidence evidence value to return color for + * @return Color which corresponds to specified Evidence value. + * @note Globals: none + */ +ScrollView::Color GetMatchColorFor(float Evidence) { + assert (Evidence >= 0.0); + assert (Evidence <= 1.0); + + if (Evidence >= 0.90) + return ScrollView::WHITE; + else if (Evidence >= 0.75) + return ScrollView::GREEN; + else if (Evidence >= 0.50) + return ScrollView::RED; + else + return ScrollView::BLUE; +} /* GetMatchColorFor */ + +/** + * This routine returns (in Fill) the specification of + * the next line to be filled from Filler. FillerDone() should + * always be called before GetNextFill() to ensure that we + * do not run past the end of the fill table. 
/* @param Filler filler to get next fill spec from
   @param Fill place to put spec for next fill */
void GetNextFill(TABLE_FILLER *Filler, FILL_SPEC *Fill) {
  FILL_SWITCH *Next;

  /* compute the fill assuming no switches will be encountered */
  Fill->AngleStart = Filler->AngleStart;
  Fill->AngleEnd = Filler->AngleEnd;
  Fill->X = Filler->X;
  // Filler YStart/YEnd appear to be 8.8 fixed point (see the *256 bucket
  // scaling in InitTableFiller); the shift recovers the integer bucket.
  Fill->YStart = Filler->YStart >> 8;
  Fill->YEnd = Filler->YEnd >> 8;

  /* update the fill info and the filler for ALL switches at this X value */
  Next = &(Filler->Switch[Filler->NextSwitch]);
  while (Filler->X >= Next->X) {
    // Snap the fill (and the filler) to the switch column.
    Fill->X = Filler->X = Next->X;
    if (Next->Type == StartSwitch) {
      // The lower edge of the fill region changes slope here.
      Fill->YStart = Next->Y;
      Filler->StartDelta = Next->Delta;
      Filler->YStart = Next->YInit;
    }
    else if (Next->Type == EndSwitch) {
      // The upper edge of the fill region changes slope here.
      Fill->YEnd = Next->Y;
      Filler->EndDelta = Next->Delta;
      Filler->YEnd = Next->YInit;
    }
    else { /* Type must be LastSwitch */
      break;
    }
    Filler->NextSwitch++;
    Next = &(Filler->Switch[Filler->NextSwitch]);
  }

  /* prepare the filler for the next call to this routine */
  Filler->X++;
  Filler->YStart += Filler->StartDelta;
  Filler->YEnd += Filler->EndDelta;

} /* GetNextFill */

/**
 * This routine computes a data structure (Filler)
 * which can be used to fill in a rectangle surrounding
 * the specified Proto.  Results are returned in Filler.
+ * + * @param EndPad, SidePad, AnglePad padding to add to proto + * @param Proto proto to create a filler for + * @param Filler place to put table filler + */ +void InitTableFiller (float EndPad, float SidePad, + float AnglePad, PROTO Proto, TABLE_FILLER * Filler) +#define XS X_SHIFT +#define YS Y_SHIFT +#define AS ANGLE_SHIFT +#define NB NUM_CP_BUCKETS +{ + float Angle; + float X, Y, HalfLength; + float Cos, Sin; + float XAdjust, YAdjust; + FPOINT Start, Switch1, Switch2, End; + int S1 = 0; + int S2 = 1; + + Angle = Proto->Angle; + X = Proto->X; + Y = Proto->Y; + HalfLength = Proto->Length / 2.0; + + Filler->AngleStart = CircBucketFor(Angle - AnglePad, AS, NB); + Filler->AngleEnd = CircBucketFor(Angle + AnglePad, AS, NB); + Filler->NextSwitch = 0; + + if (fabs (Angle - 0.0) < HV_TOLERANCE || fabs (Angle - 0.5) < HV_TOLERANCE) { + /* horizontal proto - handle as special case */ + Filler->X = Bucket8For(X - HalfLength - EndPad, XS, NB); + Filler->YStart = Bucket16For(Y - SidePad, YS, NB * 256); + Filler->YEnd = Bucket16For(Y + SidePad, YS, NB * 256); + Filler->StartDelta = 0; + Filler->EndDelta = 0; + Filler->Switch[0].Type = LastSwitch; + Filler->Switch[0].X = Bucket8For(X + HalfLength + EndPad, XS, NB); + } else if (fabs(Angle - 0.25) < HV_TOLERANCE || + fabs(Angle - 0.75) < HV_TOLERANCE) { + /* vertical proto - handle as special case */ + Filler->X = Bucket8For(X - SidePad, XS, NB); + Filler->YStart = Bucket16For(Y - HalfLength - EndPad, YS, NB * 256); + Filler->YEnd = Bucket16For(Y + HalfLength + EndPad, YS, NB * 256); + Filler->StartDelta = 0; + Filler->EndDelta = 0; + Filler->Switch[0].Type = LastSwitch; + Filler->Switch[0].X = Bucket8For(X + SidePad, XS, NB); + } else { + /* diagonal proto */ + + if ((Angle > 0.0 && Angle < 0.25) || (Angle > 0.5 && Angle < 0.75)) { + /* rising diagonal proto */ + Angle *= 2.0 * M_PI; + Cos = fabs(cos(Angle)); + Sin = fabs(sin(Angle)); + + /* compute the positions of the corners of the acceptance region */ + Start.x = X - 
(HalfLength + EndPad) * Cos - SidePad * Sin; + Start.y = Y - (HalfLength + EndPad) * Sin + SidePad * Cos; + End.x = 2.0 * X - Start.x; + End.y = 2.0 * Y - Start.y; + Switch1.x = X - (HalfLength + EndPad) * Cos + SidePad * Sin; + Switch1.y = Y - (HalfLength + EndPad) * Sin - SidePad * Cos; + Switch2.x = 2.0 * X - Switch1.x; + Switch2.y = 2.0 * Y - Switch1.y; + + if (Switch1.x > Switch2.x) { + S1 = 1; + S2 = 0; + } + + /* translate into bucket positions and deltas */ + Filler->X = Bucket8For(Start.x, XS, NB); + Filler->StartDelta = -static_cast<int16_t>((Cos / Sin) * 256); + Filler->EndDelta = static_cast<int16_t>((Sin / Cos) * 256); + + XAdjust = BucketEnd(Filler->X, XS, NB) - Start.x; + YAdjust = XAdjust * Cos / Sin; + Filler->YStart = Bucket16For(Start.y - YAdjust, YS, NB * 256); + YAdjust = XAdjust * Sin / Cos; + Filler->YEnd = Bucket16For(Start.y + YAdjust, YS, NB * 256); + + Filler->Switch[S1].Type = StartSwitch; + Filler->Switch[S1].X = Bucket8For(Switch1.x, XS, NB); + Filler->Switch[S1].Y = Bucket8For(Switch1.y, YS, NB); + XAdjust = Switch1.x - BucketStart(Filler->Switch[S1].X, XS, NB); + YAdjust = XAdjust * Sin / Cos; + Filler->Switch[S1].YInit = Bucket16For(Switch1.y - YAdjust, YS, NB * 256); + Filler->Switch[S1].Delta = Filler->EndDelta; + + Filler->Switch[S2].Type = EndSwitch; + Filler->Switch[S2].X = Bucket8For(Switch2.x, XS, NB); + Filler->Switch[S2].Y = Bucket8For(Switch2.y, YS, NB); + XAdjust = Switch2.x - BucketStart(Filler->Switch[S2].X, XS, NB); + YAdjust = XAdjust * Cos / Sin; + Filler->Switch[S2].YInit = Bucket16For(Switch2.y + YAdjust, YS, NB * 256); + Filler->Switch[S2].Delta = Filler->StartDelta; + + Filler->Switch[2].Type = LastSwitch; + Filler->Switch[2].X = Bucket8For(End.x, XS, NB); + } else { + /* falling diagonal proto */ + Angle *= 2.0 * M_PI; + Cos = fabs(cos(Angle)); + Sin = fabs(sin(Angle)); + + /* compute the positions of the corners of the acceptance region */ + Start.x = X - (HalfLength + EndPad) * Cos - SidePad * Sin; + Start.y = 
Y + (HalfLength + EndPad) * Sin - SidePad * Cos; + End.x = 2.0 * X - Start.x; + End.y = 2.0 * Y - Start.y; + Switch1.x = X - (HalfLength + EndPad) * Cos + SidePad * Sin; + Switch1.y = Y + (HalfLength + EndPad) * Sin + SidePad * Cos; + Switch2.x = 2.0 * X - Switch1.x; + Switch2.y = 2.0 * Y - Switch1.y; + + if (Switch1.x > Switch2.x) { + S1 = 1; + S2 = 0; + } + + /* translate into bucket positions and deltas */ + Filler->X = Bucket8For(Start.x, XS, NB); + Filler->StartDelta = static_cast<int16_t>(ClipToRange<int>( + -IntCastRounded((Sin / Cos) * 256), INT16_MIN, INT16_MAX)); + Filler->EndDelta = static_cast<int16_t>(ClipToRange<int>( + IntCastRounded((Cos / Sin) * 256), INT16_MIN, INT16_MAX)); + + XAdjust = BucketEnd(Filler->X, XS, NB) - Start.x; + YAdjust = XAdjust * Sin / Cos; + Filler->YStart = Bucket16For(Start.y - YAdjust, YS, NB * 256); + YAdjust = XAdjust * Cos / Sin; + Filler->YEnd = Bucket16For(Start.y + YAdjust, YS, NB * 256); + + Filler->Switch[S1].Type = EndSwitch; + Filler->Switch[S1].X = Bucket8For(Switch1.x, XS, NB); + Filler->Switch[S1].Y = Bucket8For(Switch1.y, YS, NB); + XAdjust = Switch1.x - BucketStart(Filler->Switch[S1].X, XS, NB); + YAdjust = XAdjust * Sin / Cos; + Filler->Switch[S1].YInit = Bucket16For(Switch1.y + YAdjust, YS, NB * 256); + Filler->Switch[S1].Delta = Filler->StartDelta; + + Filler->Switch[S2].Type = StartSwitch; + Filler->Switch[S2].X = Bucket8For(Switch2.x, XS, NB); + Filler->Switch[S2].Y = Bucket8For(Switch2.y, YS, NB); + XAdjust = Switch2.x - BucketStart(Filler->Switch[S2].X, XS, NB); + YAdjust = XAdjust * Cos / Sin; + Filler->Switch[S2].YInit = Bucket16For(Switch2.y - YAdjust, YS, NB * 256); + Filler->Switch[S2].Delta = Filler->EndDelta; + + Filler->Switch[2].Type = LastSwitch; + Filler->Switch[2].X = Bucket8For(End.x, XS, NB); + } + } +} /* InitTableFiller */ + + +/*---------------------------------------------------------------------------*/ +#ifndef GRAPHICS_DISABLED +/** + * This routine renders the specified feature 
into ShapeList. + * @param window to add feature rendering to + * @param Feature feature to be rendered + * @param color color to use for feature rendering + * @return New shape list with rendering of Feature added. + * @note Globals: none + */ +void RenderIntFeature(ScrollView *window, const INT_FEATURE_STRUCT* Feature, + ScrollView::Color color) { + float X, Y, Dx, Dy, Length; + + window->Pen(color); + assert(Feature != nullptr); + assert(color != 0); + + X = Feature->X; + Y = Feature->Y; + Length = GetPicoFeatureLength() * 0.7 * INT_CHAR_NORM_RANGE; + // The -PI has no significant effect here, but the value of Theta is computed + // using BinaryAnglePlusPi in intfx.cpp. + Dx = (Length / 2.0) * cos((Feature->Theta / 256.0) * 2.0 * M_PI - M_PI); + Dy = (Length / 2.0) * sin((Feature->Theta / 256.0) * 2.0 * M_PI - M_PI); + + window->SetCursor(X, Y); + window->DrawTo(X + Dx, Y + Dy); +} /* RenderIntFeature */ + +/** + * This routine extracts the parameters of the specified + * proto from the class description and adds a rendering of + * the proto onto the ShapeList. + * + * @param window ScrollView instance + * @param Class class that proto is contained in + * @param ProtoId id of proto to be rendered + * @param color color to render proto in + * + * Globals: none + * + * @return New shape list with a rendering of one proto added. 
/* (closes doc comment above) */
void RenderIntProto(ScrollView *window,
                    INT_CLASS Class,
                    PROTO_ID ProtoId,
                    ScrollView::Color color) {
  PROTO_SET ProtoSet;
  INT_PROTO Proto;
  int ProtoSetIndex;
  int ProtoWordIndex;
  float Length;
  int Xmin, Xmax, Ymin, Ymax;
  float X, Y, Dx, Dy;
  uint32_t ProtoMask;
  int Bucket;

  assert(ProtoId >= 0);
  assert(Class != nullptr);
  assert(ProtoId < Class->NumProtos);
  assert(color != 0);
  window->Pen(color);

  ProtoSet = Class->ProtoSets[SetForProto(ProtoId)];
  ProtoSetIndex = IndexForProto(ProtoId);
  Proto = &(ProtoSet->Protos[ProtoSetIndex]);
  Length = (Class->ProtoLengths[ProtoId] *
            GetPicoFeatureLength() * INT_CHAR_NORM_RANGE);
  ProtoMask = PPrunerMaskFor(ProtoId);
  ProtoWordIndex = PPrunerWordIndexFor(ProtoId);

  // find the x and y extent of the proto from the proto pruning table
  Xmin = Ymin = NUM_PP_BUCKETS;
  Xmax = Ymax = 0;
  for (Bucket = 0; Bucket < NUM_PP_BUCKETS; Bucket++) {
    // A set bit means this proto can fire for features in this bucket.
    if (ProtoMask & ProtoSet->ProtoPruner[PRUNER_X][Bucket][ProtoWordIndex]) {
      UpdateRange(Bucket, &Xmin, &Xmax);
    }

    if (ProtoMask & ProtoSet->ProtoPruner[PRUNER_Y][Bucket][ProtoWordIndex]) {
      UpdateRange(Bucket, &Ymin, &Ymax);
    }
  }
  // Center of the proto's pruner bounding box, scaled to display space.
  X = (Xmin + Xmax + 1) / 2.0 * PROTO_PRUNER_SCALE;
  Y = (Ymin + Ymax + 1) / 2.0 * PROTO_PRUNER_SCALE;
  // The -PI has no significant effect here, but the value of Theta is computed
  // using BinaryAnglePlusPi in intfx.cpp.
  Dx = (Length / 2.0) * cos((Proto->Angle / 256.0) * 2.0 * M_PI - M_PI);
  Dy = (Length / 2.0) * sin((Proto->Angle / 256.0) * 2.0 * M_PI - M_PI);

  // Draw the proto as a line segment through the box center.
  window->SetCursor(X - Dx, Y - Dy);
  window->DrawTo(X + Dx, Y + Dy);
} /* RenderIntProto */
#endif

#ifndef GRAPHICS_DISABLED
/**
 * Initializes the int matcher window if it is not already
 * initialized.
+ */ +void InitIntMatchWindowIfReqd() { + if (IntMatchWindow == nullptr) { + IntMatchWindow = CreateFeatureSpaceWindow("IntMatchWindow", 50, 200); + auto* popup_menu = new SVMenuNode(); + + popup_menu->AddChild("Debug Adapted classes", IDA_ADAPTIVE, + "x", "Class to debug"); + popup_menu->AddChild("Debug Static classes", IDA_STATIC, + "x", "Class to debug"); + popup_menu->AddChild("Debug Both", IDA_BOTH, + "x", "Class to debug"); + popup_menu->AddChild("Debug Shape Index", IDA_SHAPE_INDEX, + "0", "Index to debug"); + popup_menu->BuildMenu(IntMatchWindow, false); + } +} + +/** + * Initializes the proto display window if it is not already + * initialized. + */ +void InitProtoDisplayWindowIfReqd() { + if (ProtoDisplayWindow == nullptr) { + ProtoDisplayWindow = CreateFeatureSpaceWindow("ProtoDisplayWindow", + 550, 200); + } +} + +/** + * Initializes the feature display window if it is not already + * initialized. + */ +void InitFeatureDisplayWindowIfReqd() { + if (FeatureDisplayWindow == nullptr) { + FeatureDisplayWindow = CreateFeatureSpaceWindow("FeatureDisplayWindow", + 50, 700); + } +} + +/// Creates a window of the appropriate size for displaying elements +/// in feature space. +ScrollView* CreateFeatureSpaceWindow(const char* name, int xpos, int ypos) { + return new ScrollView(name, xpos, ypos, 520, 520, 260, 260, true); +} +#endif // !GRAPHICS_DISABLED + +} // namespace tesseract diff --git a/tesseract/src/classify/intproto.h b/tesseract/src/classify/intproto.h new file mode 100644 index 00000000..77bf2376 --- /dev/null +++ b/tesseract/src/classify/intproto.h @@ -0,0 +1,265 @@ +/****************************************************************************** + ** Filename: intproto.h + ** Purpose: Definition of data structures for integer protos. + ** Author: Dan Johnson + ** + ** (c) Copyright Hewlett-Packard Company, 1988. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. 
/******************************************************************************
 ** (license, continued)
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *****************************************************************************/

#ifndef INTPROTO_H
#define INTPROTO_H

/**----------------------------------------------------------------------------
          Include Files and Type Defines
----------------------------------------------------------------------------**/
#include "matchdefs.h"
#include "mfoutline.h"
#include "protos.h"
#include "scrollview.h"
#include "unicharset.h"

namespace tesseract {

class FCOORD;

/* define order of params in pruners */
#define PRUNER_X      0
#define PRUNER_Y      1
#define PRUNER_ANGLE  2

/* definition of coordinate system offsets for each table parameter */
#define ANGLE_SHIFT (0.0)
#define X_SHIFT (0.5)
#define Y_SHIFT (0.5)

#define MAX_PROTO_INDEX 24
#define BITS_PER_WERD static_cast<int>(8 * sizeof(uint32_t))
/* Script detection: increase this number to 128 */
#define MAX_NUM_CONFIGS 64
#define MAX_NUM_PROTOS 512
#define PROTOS_PER_PROTO_SET 64
#define MAX_NUM_PROTO_SETS (MAX_NUM_PROTOS / PROTOS_PER_PROTO_SET)
#define NUM_PP_PARAMS 3
#define NUM_PP_BUCKETS 64
#define NUM_CP_BUCKETS 24
#define CLASSES_PER_CP 32
#define NUM_BITS_PER_CLASS 2
#define CLASS_PRUNER_CLASS_MASK (~(~0u << NUM_BITS_PER_CLASS))
#define CLASSES_PER_CP_WERD (CLASSES_PER_CP / NUM_BITS_PER_CLASS)
#define PROTOS_PER_PP_WERD BITS_PER_WERD
#define BITS_PER_CP_VECTOR (CLASSES_PER_CP * NUM_BITS_PER_CLASS)
#define MAX_NUM_CLASS_PRUNERS \
  ((MAX_NUM_CLASSES + CLASSES_PER_CP - 1) / CLASSES_PER_CP)
#define WERDS_PER_CP_VECTOR (BITS_PER_CP_VECTOR / BITS_PER_WERD)
#define WERDS_PER_PP_VECTOR \
  ((PROTOS_PER_PROTO_SET + BITS_PER_WERD - 1) / BITS_PER_WERD)
#define WERDS_PER_PP (NUM_PP_PARAMS * NUM_PP_BUCKETS * WERDS_PER_PP_VECTOR)
#define WERDS_PER_CP \
  (NUM_CP_BUCKETS * NUM_CP_BUCKETS * NUM_CP_BUCKETS * WERDS_PER_CP_VECTOR)
#define WERDS_PER_CONFIG_VEC \
  ((MAX_NUM_CONFIGS + BITS_PER_WERD - 1) / BITS_PER_WERD)

/* The first 3 dimensions of the CLASS_PRUNER_STRUCT are the
 * 3 axes of the quantized feature space.
 * The position of the bits recorded for each class in the
 * 4th dimension is determined by using CPrunerWordIndexFor(c),
 * where c is the corresponding class id. */
struct CLASS_PRUNER_STRUCT {
  uint32_t p[NUM_CP_BUCKETS][NUM_CP_BUCKETS][NUM_CP_BUCKETS]
            [WERDS_PER_CP_VECTOR];
};

// A single proto in integer form, plus the bit vector of configs it
// belongs to.  (A/B/C/Angle are quantized proto parameters — see the
// proto-building code for their exact meaning.)
typedef struct {
  int8_t A;
  uint8_t B;
  int8_t C;
  uint8_t Angle;
  uint32_t Configs[WERDS_PER_CONFIG_VEC];
}

INT_PROTO_STRUCT,
*INT_PROTO;

typedef uint32_t PROTO_PRUNER[NUM_PP_PARAMS][NUM_PP_BUCKETS]
                             [WERDS_PER_PP_VECTOR];

// A bank of PROTOS_PER_PROTO_SET protos sharing one proto pruner.
typedef struct {
  PROTO_PRUNER ProtoPruner;
  INT_PROTO_STRUCT Protos[PROTOS_PER_PROTO_SET];
}

PROTO_SET_STRUCT,
*PROTO_SET;

typedef uint32_t CONFIG_PRUNER[NUM_PP_PARAMS][NUM_PP_BUCKETS][4];

// All integer protos and configs for one class.
typedef struct {
  uint16_t NumProtos;
  uint8_t NumProtoSets;
  uint8_t NumConfigs;
  PROTO_SET ProtoSets[MAX_NUM_PROTO_SETS];
  uint8_t* ProtoLengths;
  uint16_t ConfigLengths[MAX_NUM_CONFIGS];
  int font_set_id;  // FontSet id, see above
}

INT_CLASS_STRUCT,
*INT_CLASS;

// The full set of integer templates: per-class data plus class pruners.
typedef struct {
  int NumClasses;
  int NumClassPruners;
  INT_CLASS Class[MAX_NUM_CLASSES];
  CLASS_PRUNER_STRUCT* ClassPruners[MAX_NUM_CLASS_PRUNERS];
}

INT_TEMPLATES_STRUCT,
*INT_TEMPLATES;

/* definitions of integer features*/
#define MAX_NUM_INT_FEATURES 512
#define INT_CHAR_NORM_RANGE 256

// A quantized feature: position (X, Y) and direction (Theta), each in
// one byte, plus a class-pruner miss counter.
struct INT_FEATURE_STRUCT {
  INT_FEATURE_STRUCT() : X(0), Y(0), Theta(0), CP_misses(0) {}
  // Builds a feature from an FCOORD for position with all the necessary
  // clipping and rounding.
  INT_FEATURE_STRUCT(const FCOORD& pos, uint8_t theta);
  // Builds a feature from ints with all the necessary clipping and casting.
  INT_FEATURE_STRUCT(int x, int y, int theta);

  uint8_t X;
  uint8_t Y;
  uint8_t Theta;
  int8_t CP_misses;

  void print() const {
    tprintf("(%d,%d):%d\n", X, Y, Theta);
  }
};

using INT_FEATURE = INT_FEATURE_STRUCT*;

typedef INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES];

enum IntmatcherDebugAction {
  IDA_ADAPTIVE,
  IDA_STATIC,
  IDA_SHAPE_INDEX,
  IDA_BOTH
};

/**----------------------------------------------------------------------------
            Macros
----------------------------------------------------------------------------**/

#define MaxNumIntProtosIn(C)  (C->NumProtoSets * PROTOS_PER_PROTO_SET)
#define SetForProto(P)        (P / PROTOS_PER_PROTO_SET)
#define IndexForProto(P)      (P % PROTOS_PER_PROTO_SET)
#define ProtoForProtoId(C, P) \
  (&((C->ProtoSets[SetForProto(P)])->Protos[IndexForProto(P)]))
#define PPrunerWordIndexFor(I) \
  (((I) % PROTOS_PER_PROTO_SET) / PROTOS_PER_PP_WERD)
#define PPrunerBitIndexFor(I) ((I) % PROTOS_PER_PP_WERD)
#define PPrunerMaskFor(I) (1 << PPrunerBitIndexFor(I))

#define MaxNumClassesIn(T) (T->NumClassPruners * CLASSES_PER_CP)
#define LegalClassId(c) ((c) >= 0 && (c) < MAX_NUM_CLASSES)
#define UnusedClassIdIn(T, c) ((T)->Class[c] == nullptr)
#define ClassForClassId(T, c) ((T)->Class[c])
#define ClassPrunersFor(T) ((T)->ClassPruner)
#define CPrunerIdFor(c) ((c) / CLASSES_PER_CP)
#define CPrunerFor(T, c) ((T)->ClassPruners[CPrunerIdFor(c)])
#define CPrunerWordIndexFor(c) (((c) % CLASSES_PER_CP) / CLASSES_PER_CP_WERD)
#define CPrunerBitIndexFor(c) (((c) % CLASSES_PER_CP) % CLASSES_PER_CP_WERD)
#define CPrunerMaskFor(L, c) \
  (((L) + 1) << CPrunerBitIndexFor(c) * NUM_BITS_PER_CLASS)

/* DEBUG macros*/
#define PRINT_MATCH_SUMMARY     0x001
#define DISPLAY_FEATURE_MATCHES 0x002
#define DISPLAY_PROTO_MATCHES   0x004
#define PRINT_FEATURE_MATCHES   0x008
#define PRINT_PROTO_MATCHES     0x010
#define CLIP_MATCH_EVIDENCE     0x020

#define MatchDebuggingOn(D) (D)
#define PrintMatchSummaryOn(D) ((D)&PRINT_MATCH_SUMMARY)
#define DisplayFeatureMatchesOn(D) ((D)&DISPLAY_FEATURE_MATCHES)
#define DisplayProtoMatchesOn(D) ((D)&DISPLAY_PROTO_MATCHES)
#define PrintFeatureMatchesOn(D) ((D)&PRINT_FEATURE_MATCHES)
#define PrintProtoMatchesOn(D) ((D)&PRINT_PROTO_MATCHES)
#define ClipMatchEvidenceOn(D) ((D)&CLIP_MATCH_EVIDENCE)

/**----------------------------------------------------------------------------
          Public Function Prototypes
----------------------------------------------------------------------------**/
void AddIntClass(INT_TEMPLATES Templates, CLASS_ID ClassId, INT_CLASS Class);

int AddIntConfig(INT_CLASS Class);

int AddIntProto(INT_CLASS Class);

void AddProtoToClassPruner(PROTO Proto, CLASS_ID ClassId,
                           INT_TEMPLATES Templates);

void AddProtoToProtoPruner(PROTO Proto, int ProtoId, INT_CLASS Class,
                           bool debug);

uint8_t Bucket8For(float param, float offset, int num_buckets);
uint16_t Bucket16For(float param, float offset, int num_buckets);

uint8_t CircBucketFor(float param, float offset, int num_buckets);

void UpdateMatchDisplay();

void ConvertConfig(BIT_VECTOR Config, int ConfigId, INT_CLASS Class);

void DisplayIntFeature(const INT_FEATURE_STRUCT* Feature, float Evidence);

void DisplayIntProto(INT_CLASS Class, PROTO_ID ProtoId, float Evidence);

INT_CLASS NewIntClass(int MaxNumProtos, int MaxNumConfigs);

INT_TEMPLATES NewIntTemplates();

TESS_API
void free_int_templates(INT_TEMPLATES templates);

void ShowMatchDisplay();

// Clears the given window and draws the featurespace guides for the
// appropriate normalization method.
TESS_API
void ClearFeatureSpaceWindow(NORM_METHOD norm_method, ScrollView* window);

/*----------------------------------------------------------------------------*/
#ifndef GRAPHICS_DISABLED
TESS_API
void RenderIntFeature(ScrollView* window, const INT_FEATURE_STRUCT* Feature,
                      ScrollView::Color color);

// Lazily create the debug windows on first use.
void InitIntMatchWindowIfReqd();

void InitProtoDisplayWindowIfReqd();

void InitFeatureDisplayWindowIfReqd();

// Creates a window of the appropriate size for displaying elements
// in feature space.
TESS_API
ScrollView* CreateFeatureSpaceWindow(const char* name, int xpos, int ypos);
#endif  // !GRAPHICS_DISABLED

}  // namespace tesseract

#endif
diff --git a/tesseract/src/classify/kdtree.cpp b/tesseract/src/classify/kdtree.cpp
new file mode 100644
index 00000000..d8ff700d
--- /dev/null
+++ b/tesseract/src/classify/kdtree.cpp
@@ -0,0 +1,541 @@
/******************************************************************************
 ** Filename: kdtree.cpp
 ** Purpose:  Routines for managing K-D search trees
 ** Author:   Dan Johnson
 **
 ** (c) Copyright Hewlett-Packard Company, 1988.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
/******************************************************************************
 ** (license footer, continued from previous chunk)
 ******************************************************************************/

/*-----------------------------------------------------------------------------
          Include Files and Type Defines
-----------------------------------------------------------------------------*/
#include "kdtree.h"

#include <algorithm>
#include <cfloat>     // for FLT_MAX
#include <cstdio>
#include <cmath>

namespace tesseract {

#define Magnitude(X)    ((X) < 0 ? -(X) : (X))
// A node matches only if BOTH its key and data pointers are identical.
#define NodeFound(N,K,D)  (((N)->Key == (K)) && ((N)->Data == (D)))

/*-----------------------------------------------------------------------------
        Global Data Definitions and Declarations
-----------------------------------------------------------------------------*/
// Sentinels for an unbounded search box along a dimension.
#define MINSEARCH -FLT_MAX
#define MAXSEARCH FLT_MAX

// Helper function to find the next essential dimension in a cycle.
// Dimensions flagged NonEssential are skipped; wraps back to 0 after
// the last dimension.  Calling with level = -1 yields the first
// essential dimension.
static int NextLevel(KDTREE *tree, int level) {
  do {
    ++level;
    if (level >= tree->KeySize)
      level = 0;
  } while (tree->KeyDesc[level].NonEssential);
  return level;
}

//-----------------------------------------------------------------------------
/** Store the k smallest-keyed key-value pairs.
*/ +template<typename Key, typename Value> +class MinK { + public: + MinK(Key max_key, int k); + ~MinK(); + + struct Element { + Element() {} + Element(const Key& k, const Value& v) : key(k), value(v) {} + + Key key; + Value value; + }; + + bool insert(Key k, Value v); + const Key& max_insertable_key(); + + int elements_count() { return elements_count_; } + const Element* elements() { return elements_; } + + private: + const Key max_key_; ///< the maximum possible Key + Element *elements_; ///< unsorted array of elements + int elements_count_; ///< the number of results collected so far + int k_; ///< the number of results we want from the search + int max_index_; ///< the index of the result with the largest key +}; + +template<typename Key, typename Value> +MinK<Key, Value>::MinK(Key max_key, int k) : + max_key_(max_key), elements_count_(0), k_(k < 1 ? 1 : k), max_index_(0) { + elements_ = new Element[k_]; +} + +template<typename Key, typename Value> +MinK<Key, Value>::~MinK() { + delete []elements_; +} + +template<typename Key, typename Value> +const Key& MinK<Key, Value>::max_insertable_key() { + if (elements_count_ < k_) + return max_key_; + return elements_[max_index_].key; +} + +template<typename Key, typename Value> +bool MinK<Key, Value>::insert(Key key, Value value) { + if (elements_count_ < k_) { + elements_[elements_count_++] = Element(key, value); + if (key > elements_[max_index_].key) + max_index_ = elements_count_ - 1; + return true; + } else if (key < elements_[max_index_].key) { + // evict the largest element. + elements_[max_index_] = Element(key, value); + // recompute max_index_ + for (int i = 0; i < elements_count_; i++) { + if (elements_[i].key > elements_[max_index_].key) + max_index_ = i; + } + return true; + } + return false; +} + + +//----------------------------------------------------------------------------- +/** Helper class for searching for the k closest points to query_point in tree. 
/* (closes doc comment above) */
class KDTreeSearch {
 public:
  KDTreeSearch(KDTREE* tree, float *query_point, int k_closest);
  ~KDTreeSearch();

  /** Return the k nearest points' data. */
  void Search(int *result_count, float *distances, void **results);

 private:
  // Recursive workhorse; defined elsewhere in this file.
  void SearchRec(int Level, KDNODE *SubTree);
  bool BoxIntersectsSearch(float *lower, float *upper);

  KDTREE *tree_;         // tree being searched (not owned)
  float *query_point_;   // query key (not owned)
  float *sb_min_;        ///< search box minimum
  float *sb_max_;        ///< search box maximum
  MinK<float, void *> results_;
};

KDTreeSearch::KDTreeSearch(KDTREE *tree, float *query_point, int k_closest)
    : tree_(tree), query_point_(query_point), results_(MAXSEARCH, k_closest) {
  // One bound per dimension of the tree's key.
  sb_min_ = new float[tree->KeySize];
  sb_max_ = new float[tree->KeySize];
}

KDTreeSearch::~KDTreeSearch() {
  delete[] sb_min_;
  delete[] sb_max_;
}

/// Locate the k_closest points to query_point_, and return their distances
/// and data into the given buffers.
void KDTreeSearch::Search(int *result_count,
                          float *distances,
                          void **results) {
  if (tree_->Root.Left == nullptr) {
    // Empty tree: the root sentinel's Left child is the real root.
    *result_count = 0;
  } else {
    // Start with the search box covering each dimension's full range.
    for (int i = 0; i < tree_->KeySize; i++) {
      sb_min_[i] = tree_->KeyDesc[i].Min;
      sb_max_[i] = tree_->KeyDesc[i].Max;
    }
    SearchRec(0, tree_->Root.Left);
    int count = results_.elements_count();
    *result_count = count;
    for (int j = 0; j < count; j++) {
      // Keys are squared distances; report true (rooted) distances.
      // Pre-cast to float64 as key is a template type and we have no control
      // over its actual type.
      distances[j] = static_cast<float>(sqrt(static_cast<double>(results_.elements()[j].key)));
      results[j] = results_.elements()[j].value;
    }
  }
}

/*-----------------------------------------------------------------------------
              Public Code
-----------------------------------------------------------------------------*/
/// @return a new KDTREE based on the specified parameters.
+/// @param KeySize # of dimensions in the K-D tree +/// @param KeyDesc array of params to describe key dimensions +KDTREE *MakeKDTree(int16_t KeySize, const PARAM_DESC KeyDesc[]) { + auto *KDTree = static_cast<KDTREE *>(malloc( + sizeof(KDTREE) + (KeySize - 1) * sizeof(PARAM_DESC))); + for (int i = 0; i < KeySize; i++) { + KDTree->KeyDesc[i].NonEssential = KeyDesc[i].NonEssential; + KDTree->KeyDesc[i].Circular = KeyDesc[i].Circular; + if (KeyDesc[i].Circular) { + KDTree->KeyDesc[i].Min = KeyDesc[i].Min; + KDTree->KeyDesc[i].Max = KeyDesc[i].Max; + KDTree->KeyDesc[i].Range = KeyDesc[i].Max - KeyDesc[i].Min; + KDTree->KeyDesc[i].HalfRange = KDTree->KeyDesc[i].Range / 2; + KDTree->KeyDesc[i].MidRange = (KeyDesc[i].Max + KeyDesc[i].Min) / 2; + } else { + KDTree->KeyDesc[i].Min = MINSEARCH; + KDTree->KeyDesc[i].Max = MAXSEARCH; + } + } + KDTree->KeySize = KeySize; + KDTree->Root.Left = nullptr; + KDTree->Root.Right = nullptr; + return KDTree; +} + + +/** + * This routine stores Data in the K-D tree specified by Tree + * using Key as an access key. + * + * @param Tree K-D tree in which data is to be stored + * @param Key ptr to key by which data can be retrieved + * @param Data ptr to data to be stored in the tree + */ +void KDStore(KDTREE *Tree, float *Key, void *Data) { + int Level; + KDNODE *Node; + KDNODE **PtrToNode; + + PtrToNode = &(Tree->Root.Left); + Node = *PtrToNode; + Level = NextLevel(Tree, -1); + while (Node != nullptr) { + if (Key[Level] < Node->BranchPoint) { + PtrToNode = &(Node->Left); + if (Key[Level] > Node->LeftBranch) + Node->LeftBranch = Key[Level]; + } + else { + PtrToNode = &(Node->Right); + if (Key[Level] < Node->RightBranch) + Node->RightBranch = Key[Level]; + } + Level = NextLevel(Tree, Level); + Node = *PtrToNode; + } + + *PtrToNode = MakeKDNode(Tree, Key, Data, Level); +} /* KDStore */ + +/** + * This routine deletes a node from Tree. The node to be + * deleted is specified by the Key for the node and the Data + * contents of the node. 
These two pointers must be identical + * to the pointers that were used for the node when it was + * originally stored in the tree. A node will be deleted from + * the tree only if its key and data pointers are identical + * to Key and Data respectively. The tree is re-formed by removing + * the affected subtree and inserting all elements but the root. + * + * @param Tree K-D tree to delete node from + * @param Key key of node to be deleted + * @param Data data contents of node to be deleted + */ +void +KDDelete (KDTREE * Tree, float Key[], void *Data) { + int Level; + KDNODE *Current; + KDNODE *Father; + + /* initialize search at root of tree */ + Father = &(Tree->Root); + Current = Father->Left; + Level = NextLevel(Tree, -1); + + /* search tree for node to be deleted */ + while ((Current != nullptr) && (!NodeFound (Current, Key, Data))) { + Father = Current; + if (Key[Level] < Current->BranchPoint) + Current = Current->Left; + else + Current = Current->Right; + + Level = NextLevel(Tree, Level); + } + + if (Current != nullptr) { /* if node to be deleted was found */ + if (Current == Father->Left) { + Father->Left = nullptr; + Father->LeftBranch = Tree->KeyDesc[Level].Min; + } else { + Father->Right = nullptr; + Father->RightBranch = Tree->KeyDesc[Level].Max; + } + + InsertNodes(Tree, Current->Left); + InsertNodes(Tree, Current->Right); + FreeSubTree(Current); + } +} /* KDDelete */ + +/** + * This routine searches the K-D tree specified by Tree and + * finds the QuerySize nearest neighbors of Query. All neighbors + * must be within MaxDistance of Query. The data contents of + * the nearest neighbors + * are placed in NBuffer and their distances from Query are + * placed in DBuffer. 
+ * @param Tree ptr to K-D tree to be searched + * @param Query ptr to query key (point in D-space) + * @param QuerySize number of nearest neighbors to be found + * @param MaxDistance all neighbors must be within this distance + * @param NBuffer ptr to QuerySize buffer to hold nearest neighbors + * @param DBuffer ptr to QuerySize buffer to hold distances + * from nearest neighbor to query point + * @param NumberOfResults [out] Number of nearest neighbors actually found + */ +void KDNearestNeighborSearch( + KDTREE *Tree, float Query[], int QuerySize, float MaxDistance, + int *NumberOfResults, void **NBuffer, float DBuffer[]) { + KDTreeSearch search(Tree, Query, QuerySize); + search.Search(NumberOfResults, DBuffer, NBuffer); +} + + +/*---------------------------------------------------------------------------*/ +/** Walk a given Tree with action. */ +void KDWalk(KDTREE *Tree, void_proc action, void *context) { + if (Tree->Root.Left != nullptr) + Walk(Tree, action, context, Tree->Root.Left, NextLevel(Tree, -1)); +} + + +/*---------------------------------------------------------------------------*/ +/** + * This routine frees all memory which is allocated to the + * specified KD-tree. This includes the data structure for + * the kd-tree itself plus the data structures for each node + * in the tree. It does not include the Key and Data items + * which are pointed to by the nodes. This memory is left + * untouched. + * @param Tree tree data structure to be released + */ +void FreeKDTree(KDTREE *Tree) { + FreeSubTree(Tree->Root.Left); + free(Tree); +} /* FreeKDTree */ + + +/*----------------------------------------------------------------------------- + Private Code +-----------------------------------------------------------------------------*/ +/*---------------------------------------------------------------------------*/ +/** + * This routine allocates memory for a new K-D tree node + * and places the specified Key and Data into it. 
The
 * left and right subtree pointers for the node are
 * initialized to empty subtrees.
 * @param tree  The tree to create the node for
 * @param Key  Access key for new node in KD tree; the pointer is stored,
 *             not copied, so the caller's storage must outlive the node.
 * @param Data  ptr to data to be stored in new node
 * @param Index  index of Key to branch on
 * @return pointer to new K-D tree node
 */
KDNODE *MakeKDNode(KDTREE *tree, float Key[], void *Data, int Index) {
  KDNODE *NewNode;

  NewNode = static_cast<KDNODE *>(malloc (sizeof (KDNODE)));

  NewNode->Key = Key;
  NewNode->Data = Data;
  NewNode->BranchPoint = Key[Index];
  // Branch bounds start at the dimension extremes; KDStore tightens them as
  // more keys are inserted below this node.
  NewNode->LeftBranch = tree->KeyDesc[Index].Min;
  NewNode->RightBranch = tree->KeyDesc[Index].Max;
  NewNode->Left = nullptr;
  NewNode->Right = nullptr;

  return NewNode;
}                                /* MakeKDNode */


/*---------------------------------------------------------------------------*/
/** Free a single node (does not free Key or Data, nor any subtree). */
void FreeKDNode(KDNODE *Node) { free(Node); }

/*---------------------------------------------------------------------------*/
/**
 * Recursively accumulate the k_closest points to query_point_ into results_.
 * @param Level  level in tree of sub-tree to be searched
 * @param SubTree  sub-tree to be searched
 */
void KDTreeSearch::SearchRec(int level, KDNODE *sub_tree) {
  // Wrap the split level back to dimension 0 when past the last dimension.
  if (level >= tree_->KeySize)
    level = 0;

  // Prune: skip this entire subtree if its bounding box cannot contain a
  // point closer than the current k-th best result.
  if (!BoxIntersectsSearch(sb_min_, sb_max_))
    return;

  results_.insert(DistanceSquared(tree_->KeySize, tree_->KeyDesc, query_point_,
                                  sub_tree->Key),
                  sub_tree->Data);

  // Visit the half containing the query first, so results_ tightens early and
  // the far half is more likely to be pruned. The search box is narrowed to
  // the child's bounds around each recursive call and restored afterwards.
  if (query_point_[level] < sub_tree->BranchPoint) {
    if (sub_tree->Left != nullptr) {
      float tmp = sb_max_[level];
      sb_max_[level] = sub_tree->LeftBranch;
      SearchRec(NextLevel(tree_, level), sub_tree->Left);
      sb_max_[level] = tmp;
    }
    if (sub_tree->Right != nullptr) {
      float tmp = sb_min_[level];
      sb_min_[level] = sub_tree->RightBranch;
      SearchRec(NextLevel(tree_, level), sub_tree->Right);
      sb_min_[level] = tmp;
    }
  } else {
    if (sub_tree->Right != nullptr) {
      float tmp = sb_min_[level];
      sb_min_[level] = sub_tree->RightBranch;
      SearchRec(NextLevel(tree_, level), sub_tree->Right);
      sb_min_[level] = tmp;
    }
    if (sub_tree->Left != nullptr) {
      float tmp = sb_max_[level];
      sb_max_[level] = sub_tree->LeftBranch;
      SearchRec(NextLevel(tree_, level), sub_tree->Left);
      sb_max_[level] = tmp;
    }
  }
}


/*---------------------------------------------------------------------------*/
/**
 * Returns the Euclidean distance squared between p1 and p2 for all essential
 * dimensions.
 * @param k  keys are in k-space
 * @param dim  dimension descriptions (essential, circular, etc)
 * @param p1,p2  two different points in K-D space
 */
float DistanceSquared(int k, PARAM_DESC *dim, float p1[], float p2[]) {
  float total_distance = 0;

  for (; k > 0; k--, p1++, p2++, dim++) {
    if (dim->NonEssential)
      continue;

    float dimension_distance = *p1 - *p2;

    /* if this dimension is circular - check wraparound distance */
    if (dim->Circular) {
      dimension_distance = Magnitude(dimension_distance);
      float wrap_distance = dim->Max - dim->Min - dimension_distance;
      dimension_distance = std::min(dimension_distance, wrap_distance);
    }

    total_distance += dimension_distance * dimension_distance;
  }
  return total_distance;
}

/** Euclidean distance (not squared) between p1 and p2; see DistanceSquared. */
float ComputeDistance(int k, PARAM_DESC *dim, float p1[], float p2[]) {
  return sqrt(DistanceSquared(k, dim, p1, p2));
}

/*---------------------------------------------------------------------------*/
/// Return whether the query region (the smallest known circle about
/// query_point_ containing results->k_ points) intersects the box specified
/// between lower and upper. For circular dimensions, we also check the point
/// one wrap distance away from the query.
bool KDTreeSearch::BoxIntersectsSearch(float *lower, float *upper) {
  float *query = query_point_;
  // Compute the sum in higher precision.
  double total_distance = 0.0;
  // Radius of the query circle is the largest key still insertable into
  // results_ (i.e. the current k-th best distance, or MAXSEARCH if not full).
  double radius_squared = static_cast<double>(results_.max_insertable_key()) *
      results_.max_insertable_key();
  PARAM_DESC *dim = tree_->KeyDesc;

  for (int i = tree_->KeySize; i > 0; i--, dim++, query++, lower++, upper++) {
    if (dim->NonEssential)
      continue;

    // Distance from the query to the nearest face of the box along this
    // dimension (zero if the query lies inside the box's extent).
    float dimension_distance;
    if (*query < *lower)
      dimension_distance = *lower - *query;
    else if (*query > *upper)
      dimension_distance = *query - *upper;
    else
      dimension_distance = 0;

    /* if this dimension is circular - check wraparound distance */
    if (dim->Circular) {
      float wrap_distance = FLT_MAX;
      if (*query < *lower)
        wrap_distance = *query + dim->Max - dim->Min - *upper;
      else if (*query > *upper)
        wrap_distance = *lower - (*query - (dim->Max - dim->Min));
      dimension_distance = std::min(dimension_distance, wrap_distance);
    }

    total_distance +=
        static_cast<double>(dimension_distance) * dimension_distance;
    // Early out as soon as the accumulated distance exceeds the radius.
    if (total_distance >= radius_squared)
      return false;
  }
  return true;
}


/*---------------------------------------------------------------------------*/
/**
 * Walk a tree, calling action once on each node.
 *
 * Operation:
 *   This routine walks through the specified sub_tree and invokes action
 *   action at each node as follows:
 *       action(context, data, level)
 *   data  the data contents of the node being visited,
 *   level is the level of the node in the tree with the root being level 0.
 * @param tree  root of the tree being walked.
 * @param action  action to be performed at every node
 * @param context  action's context
 * @param sub_tree  ptr to root of subtree to be walked
 * @param level  current level in the tree for this node
 */
void Walk(KDTREE *tree, void_proc action, void *context,
          KDNODE *sub_tree, int32_t level) {
  // Pre-order traversal: visit the node, then its children.
  (*action)(context, sub_tree->Data, level);
  if (sub_tree->Left != nullptr)
    Walk(tree, action, context, sub_tree->Left, NextLevel(tree, level));
  if (sub_tree->Right != nullptr)
    Walk(tree, action, context, sub_tree->Right, NextLevel(tree, level));
}

/** Given a subtree nodes, insert all of its elements into tree. */
void InsertNodes(KDTREE *tree, KDNODE *nodes) {
  if (nodes == nullptr)
    return;

  KDStore(tree, nodes->Key, nodes->Data);
  InsertNodes(tree, nodes->Left);
  InsertNodes(tree, nodes->Right);
}

/** Free all of the nodes of a sub tree (not their Key/Data contents). */
void FreeSubTree(KDNODE *sub_tree) {
  if (sub_tree != nullptr) {
    FreeSubTree(sub_tree->Left);
    FreeSubTree(sub_tree->Right);
    free(sub_tree);
  }
}

}  // namespace tesseract
diff --git a/tesseract/src/classify/kdtree.h b/tesseract/src/classify/kdtree.h
new file mode 100644
index 00000000..b8512191
--- /dev/null
+++ b/tesseract/src/classify/kdtree.h
@@ -0,0 +1,98 @@
/******************************************************************************
 ** Filename: kdtree.h
 ** Purpose: Definition of K-D tree access routines.
 ** Author: Dan Johnson
 **
 ** (c) Copyright Hewlett-Packard Company, 1988.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *****************************************************************************/

#ifndef KDTREE_H
#define KDTREE_H

#include "ocrfeatures.h"

namespace tesseract {

// Visitor callback used by KDWalk/Walk; invoked as (context, data, level).
using void_proc = void (*)(...);

/**
NOTE:  All circular parameters of all keys must be in the range

Min <= Param < Max

where Min and Max are specified in the KeyDesc parameter passed to
MakeKDTree.  All KD routines assume that this is true and will not operate
correctly if circular parameters outside the specified range are used.
*/

struct KDNODE {
  float* Key;          /**< search key (not owned; points at caller storage) */
  void* Data;          /**< data that corresponds to key (not owned) */
  float BranchPoint;   /**< needed to make deletes work efficiently */
  float LeftBranch;    /**< used to optimize search pruning */
  float RightBranch;   /**< used to optimize search pruning */
  struct KDNODE* Left; /**< ptrs for KD tree structure */
  struct KDNODE* Right;
};

struct KDTREE {
  int16_t KeySize;       /* number of dimensions in the tree */
  KDNODE Root;           /* Root.Left points to actual root node */
  /* Pre-C99 "struct hack": MakeKDTree allocates space for KeySize
     descriptors, so KeyDesc is indexed [0, KeySize). */
  PARAM_DESC KeyDesc[1]; /* description of each dimension */
};

/*----------------------------------------------------------------------------
            Macros
-----------------------------------------------------------------------------*/
#define RootOf(T) ((T)->Root.Left->Data)

/*-----------------------------------------------------------------------------
          Public Function Prototypes
-----------------------------------------------------------------------------*/
KDTREE* MakeKDTree(int16_t KeySize, const PARAM_DESC KeyDesc[]);

void KDStore(KDTREE* Tree, float* Key, void* Data);

void KDDelete(KDTREE* Tree, float Key[], void* Data);

void KDNearestNeighborSearch(KDTREE* Tree, float Query[], int QuerySize,
                             float MaxDistance, int* NumberOfResults,
                             void** NBuffer, float DBuffer[]);

void KDWalk(KDTREE* Tree, void_proc Action,
            void* context);

void FreeKDTree(KDTREE* Tree);

/*-----------------------------------------------------------------------------
          Private Function Prototypes
-----------------------------------------------------------------------------*/
KDNODE* MakeKDNode(KDTREE* tree, float Key[], void* Data, int Index);

void FreeKDNode(KDNODE* Node);

float DistanceSquared(int k, PARAM_DESC* dim, float p1[], float p2[]);

TESS_API
float ComputeDistance(int k, PARAM_DESC* dim, float p1[], float p2[]);

int QueryInSearch(KDTREE* tree);

void Walk(KDTREE* tree, void_proc action, void* context, KDNODE* SubTree,
          int32_t Level);

void InsertNodes(KDTREE* tree, KDNODE* nodes);

void FreeSubTree(KDNODE* SubTree);

}  // namespace tesseract

#endif
diff --git a/tesseract/src/classify/mf.cpp b/tesseract/src/classify/mf.cpp
new file mode 100644
index 00000000..d6232eee
--- /dev/null
+++ b/tesseract/src/classify/mf.cpp
@@ -0,0 +1,82 @@
/******************************************************************************
 ** Filename: mf.c
 ** Purpose: Micro-feature interface to flexible feature extractor.
 ** Author: Dan Johnson
 ** History: Thu May 24 09:08:38 1990, DSJ, Created.
 **
 ** (c) Copyright Hewlett-Packard Company, 1988.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
+ ******************************************************************************/ +/*---------------------------------------------------------------------------- + Include Files and Type Defines +----------------------------------------------------------------------------*/ +#include "mf.h" + +#include "featdefs.h" +#include "mfdefs.h" +#include "mfx.h" + +#include <cmath> + +namespace tesseract { + +/*---------------------------------------------------------------------------- + Private Code +----------------------------------------------------------------------------*/ +/** + * Call the old micro-feature extractor and then copy + * the features into the new format. Then deallocate the + * old micro-features. + * @param Blob blob to extract micro-features from + * @param cn_denorm control parameter to feature extractor. + * @return Micro-features for Blob. + */ +FEATURE_SET ExtractMicros(TBLOB* Blob, const DENORM& cn_denorm) { + int NumFeatures; + MICROFEATURES Features, OldFeatures; + FEATURE_SET FeatureSet; + FEATURE Feature; + MICROFEATURE OldFeature; + + OldFeatures = BlobMicroFeatures(Blob, cn_denorm); + if (OldFeatures == nullptr) + return nullptr; + NumFeatures = count (OldFeatures); + FeatureSet = NewFeatureSet (NumFeatures); + + Features = OldFeatures; + iterate(Features) { + OldFeature = reinterpret_cast<MICROFEATURE>first_node (Features); + Feature = NewFeature (&MicroFeatureDesc); + Feature->Params[MFDirection] = OldFeature[ORIENTATION]; + Feature->Params[MFXPosition] = OldFeature[XPOSITION]; + Feature->Params[MFYPosition] = OldFeature[YPOSITION]; + Feature->Params[MFLength] = OldFeature[MFLENGTH]; + + // Bulge features are deprecated and should not be used. Set to 0. + Feature->Params[MFBulge1] = 0.0f; + Feature->Params[MFBulge2] = 0.0f; + +#ifndef _WIN32 + // Assert that feature parameters are well defined. 
+ int i; + for (i = 0; i < Feature->Type->NumParams; i++) { + ASSERT_HOST(!std::isnan(Feature->Params[i])); + } +#endif + + AddFeature(FeatureSet, Feature); + } + FreeMicroFeatures(OldFeatures); + return FeatureSet; +} /* ExtractMicros */ + +} // namespace tesseract diff --git a/tesseract/src/classify/mf.h b/tesseract/src/classify/mf.h new file mode 100644 index 00000000..b1113ce5 --- /dev/null +++ b/tesseract/src/classify/mf.h @@ -0,0 +1,40 @@ +/****************************************************************************** + ** Filename: mf.h + ** Purpose: Micro-feature interface to flexible feature extractor. + ** Author: Dan Johnson + ** + ** (c) Copyright Hewlett-Packard Company, 1988. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + ******************************************************************************/ + +#ifndef MF_H +#define MF_H + +#include "ocrfeatures.h" +#include "blobs.h" + +namespace tesseract { + +typedef enum { + MFXPosition, MFYPosition, + MFLength, MFDirection, MFBulge1, MFBulge2, + MFCount // For array sizes. 
} MF_PARAM_NAME;

// One micro-feature in the new format: indexed by MF_PARAM_NAME.
typedef float MicroFeature[MFCount];
/*----------------------------------------------------------------------------
          Private Function Prototypes
-----------------------------------------------------------------------------*/
FEATURE_SET ExtractMicros(TBLOB* Blob, const DENORM& cn_denorm);

}  // namespace tesseract

#endif
diff --git a/tesseract/src/classify/mfdefs.cpp b/tesseract/src/classify/mfdefs.cpp
new file mode 100644
index 00000000..3442fdfc
--- /dev/null
+++ b/tesseract/src/classify/mfdefs.cpp
@@ -0,0 +1,46 @@
/******************************************************************************
 ** Filename: mfdefs.cpp
 ** Purpose: Basic routines for manipulating micro-features
 ** Author: Dan Johnson
 **
 ** (c) Copyright Hewlett-Packard Company, 1988.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 ******************************************************************************/

#include "mfdefs.h"

#include <cmath>

namespace tesseract {

/*----------------------------------------------------------------------------
              Public Code
----------------------------------------------------------------------------**/

/**
 * This routine allocates and returns a new micro-feature
 * data structure.
 * @return New MICROFEATURE
 */
MICROFEATURE NewMicroFeature() {
  // malloc (not new) because micro-features are stored in old C-style LISTs
  // and released with free() via FreeMicroFeatures below.
  return (static_cast<MICROFEATURE>(malloc (sizeof (MFBLOCK))));
}                                /* NewMicroFeature */

/**
 * This routine deallocates all of the memory consumed by
 * a list of micro-features.
 * @param MicroFeatures  list of micro-features to be freed
 */
void FreeMicroFeatures(MICROFEATURES MicroFeatures) {
  destroy_nodes(MicroFeatures, free);
}                                /* FreeMicroFeatures */

}  // namespace tesseract
diff --git a/tesseract/src/classify/mfdefs.h b/tesseract/src/classify/mfdefs.h
new file mode 100644
index 00000000..90d5374b
--- /dev/null
+++ b/tesseract/src/classify/mfdefs.h
@@ -0,0 +1,61 @@
/******************************************************************************
 ** Filename: mfdefs.h
 ** Purpose: Definition of micro-features
 ** Author: Dan Johnson
 ** History: Mon Jan 22 08:42:13 1990, DSJ, Created.
 **
 ** (c) Copyright Hewlett-Packard Company, 1988.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 ******************************************************************************/
#ifndef MFDEFS_H
#define MFDEFS_H

/**----------------------------------------------------------------------------
          Include Files and Type Defines
----------------------------------------------------------------------------**/
#include "matchdefs.h"
#include "oldlist.h"

namespace tesseract {

/* definition of a list of micro-features */
using MICROFEATURES = LIST;

/* definition of structure of micro-features */
#define MFSIZE 6
typedef float MFBLOCK[MFSIZE];
// A micro-feature is a bare float array of MFSIZE entries, indexed by the
// parameter defines below.
using MICROFEATURE = float*;

/* definitions of individual micro-feature parameters (indices into MFBLOCK) */
#define XPOSITION 0
#define YPOSITION 1
#define MFLENGTH 2
#define ORIENTATION 3
#define FIRSTBULGE 4
#define SECONDBULGE 5

/**----------------------------------------------------------------------------
          Macros
----------------------------------------------------------------------------**/

/* macros for accessing micro-feature lists */
#define NextFeatureOf(L) ((MICROFEATURE)first_node(L))

/**----------------------------------------------------------------------------
          Public Function Prototypes
----------------------------------------------------------------------------**/
MICROFEATURE NewMicroFeature();

void FreeMicroFeatures(MICROFEATURES MicroFeatures);

}  // namespace tesseract

#endif
diff --git a/tesseract/src/classify/mfoutline.cpp b/tesseract/src/classify/mfoutline.cpp
new file mode 100644
index 00000000..450c7acc
--- /dev/null
+++ b/tesseract/src/classify/mfoutline.cpp
@@ -0,0 +1,446 @@
/******************************************************************************
 ** Filename: mfoutline.c
 ** Purpose: Interface to outline struct used for extracting features
 ** Author: Dan Johnson
 **
 ** (c) Copyright Hewlett-Packard Company, 1988.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 ******************************************************************************/

#include "mfoutline.h"

#include "clusttool.h"  // Do not remove: removing this include causes an infinite loop somewhere.
#include "blobs.h"
#include "mfx.h"
#include "params.h"
#include "classify.h"

#include <cmath>
#include <cstdio>

namespace tesseract {

/*---------------------------------------------------------------------------*/
/** Convert a blob into a list of MFOUTLINEs (float-based microfeature format).
 * Returns NIL_LIST for a null blob.
 */
LIST ConvertBlob(TBLOB *blob) {
  LIST outlines = NIL_LIST;
  return (blob == nullptr)
      ? NIL_LIST
      : ConvertOutlines(blob->outlines, outlines, outer);
}


/*---------------------------------------------------------------------------*/
/** Convert a TESSLINE into the float-based MFOUTLINE micro-feature format.
 * Duplicate consecutive points are dropped; the result is a circular list,
 * or NIL_LIST if the outline is empty/degenerate.
 */
MFOUTLINE ConvertOutline(TESSLINE *outline) {
  MFEDGEPT *NewPoint;
  MFOUTLINE MFOutline = NIL_LIST;
  EDGEPT *EdgePoint;
  EDGEPT *StartPoint;
  EDGEPT *NextPoint;

  if (outline == nullptr || outline->loop == nullptr)
    return MFOutline;

  StartPoint = outline->loop;
  EdgePoint = StartPoint;
  do {
    NextPoint = EdgePoint->next;

    /* filter out duplicate points */
    if (EdgePoint->pos.x != NextPoint->pos.x ||
        EdgePoint->pos.y != NextPoint->pos.y) {
      NewPoint = NewEdgePoint();
      NewPoint->ClearMark();
      NewPoint->Hidden = EdgePoint->IsHidden();
      NewPoint->Point.x = EdgePoint->pos.x;
      NewPoint->Point.y = EdgePoint->pos.y;
      MFOutline = push(MFOutline, NewPoint);
    }
    EdgePoint = NextPoint;
  } while (EdgePoint != StartPoint);

  // Close the list into a ring so traversal code can loop endlessly.
  if (MFOutline != nullptr)
    MakeOutlineCircular(MFOutline);
  return MFOutline;
}


/*---------------------------------------------------------------------------*/
/**
 * Convert a tree of outlines to a list of MFOUTLINEs (lists of MFEDGEPTs).
 *
 * @param outline  first outline to be converted
 * @param mf_outlines  list to add converted outlines to
 * @param outline_type  are the outlines outer or holes?
 *        NOTE: currently unused — every outline is converted identically.
 */
LIST ConvertOutlines(TESSLINE *outline,
                     LIST mf_outlines,
                     OUTLINETYPE outline_type) {
  MFOUTLINE mf_outline;

  while (outline != nullptr) {
    mf_outline = ConvertOutline(outline);
    if (mf_outline != nullptr)
      mf_outlines = push(mf_outlines, mf_outline);
    outline = outline->next;
  }
  return mf_outlines;
}

/*---------------------------------------------------------------------------*/
/**
 * This routine searches through the specified outline, computes
 * a slope for each vector in the outline, and marks each
 * vector as having one of the following directions:
 *   N, S, E, W, NE, NW, SE, SW
 * This information is then stored in the outline and the
 * outline is returned.
 * @param Outline  micro-feature outline to analyze
 * @param MinSlope  controls "snapping" of segments to horizontal
 * @param MaxSlope  controls "snapping" of segments to vertical
 */
void FindDirectionChanges(MFOUTLINE Outline,
                          float MinSlope,
                          float MaxSlope) {
  MFEDGEPT *Current;
  MFEDGEPT *Last;
  MFOUTLINE EdgePoint;

  if (DegenerateOutline (Outline))
    return;

  Last = PointAt (Outline);
  Outline = NextPointAfter (Outline);
  EdgePoint = Outline;
  do {
    Current = PointAt (EdgePoint);
    ComputeDirection(Last, Current, MinSlope, MaxSlope);

    Last = Current;
    EdgePoint = NextPointAfter (EdgePoint);
  }
  while (EdgePoint != Outline);

}                                /* FindDirectionChanges */


/*---------------------------------------------------------------------------*/
/**
 * This routine deallocates all of the memory consumed by
 * a micro-feature outline.
 * @param arg  micro-feature outline to be freed
 */
void FreeMFOutline(void *arg) {  //MFOUTLINE Outline)
  MFOUTLINE Start;
  auto Outline = static_cast<MFOUTLINE>(arg);

  /* break the circular outline so we can use std. techniques to deallocate */
  Start = list_rest (Outline);
  set_rest(Outline, NIL_LIST);
  while (Start != nullptr) {
    free(first_node(Start));
    Start = pop (Start);
  }

}                                /* FreeMFOutline */


/*---------------------------------------------------------------------------*/
/**
 * Release all memory consumed by the specified list
 * of outlines.
 * @param Outlines  list of mf-outlines to be freed
 */
void FreeOutlines(LIST Outlines) {
  destroy_nodes(Outlines, FreeMFOutline);
}                                /* FreeOutlines */


/*---------------------------------------------------------------------------*/
/**
 * This routine searches through the specified outline and finds
 * the points at which the outline changes direction.  These
 * points are then marked as "extremities".  This routine is
 * used as an alternative to FindExtremities().  It forces the
 * endpoints of the microfeatures to be at the direction
 * changes rather than at the midpoint between direction
 * changes.
 * @param Outline  micro-feature outline to analyze
 */
void MarkDirectionChanges(MFOUTLINE Outline) {
  MFOUTLINE Current;
  MFOUTLINE Last;
  MFOUTLINE First;

  if (DegenerateOutline (Outline))
    return;

  First = NextDirectionChange (Outline);
  Last = First;
  do {
    Current = NextDirectionChange (Last);
    PointAt(Current)->MarkPoint();
    Last = Current;
  }
  while (Last != First);

}                                /* MarkDirectionChanges */


/*---------------------------------------------------------------------------*/
/** Return a new edge point for a micro-feature outline.
 * malloc'd (not new) to match the free()-based list cleanup in FreeMFOutline.
 */
MFEDGEPT *NewEdgePoint() {
  return reinterpret_cast<MFEDGEPT *>(malloc(sizeof(MFEDGEPT)));
}

/*---------------------------------------------------------------------------*/
/**
 * This routine returns the next point in the micro-feature
 * outline that is an extremity.  The search starts after
 * EdgePoint.  The routine assumes that the outline being
 * searched is not a degenerate outline (i.e. it must have
 * 2 or more edge points).  At least one point must be marked as
 * an extremity or the loop below never terminates.
 * @param EdgePoint  start search from this point
 * @return Next extremity in the outline after EdgePoint.
 * @note Globals: none
 */
MFOUTLINE NextExtremity(MFOUTLINE EdgePoint) {
  EdgePoint = NextPointAfter(EdgePoint);
  while (!PointAt(EdgePoint)->ExtremityMark)
    EdgePoint = NextPointAfter(EdgePoint);

  return (EdgePoint);

}                                /* NextExtremity */


/*---------------------------------------------------------------------------*/
/**
 * This routine normalizes the coordinates of the specified
 * outline so that the outline is deskewed down to the
 * baseline, translated so that x=0 is at XOrigin, and scaled
 * so that the height of a character cell from descender to
 * ascender is 1.  Of this height, 0.25 is for the descender,
 * 0.25 for the ascender, and 0.5 for the x-height.  The
 * y coordinate of the baseline is 0.
 * @param Outline  outline to be normalized
 * @param XOrigin  x-origin of text
 */
void NormalizeOutline(MFOUTLINE Outline,
                      float XOrigin) {
  if (Outline == NIL_LIST)
    return;

  MFOUTLINE EdgePoint = Outline;
  do {
    MFEDGEPT *Current = PointAt(EdgePoint);
    Current->Point.y = MF_SCALE_FACTOR *
        (Current->Point.y - kBlnBaselineOffset);
    Current->Point.x = MF_SCALE_FACTOR * (Current->Point.x - XOrigin);
    EdgePoint = NextPointAfter(EdgePoint);
  } while (EdgePoint != Outline);
}                                /* NormalizeOutline */


/*---------------------------------------------------------------------------*/
/**
 * This routine normalizes every outline in Outlines
 * according to the currently selected normalization method.
 * It also returns the scale factors that it used to do this
 * scaling.  The scale factors returned represent the x and
 * y sizes in the normalized coordinate system that correspond
 * to 1 pixel in the original coordinate system.
 * Outlines are changed and XScale and YScale are updated.
+ * + * Globals: + * - classify_norm_method method being used for normalization + * - classify_char_norm_range map radius of gyration to this value + * @param Outlines list of outlines to be normalized + * @param XScale x-direction scale factor used by routine + * @param YScale y-direction scale factor used by routine + */ +void Classify::NormalizeOutlines(LIST Outlines, + float *XScale, + float *YScale) { + MFOUTLINE Outline; + + switch (classify_norm_method) { + case character: + ASSERT_HOST(!"How did NormalizeOutlines get called in character mode?"); + break; + + case baseline: + iterate(Outlines) { + Outline = static_cast<MFOUTLINE>first_node(Outlines); + NormalizeOutline(Outline, 0.0); + } + *XScale = *YScale = MF_SCALE_FACTOR; + break; + } +} /* NormalizeOutlines */ + +/*---------------------------------------------------------------------------- + Private Code +----------------------------------------------------------------------------*/ +/** + * Change the direction of every vector in the specified + * outline segment to Direction. The segment to be changed + * starts at Start and ends at End. Note that the previous + * direction of End must also be changed to reflect the + * change in direction of the point before it. + * @param Start defines start of segment of outline to be modified + * @param End defines end of segment of outline to be modified + * @param Direction new direction to assign to segment + */ +void ChangeDirection(MFOUTLINE Start, MFOUTLINE End, DIRECTION Direction) { + MFOUTLINE Current; + + for (Current = Start; Current != End; Current = NextPointAfter (Current)) + PointAt (Current)->Direction = Direction; + + PointAt (End)->PreviousDirection = Direction; + +} /* ChangeDirection */ + +/** + * This routine normalizes each point in Outline by + * translating it to the specified center and scaling it + * anisotropically according to the given scale factors. 
+ * @param Outline outline to be character normalized + * @param cn_denorm + */ +void CharNormalizeOutline(MFOUTLINE Outline, const DENORM& cn_denorm) { + MFOUTLINE First, Current; + MFEDGEPT *CurrentPoint; + + if (Outline == NIL_LIST) + return; + + First = Outline; + Current = First; + do { + CurrentPoint = PointAt(Current); + FCOORD pos(CurrentPoint->Point.x, CurrentPoint->Point.y); + cn_denorm.LocalNormTransform(pos, &pos); + CurrentPoint->Point.x = (pos.x() - UINT8_MAX / 2) * MF_SCALE_FACTOR; + CurrentPoint->Point.y = (pos.y() - UINT8_MAX / 2) * MF_SCALE_FACTOR; + + Current = NextPointAfter(Current); + } + while (Current != First); + +} /* CharNormalizeOutline */ + +/** + * This routine computes the slope from Start to Finish and + * and then computes the approximate direction of the line + * segment from Start to Finish. The direction is quantized + * into 8 buckets: + * N, S, E, W, NE, NW, SE, SW + * Both the slope and the direction are then stored into + * the appropriate fields of the Start edge point. The + * direction is also stored into the PreviousDirection field + * of the Finish edge point. 
+ * @param Start starting point to compute direction from + * @param Finish finishing point to compute direction to + * @param MinSlope slope below which lines are horizontal + * @param MaxSlope slope above which lines are vertical + */ +void ComputeDirection(MFEDGEPT *Start, + MFEDGEPT *Finish, + float MinSlope, + float MaxSlope) { + FVECTOR Delta; + + Delta.x = Finish->Point.x - Start->Point.x; + Delta.y = Finish->Point.y - Start->Point.y; + if (Delta.x == 0) { + if (Delta.y < 0) { + Start->Slope = -FLT_MAX; + Start->Direction = south; + } else { + Start->Slope = FLT_MAX; + Start->Direction = north; + } + } else { + Start->Slope = Delta.y / Delta.x; + if (Delta.x > 0) { + if (Delta.y > 0) { + if (Start->Slope > MinSlope) { + if (Start->Slope < MaxSlope) { + Start->Direction = northeast; + } else { + Start->Direction = north; + } + } else { + Start->Direction = east; + } + } + else if (Start->Slope < -MinSlope) { + if (Start->Slope > -MaxSlope) { + Start->Direction = southeast; + } else { + Start->Direction = south; + } + } else { + Start->Direction = east; + } + } else if (Delta.y > 0) { + if (Start->Slope < -MinSlope) { + if (Start->Slope > -MaxSlope) { + Start->Direction = northwest; + } else { + Start->Direction = north; + } + } else { + Start->Direction = west; + } + } else if (Start->Slope > MinSlope) { + if (Start->Slope < MaxSlope) { + Start->Direction = southwest; + } else { + Start->Direction = south; + } + } else { + Start->Direction = west; + } + } + Finish->PreviousDirection = Start->Direction; +} + +/** + * This routine returns the next point in the micro-feature + * outline that has a direction different than EdgePoint. The + * routine assumes that the outline being searched is not a + * degenerate outline (i.e. it must have 2 or more edge points). + * @param EdgePoint start search from this point + * @return Point of next direction change in micro-feature outline. 
+ * @note Globals: none + */ +MFOUTLINE NextDirectionChange(MFOUTLINE EdgePoint) { + DIRECTION InitialDirection; + + InitialDirection = PointAt (EdgePoint)->Direction; + + MFOUTLINE next_pt = nullptr; + do { + EdgePoint = NextPointAfter(EdgePoint); + next_pt = NextPointAfter(EdgePoint); + } while (PointAt(EdgePoint)->Direction == InitialDirection && + !PointAt(EdgePoint)->Hidden && + next_pt != nullptr && !PointAt(next_pt)->Hidden); + + return (EdgePoint); +} + +} // namespace tesseract diff --git a/tesseract/src/classify/mfoutline.h b/tesseract/src/classify/mfoutline.h new file mode 100644 index 00000000..6da42855 --- /dev/null +++ b/tesseract/src/classify/mfoutline.h @@ -0,0 +1,135 @@ +/****************************************************************************** + ** Filename: mfoutline.h + ** Purpose: Interface spec for fx outline structures + ** Author: Dan Johnson + ** + ** (c) Copyright Hewlett-Packard Company, 1988. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + ******************************************************************************/ + +#ifndef MFOUTLINE_H +#define MFOUTLINE_H + +#include "blobs.h" +#include "fpoint.h" +#include "oldlist.h" +#include "params.h" + +namespace tesseract { + +using MFOUTLINE = LIST; + +enum DIRECTION : uint8_t { + north, + south, + east, + west, + northeast, + northwest, + southeast, + southwest +}; + +struct MFEDGEPT { + // Inline functions for manipulating micro-feature outline edge points. 
+ + void ClearMark() { + ExtremityMark = false; + } + + void MarkPoint() { + ExtremityMark = true; + } + + FPOINT Point; + float Slope; + bool Hidden; + bool ExtremityMark; + DIRECTION Direction; + DIRECTION PreviousDirection; +}; + +enum OUTLINETYPE { outer, hole }; + +enum NORM_METHOD { baseline, character }; + +/**---------------------------------------------------------------------------- + Macros +----------------------------------------------------------------------------**/ +#define AverageOf(A, B) (((A) + (B)) / 2) + +// Constant for computing the scale factor to use to normalize characters. +const float MF_SCALE_FACTOR = 0.5f / kBlnXHeight; + +// Inline functions for manipulating micro-feature outlines. + +static inline bool DegenerateOutline(MFOUTLINE Outline) { + return (Outline == NIL_LIST) || (Outline == list_rest(Outline)); +} + +static inline MFEDGEPT* PointAt(MFOUTLINE Outline) { + return reinterpret_cast<MFEDGEPT*>first_node(Outline); +} + +static inline MFOUTLINE NextPointAfter(MFOUTLINE Outline) { + return list_rest(Outline); +} + +static inline void MakeOutlineCircular(MFOUTLINE Outline) { + set_rest(last(Outline), Outline); +} + +/**---------------------------------------------------------------------------- + Public Function Prototypes +----------------------------------------------------------------------------**/ +void ComputeBlobCenter(TBLOB* Blob, TPOINT* BlobCenter); + +LIST ConvertBlob(TBLOB* Blob); + +MFOUTLINE ConvertOutline(TESSLINE* Outline); + +LIST ConvertOutlines(TESSLINE* Outline, LIST ConvertedOutlines, + OUTLINETYPE OutlineType); + +void FilterEdgeNoise(MFOUTLINE Outline, float NoiseSegmentLength); + +void FindDirectionChanges(MFOUTLINE Outline, float MinSlope, float MaxSlope); + +void FreeMFOutline(void* agr); // MFOUTLINE Outline); + +void FreeOutlines(LIST Outlines); + +void MarkDirectionChanges(MFOUTLINE Outline); + +MFEDGEPT* NewEdgePoint(); + +MFOUTLINE NextExtremity(MFOUTLINE EdgePoint); + +void 
NormalizeOutline(MFOUTLINE Outline, float XOrigin); + +/*---------------------------------------------------------------------------- + Private Function Prototypes +-----------------------------------------------------------------------------*/ +void ChangeDirection(MFOUTLINE Start, MFOUTLINE End, DIRECTION Direction); + +// Normalizes the Outline in-place using cn_denorm's local transformation, +// then converts from the integer feature range [0,255] to the clusterer +// feature range of [-0.5, 0.5]. +void CharNormalizeOutline(MFOUTLINE Outline, const DENORM& cn_denorm); + +void ComputeDirection(MFEDGEPT* Start, MFEDGEPT* Finish, float MinSlope, + float MaxSlope); + +MFOUTLINE NextDirectionChange(MFOUTLINE EdgePoint); + +} // namespace tesseract + +#endif diff --git a/tesseract/src/classify/mfx.cpp b/tesseract/src/classify/mfx.cpp new file mode 100644 index 00000000..49b7f0e0 --- /dev/null +++ b/tesseract/src/classify/mfx.cpp @@ -0,0 +1,152 @@ +/****************************************************************************** + ** Filename: mfx.c + ** Purpose: Micro feature extraction routines + ** Author: Dan Johnson + ** + ** (c) Copyright Hewlett-Packard Company, 1988. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ *****************************************************************************/ + +#include "mfx.h" + +#include "mfdefs.h" +#include "mfoutline.h" +#include "clusttool.h" //NEEDED +#include "intfx.h" +#include "normalis.h" +#include "params.h" + +namespace tesseract { + +/* old numbers corresponded to 10.0 degrees and 80.0 degrees */ +double_VAR(classify_min_slope, 0.414213562, + "Slope below which lines are called horizontal"); +double_VAR(classify_max_slope, 2.414213562, + "Slope above which lines are called vertical"); + +/*---------------------------------------------------------------------------- + Private Function Prototypes +-----------------------------------------------------------------------------*/ + +MICROFEATURES ConvertToMicroFeatures(MFOUTLINE Outline, + MICROFEATURES MicroFeatures); + +MICROFEATURE ExtractMicroFeature(MFOUTLINE Start, MFOUTLINE End); + +/*---------------------------------------------------------------------------- + Public Code +----------------------------------------------------------------------------*/ + +/** + * This routine extracts micro-features from the specified + * blob and returns a list of the micro-features. All + * micro-features are normalized according to the specified + * line statistics. + * @param Blob blob to extract micro-features from + * @param cn_denorm control parameter to feature extractor + * @return List of micro-features extracted from the blob. 
+ */ +MICROFEATURES BlobMicroFeatures(TBLOB* Blob, const DENORM& cn_denorm) { + MICROFEATURES MicroFeatures = NIL_LIST; + LIST Outlines; + LIST RemainingOutlines; + MFOUTLINE Outline; + + if (Blob != nullptr) { + Outlines = ConvertBlob(Blob); + + RemainingOutlines = Outlines; + iterate(RemainingOutlines) { + Outline = static_cast<MFOUTLINE>first_node (RemainingOutlines); + CharNormalizeOutline(Outline, cn_denorm); + } + + RemainingOutlines = Outlines; + iterate(RemainingOutlines) { + Outline = static_cast<MFOUTLINE>first_node(RemainingOutlines); + FindDirectionChanges(Outline, classify_min_slope, classify_max_slope); + MarkDirectionChanges(Outline); + MicroFeatures = ConvertToMicroFeatures(Outline, MicroFeatures); + } + FreeOutlines(Outlines); + } + return MicroFeatures; +} /* BlobMicroFeatures */ + +/*--------------------------------------------------------------------------- + Private Code +---------------------------------------------------------------------------*/ + +/** + * Convert Outline to MicroFeatures + * @param Outline outline to extract micro-features from + * @param MicroFeatures list of micro-features to add to + * @return List of micro-features with new features added to front. + * @note Globals: none + */ +MICROFEATURES ConvertToMicroFeatures(MFOUTLINE Outline, + MICROFEATURES MicroFeatures) { + MFOUTLINE Current; + MFOUTLINE Last; + MFOUTLINE First; + MICROFEATURE NewFeature; + + if (DegenerateOutline (Outline)) + return (MicroFeatures); + + First = NextExtremity (Outline); + Last = First; + do { + Current = NextExtremity (Last); + if (!PointAt(Current)->Hidden) { + NewFeature = ExtractMicroFeature (Last, Current); + if (NewFeature != nullptr) + MicroFeatures = push (MicroFeatures, NewFeature); + } + Last = Current; + } + while (Last != First); + + return (MicroFeatures); +} /* ConvertToMicroFeatures */ + +/** + * This routine computes the feature parameters which describe + * the micro-feature that starts and Start and ends at End. 
+ * A new micro-feature is allocated, filled with the feature + * parameters, and returned. The routine assumes that + * Start and End are not the same point. If they are the + * same point, nullptr is returned, a warning message is + * printed, and the current outline is dumped to stdout. + * @param Start starting point of micro-feature + * @param End ending point of micro-feature + * @return New micro-feature or nullptr if the feature was rejected. + * @note Globals: none + */ +MICROFEATURE ExtractMicroFeature(MFOUTLINE Start, MFOUTLINE End) { + MICROFEATURE NewFeature; + MFEDGEPT *P1, *P2; + + P1 = PointAt(Start); + P2 = PointAt(End); + + NewFeature = NewMicroFeature (); + NewFeature[XPOSITION] = AverageOf(P1->Point.x, P2->Point.x); + NewFeature[YPOSITION] = AverageOf(P1->Point.y, P2->Point.y); + NewFeature[MFLENGTH] = DistanceBetween(P1->Point, P2->Point); + NewFeature[ORIENTATION] = NormalizedAngleFrom(&P1->Point, &P2->Point, 1.0); + NewFeature[FIRSTBULGE] = 0.0f; // deprecated + NewFeature[SECONDBULGE] = 0.0f; // deprecated + + return NewFeature; +} /* ExtractMicroFeature */ + +} // namespace tesseract diff --git a/tesseract/src/classify/mfx.h b/tesseract/src/classify/mfx.h new file mode 100644 index 00000000..818e6917 --- /dev/null +++ b/tesseract/src/classify/mfx.h @@ -0,0 +1,46 @@ +/****************************************************************************** + ** Filename: mfx.h + ** Purpose: Definition of micro-feature extraction routines + ** Author: Dan Johnson + ** + ** (c) Copyright Hewlett-Packard Company, 1988. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. 
+ ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + ******************************************************************************/ + +#ifndef MFX_H +#define MFX_H + +#include "mfdefs.h" +#include "params.h" + +namespace tesseract { + +class DENORM; +struct TBLOB; + +/*---------------------------------------------------------------------------- + Variables +----------------------------------------------------------------------------**/ + +/* old numbers corresponded to 10.0 degrees and 80.0 degrees */ +extern double_VAR_H(classify_min_slope, 0.414213562, + "Slope below which lines are called horizontal"); +extern double_VAR_H(classify_max_slope, 2.414213562, + "Slope above which lines are called vertical"); + +/*---------------------------------------------------------------------------- + Public Function Prototypes +----------------------------------------------------------------------------**/ +MICROFEATURES BlobMicroFeatures(TBLOB* Blob, const DENORM& cn_denorm); + +} // namespace tesseract + +#endif diff --git a/tesseract/src/classify/normfeat.cpp b/tesseract/src/classify/normfeat.cpp new file mode 100644 index 00000000..6aa13cc2 --- /dev/null +++ b/tesseract/src/classify/normfeat.cpp @@ -0,0 +1,73 @@ +/****************************************************************************** + ** Filename: normfeat.c + ** Purpose: Definition of char normalization features. + ** Author: Dan Johnson + ** History: 12/14/90, DSJ, Created. + ** + ** (c) Copyright Hewlett-Packard Company, 1988. 
+ ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + ******************************************************************************/ + +#include "normfeat.h" + +#include "intfx.h" +#include "featdefs.h" +#include "mfoutline.h" + +namespace tesseract { + +/** Return the length of the outline in baseline normalized form. */ +float ActualOutlineLength(FEATURE Feature) { + return (Feature->Params[CharNormLength] * LENGTH_COMPRESSION); +} + +/** + * Return the character normalization feature for a blob. + * + * The features returned are in a scale where the x-height has been + * normalized to live in the region y = [-0.25 .. 0.25]. Example ranges + * for English below are based on the Linux font collection on 2009-12-04: + * + * - Params[CharNormY] + * - The y coordinate of the grapheme's centroid. + * - English: [-0.27, 0.71] + * + * - Params[CharNormLength] + * - The length of the grapheme's outline (tiny segments discarded), + * divided by 10.0=LENGTH_COMPRESSION. + * - English: [0.16, 0.85] + * + * - Params[CharNormRx] + * - The radius of gyration about the x axis, as measured from CharNormY. + * - English: [0.011, 0.34] + * + * - Params[CharNormRy] + * - The radius of gyration about the y axis, as measured from + * the x center of the grapheme's bounding box. 
+ * - English: [0.011, 0.31] + */ +FEATURE_SET ExtractCharNormFeatures(const INT_FX_RESULT_STRUCT& fx_info) { + FEATURE_SET feature_set = NewFeatureSet(1); + FEATURE feature = NewFeature(&CharNormDesc); + + feature->Params[CharNormY] = + MF_SCALE_FACTOR * (fx_info.Ymean - kBlnBaselineOffset); + feature->Params[CharNormLength] = + MF_SCALE_FACTOR * fx_info.Length / LENGTH_COMPRESSION; + feature->Params[CharNormRx] = MF_SCALE_FACTOR * fx_info.Rx; + feature->Params[CharNormRy] = MF_SCALE_FACTOR * fx_info.Ry; + + AddFeature(feature_set, feature); + + return feature_set; +} /* ExtractCharNormFeatures */ + +} // namespace tesseract diff --git a/tesseract/src/classify/normfeat.h b/tesseract/src/classify/normfeat.h new file mode 100644 index 00000000..6293cab9 --- /dev/null +++ b/tesseract/src/classify/normfeat.h @@ -0,0 +1,40 @@ +/****************************************************************************** + ** Filename: normfeat.h + ** Purpose: Definition of character normalization features. + ** Author: Dan Johnson + ** History: 12/14/90, DSJ, Created. + ** + ** (c) Copyright Hewlett-Packard Company, 1988. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ *****************************************************************************/ + +#ifndef NORMFEAT_H +#define NORMFEAT_H + +#include "ocrfeatures.h" + +namespace tesseract { + +#define LENGTH_COMPRESSION (10.0) + +struct INT_FX_RESULT_STRUCT; + +typedef enum { + CharNormY, CharNormLength, CharNormRx, CharNormRy +} NORM_PARAM_NAME; + +float ActualOutlineLength(FEATURE Feature); + +FEATURE_SET ExtractCharNormFeatures(const INT_FX_RESULT_STRUCT &fx_info); + +} + +#endif diff --git a/tesseract/src/classify/normmatch.cpp b/tesseract/src/classify/normmatch.cpp new file mode 100644 index 00000000..32bd2876 --- /dev/null +++ b/tesseract/src/classify/normmatch.cpp @@ -0,0 +1,231 @@ +/****************************************************************************** + ** Filename: normmatch.c + ** Purpose: Simple matcher based on character normalization features. + ** Author: Dan Johnson + ** + ** (c) Copyright Hewlett-Packard Company, 1988. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ ******************************************************************************/ +/*---------------------------------------------------------------------------- + Include Files and Type Defines +----------------------------------------------------------------------------*/ +#include "normmatch.h" + +#include "classify.h" +#include "clusttool.h" +#include "helpers.h" +#include "normfeat.h" +#include "unicharset.h" +#include "params.h" + +#include <cstdio> +#include <cmath> +#include <sstream> // for std::istringstream + +namespace tesseract { + +struct NORM_PROTOS +{ + int NumParams; + PARAM_DESC *ParamDesc; + LIST* Protos; + int NumProtos; +}; + +/*---------------------------------------------------------------------------- + Private Code +----------------------------------------------------------------------------*/ + +/** + * @name NormEvidenceOf + * + * Return the new type of evidence number corresponding to this + * normalization adjustment. The equation that represents the transform is: + * 1 / (1 + (NormAdj / midpoint) ^ curl) + */ +static double NormEvidenceOf(double NormAdj) { + NormAdj /= classify_norm_adj_midpoint; + + if (classify_norm_adj_curl == 3) { + NormAdj = NormAdj * NormAdj * NormAdj; + } else if (classify_norm_adj_curl == 2) { + NormAdj = NormAdj * NormAdj; + } else { + NormAdj = pow(NormAdj, classify_norm_adj_curl); + } + return (1.0 / (1.0 + NormAdj)); +} + +/*---------------------------------------------------------------------------- + Variables +----------------------------------------------------------------------------*/ + +/** control knobs used to control the normalization adjustment process */ +double_VAR(classify_norm_adj_midpoint, 32.0, "Norm adjust midpoint ..."); +double_VAR(classify_norm_adj_curl, 2.0, "Norm adjust curl ..."); +/** Weight of width variance against height and vertical position. 
*/ +const double kWidthErrorWeighting = 0.125; + +/*---------------------------------------------------------------------------- + Public Code +----------------------------------------------------------------------------*/ +/** + * This routine compares Features against each character + * normalization proto for ClassId and returns the match + * rating of the best match. + * @param ClassId id of class to match against + * @param feature character normalization feature + * @param DebugMatch controls dump of debug info + * + * Globals: + * #NormProtos character normalization prototypes + * + * @return Best match rating for Feature against protos of ClassId. + */ +float Classify::ComputeNormMatch(CLASS_ID ClassId, + const FEATURE_STRUCT& feature, + bool DebugMatch) { + LIST Protos; + float BestMatch; + float Match; + float Delta; + PROTOTYPE *Proto; + int ProtoId; + + if (ClassId >= NormProtos->NumProtos) { + ClassId = NO_CLASS; + } + + /* handle requests for classification as noise */ + if (ClassId == NO_CLASS) { + /* kludge - clean up constants and make into control knobs later */ + Match = (feature.Params[CharNormLength] * + feature.Params[CharNormLength] * 500.0 + + feature.Params[CharNormRx] * + feature.Params[CharNormRx] * 8000.0 + + feature.Params[CharNormRy] * + feature.Params[CharNormRy] * 8000.0); + return (1.0 - NormEvidenceOf(Match)); + } + + BestMatch = FLT_MAX; + Protos = NormProtos->Protos[ClassId]; + + if (DebugMatch) { + tprintf("\nChar norm for class %s\n", unicharset.id_to_unichar(ClassId)); + } + + ProtoId = 0; + iterate(Protos) { + Proto = reinterpret_cast<PROTOTYPE *>first_node (Protos); + Delta = feature.Params[CharNormY] - Proto->Mean[CharNormY]; + Match = Delta * Delta * Proto->Weight.Elliptical[CharNormY]; + if (DebugMatch) { + tprintf("YMiddle: Proto=%g, Delta=%g, Var=%g, Dist=%g\n", + Proto->Mean[CharNormY], Delta, + Proto->Weight.Elliptical[CharNormY], Match); + } + Delta = feature.Params[CharNormRx] - Proto->Mean[CharNormRx]; + Match += 
Delta * Delta * Proto->Weight.Elliptical[CharNormRx]; + if (DebugMatch) { + tprintf("Height: Proto=%g, Delta=%g, Var=%g, Dist=%g\n", + Proto->Mean[CharNormRx], Delta, + Proto->Weight.Elliptical[CharNormRx], Match); + } + // Ry is width! See intfx.cpp. + Delta = feature.Params[CharNormRy] - Proto->Mean[CharNormRy]; + if (DebugMatch) { + tprintf("Width: Proto=%g, Delta=%g, Var=%g\n", + Proto->Mean[CharNormRy], Delta, + Proto->Weight.Elliptical[CharNormRy]); + } + Delta = Delta * Delta * Proto->Weight.Elliptical[CharNormRy]; + Delta *= kWidthErrorWeighting; + Match += Delta; + if (DebugMatch) { + tprintf("Total Dist=%g, scaled=%g, sigmoid=%g, penalty=%g\n", + Match, Match / classify_norm_adj_midpoint, + NormEvidenceOf(Match), 256 * (1 - NormEvidenceOf(Match))); + } + + if (Match < BestMatch) + BestMatch = Match; + + ProtoId++; + } + return 1.0 - NormEvidenceOf(BestMatch); +} /* ComputeNormMatch */ + +void Classify::FreeNormProtos() { + if (NormProtos != nullptr) { + for (int i = 0; i < NormProtos->NumProtos; i++) + FreeProtoList(&NormProtos->Protos[i]); + free(NormProtos->Protos); + free(NormProtos->ParamDesc); + free(NormProtos); + NormProtos = nullptr; + } +} + +/** + * This routine allocates a new data structure to hold + * a set of character normalization protos. It then fills in + * the data structure by reading from the specified File. + * @param fp open text file to read normalization protos from + * Globals: none + * @return Character normalization protos. 
+ */ +NORM_PROTOS *Classify::ReadNormProtos(TFile *fp) { + NORM_PROTOS *NormProtos; + int i; + char unichar[2 * UNICHAR_LEN + 1]; + UNICHAR_ID unichar_id; + LIST Protos; + int NumProtos; + + /* allocate and initialization data structure */ + NormProtos = static_cast<NORM_PROTOS *>(malloc (sizeof (NORM_PROTOS))); + NormProtos->NumProtos = unicharset.size(); + NormProtos->Protos = static_cast<LIST *>(malloc (NormProtos->NumProtos * sizeof(LIST))); + for (i = 0; i < NormProtos->NumProtos; i++) + NormProtos->Protos[i] = NIL_LIST; + + /* read file header and save in data structure */ + NormProtos->NumParams = ReadSampleSize(fp); + NormProtos->ParamDesc = ReadParamDesc(fp, NormProtos->NumParams); + + /* read protos for each class into a separate list */ + const int kMaxLineSize = 100; + char line[kMaxLineSize]; + while (fp->FGets(line, kMaxLineSize) != nullptr) { + std::istringstream stream(line); + stream.imbue(std::locale::classic()); + stream >> unichar >> NumProtos; + if (stream.fail()) { + continue; + } + if (unicharset.contains_unichar(unichar)) { + unichar_id = unicharset.unichar_to_id(unichar); + Protos = NormProtos->Protos[unichar_id]; + for (i = 0; i < NumProtos; i++) + Protos = push_last(Protos, ReadPrototype(fp, NormProtos->NumParams)); + NormProtos->Protos[unichar_id] = Protos; + } else { + tprintf("Error: unichar %s in normproto file is not in unichar set.\n", + unichar); + for (i = 0; i < NumProtos; i++) + FreePrototype(ReadPrototype(fp, NormProtos->NumParams)); + } + } + return (NormProtos); +} /* ReadNormProtos */ + +} // namespace tesseract diff --git a/tesseract/src/classify/normmatch.h b/tesseract/src/classify/normmatch.h new file mode 100644 index 00000000..77f66550 --- /dev/null +++ b/tesseract/src/classify/normmatch.h @@ -0,0 +1,34 @@ +/****************************************************************************** + ** Filename: normmatch.h + ** Purpose: Simple matcher based on character normalization features. 
+ ** Author: Dan Johnson + ** + ** (c) Copyright Hewlett-Packard Company, 1988. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + ******************************************************************************/ + +#ifndef NORMMATCH_H +#define NORMMATCH_H + +#include "matchdefs.h" +#include "ocrfeatures.h" +#include "params.h" + +namespace tesseract { + +/* control knobs used to control the normalization adjustment process */ +extern double_VAR_H(classify_norm_adj_midpoint, 32.0, + "Norm adjust midpoint ..."); +extern double_VAR_H(classify_norm_adj_curl, 2.0, "Norm adjust curl ..."); + +} // namespace tesseract + +#endif diff --git a/tesseract/src/classify/ocrfeatures.cpp b/tesseract/src/classify/ocrfeatures.cpp new file mode 100644 index 00000000..b8d646b1 --- /dev/null +++ b/tesseract/src/classify/ocrfeatures.cpp @@ -0,0 +1,190 @@ +/****************************************************************************** + ** Filename: ocrfeatures.cpp + ** Purpose: Generic definition of a feature. + ** Author: Dan Johnson + ** + ** (c) Copyright Hewlett-Packard Company, 1988. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. 
+ ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + ******************************************************************************/ + +#include "ocrfeatures.h" + +#include "scanutils.h" +#include "strngs.h" // for STRING + +#include <cassert> +#include <cmath> + +namespace tesseract { + +/*---------------------------------------------------------------------------- + Public Code +----------------------------------------------------------------------------*/ +/** + * Add a feature to a feature set. If the feature set is + * already full, false is returned to indicate that the + * feature could not be added to the set; otherwise, true is + * returned. + * @param FeatureSet set of features to add Feature to + * @param Feature feature to be added to FeatureSet + * @return true if feature added to set, false if set is already full. + */ +bool AddFeature(FEATURE_SET FeatureSet, FEATURE Feature) { + if (FeatureSet->NumFeatures >= FeatureSet->MaxNumFeatures) { + FreeFeature(Feature); + return false; + } + + FeatureSet->Features[FeatureSet->NumFeatures++] = Feature; + return true; +} /* AddFeature */ + +/** + * Release the memory consumed by the specified feature. + * @param Feature feature to be deallocated. + */ +void FreeFeature(FEATURE Feature) { free(Feature); } /* FreeFeature */ + +/** + * Release the memory consumed by the specified feature + * set. This routine also frees the memory consumed by the + * features contained in the set. 
+ * @param FeatureSet set of features to be freed + */ +void FreeFeatureSet(FEATURE_SET FeatureSet) { + int i; + + if (FeatureSet) { + for (i = 0; i < FeatureSet->NumFeatures; i++) + FreeFeature(FeatureSet->Features[i]); + free(FeatureSet); + } +} /* FreeFeatureSet */ + +/** + * Allocate and return a new feature of the specified + * type. + * @param FeatureDesc description of feature to be created. + * @return New #FEATURE. + */ +FEATURE NewFeature(const FEATURE_DESC_STRUCT* FeatureDesc) { + FEATURE Feature; + + Feature = static_cast<FEATURE>(malloc(sizeof(FEATURE_STRUCT) + + (FeatureDesc->NumParams - 1) * sizeof(float))); + Feature->Type = FeatureDesc; + return (Feature); + +} /* NewFeature */ + +/** + * Allocate and return a new feature set large enough to + * hold the specified number of features. + * @param NumFeatures maximum # of features to be put in feature set + * @return New #FEATURE_SET. + */ +FEATURE_SET NewFeatureSet(int NumFeatures) { + FEATURE_SET FeatureSet; + + FeatureSet = static_cast<FEATURE_SET>(malloc (sizeof (FEATURE_SET_STRUCT) + + (NumFeatures - 1) * sizeof (FEATURE))); + FeatureSet->MaxNumFeatures = NumFeatures; + FeatureSet->NumFeatures = 0; + return (FeatureSet); + +} /* NewFeatureSet */ + +/** + * Create a new feature of the specified type and read in + * the value of its parameters from File. The extra penalty + * for the feature is also computed by calling the appropriate + * function for the specified feature type. The correct text + * representation for a feature is a list of N floats where + * N is the number of parameters in the feature. + * @param File open text file to read feature from + * @param FeatureDesc specifies type of feature to read from File + * @return New #FEATURE read from File. 
+ */ +static FEATURE ReadFeature(FILE* File, const FEATURE_DESC_STRUCT* FeatureDesc) { + FEATURE Feature; + int i; + + Feature = NewFeature (FeatureDesc); + for (i = 0; i < Feature->Type->NumParams; i++) { + ASSERT_HOST(tfscanf(File, "%f", &(Feature->Params[i])) == 1); +#ifndef _WIN32 + assert (!std::isnan(Feature->Params[i])); +#endif + } + return Feature; +} + +/** + * Create a new feature set of the specified type and read in + * the features from File. The correct text representation + * for a feature set is an integer which specifies the number (N) + * of features in a set followed by a list of N feature + * descriptions. + * @param File open text file to read new feature set from + * @param FeatureDesc specifies type of feature to read from File + * @return New feature set read from File. + */ +FEATURE_SET ReadFeatureSet(FILE* File, const FEATURE_DESC_STRUCT* FeatureDesc) { + int NumFeatures; + ASSERT_HOST(tfscanf(File, "%d", &NumFeatures) == 1); + ASSERT_HOST(NumFeatures >= 0); + + FEATURE_SET FeatureSet = NewFeatureSet(NumFeatures); + for (int i = 0; i < NumFeatures; i++) + AddFeature(FeatureSet, ReadFeature(File, FeatureDesc)); + + return FeatureSet; +} + +/** + * Appends a textual representation of Feature to str. + * This representation is simply a list of the N parameters + * of the feature, terminated with a newline. It is assumed + * that the ExtraPenalty field can be reconstructed from the + * parameters of the feature. It is also assumed that the + * feature type information is specified or assumed elsewhere. + * @param Feature feature to write out to str + * @param str string to write Feature to + */ +static void WriteFeature(FEATURE Feature, STRING* str) { + for (int i = 0; i < Feature->Type->NumParams; i++) { +#ifndef WIN32 + assert(!std::isnan(Feature->Params[i])); +#endif + str->add_str_double(" ", Feature->Params[i]); + } + *str += "\n"; +} /* WriteFeature */ + +/** + * Write a textual representation of FeatureSet to File. 
+ * This representation is an integer specifying the number of + * features in the set, followed by a newline, followed by + * text representations for each feature in the set. + * @param FeatureSet feature set to write to File + * @param str string to write Feature to + */ +void WriteFeatureSet(FEATURE_SET FeatureSet, STRING* str) { + if (FeatureSet) { + str->add_str_int("", FeatureSet->NumFeatures); + *str += "\n"; + for (int i = 0; i < FeatureSet->NumFeatures; i++) { + WriteFeature(FeatureSet->Features[i], str); + } + } +} /* WriteFeatureSet */ + +} // namespace tesseract diff --git a/tesseract/src/classify/ocrfeatures.h b/tesseract/src/classify/ocrfeatures.h new file mode 100644 index 00000000..edf63496 --- /dev/null +++ b/tesseract/src/classify/ocrfeatures.h @@ -0,0 +1,122 @@ +/****************************************************************************** + ** Filename: features.h + ** Purpose: Generic definition of a feature. + ** Author: Dan Johnson + ** + ** (c) Copyright Hewlett-Packard Company, 1988. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + ******************************************************************************/ + +#ifndef FEATURES_H +#define FEATURES_H + +#include "blobs.h" + +#include <cstdio> + +namespace tesseract { + +class DENORM; + +#undef Min +#undef Max +#define FEAT_NAME_SIZE 80 + +// A character is described by multiple sets of extracted features. 
Each +// set contains a number of features of a particular type, for example, a +// set of bays, or a set of closures, or a set of microfeatures. Each +// feature consists of a number of parameters. All features within a +// feature set contain the same number of parameters. All circular +// parameters are required to be the first parameters in the feature. + +struct PARAM_DESC { + bool Circular; // true if dimension wraps around + bool NonEssential; // true if dimension not used in searches + float Min; // low end of range for circular dimensions + float Max; // high end of range for circular dimensions + float Range; // Max - Min + float HalfRange; // (Max - Min)/2 + float MidRange; // (Max + Min)/2 +}; + +struct FEATURE_DESC_STRUCT { + uint16_t NumParams; // total # of params + const char* ShortName; // short name for feature + const PARAM_DESC* ParamDesc; // array - one per param +}; +using FEATURE_DESC = FEATURE_DESC_STRUCT*; + +struct FEATURE_STRUCT { + const FEATURE_DESC_STRUCT* Type; // points to description of feature type + float Params[1]; // variable size array - params for feature +}; +using FEATURE = FEATURE_STRUCT*; + +struct FEATURE_SET_STRUCT { + uint16_t NumFeatures; // number of features in set + uint16_t MaxNumFeatures; // maximum size of feature set + FEATURE Features[1]; // variable size array of features +}; +using FEATURE_SET = FEATURE_SET_STRUCT*; + +// A generic character description as a char pointer. In reality, it will be +// a pointer to some data structure. Paired feature extractors/matchers need +// to agree on the data structure to be used, however, the high level +// classifier does not need to know the details of this data structure. 
+using CHAR_FEATURES = char*; + +/*---------------------------------------------------------------------- + Macros for defining the parameters of a new features +----------------------------------------------------------------------*/ +#define StartParamDesc(Name) const PARAM_DESC Name[] = { +#define DefineParam(Circular, NonEssential, Min, Max) \ + {Circular, \ + NonEssential, \ + Min, \ + Max, \ + (Max) - (Min), \ + (((Max) - (Min)) / 2.0), \ + (((Max) + (Min)) / 2.0)}, + +#define EndParamDesc }; + +/*---------------------------------------------------------------------- +Macro for describing a new feature. The parameters of the macro +are as follows: + +DefineFeature (Name, NumLinear, NumCircular, ShortName, ParamName) +----------------------------------------------------------------------*/ +#define DefineFeature(Name, NL, NC, SN, PN) \ + const FEATURE_DESC_STRUCT Name = {((NL) + (NC)), SN, PN}; + +/*---------------------------------------------------------------------- + Generic routines that work for all feature types +----------------------------------------------------------------------*/ +bool AddFeature(FEATURE_SET FeatureSet, FEATURE Feature); + +TESS_API +void FreeFeature(FEATURE Feature); + +TESS_API +void FreeFeatureSet(FEATURE_SET FeatureSet); + +TESS_API +FEATURE NewFeature(const FEATURE_DESC_STRUCT* FeatureDesc); + +FEATURE_SET NewFeatureSet(int NumFeatures); + +FEATURE_SET ReadFeatureSet(FILE* File, const FEATURE_DESC_STRUCT* FeatureDesc); + +void WriteFeatureSet(FEATURE_SET FeatureSet, STRING* str); + +} // namespace tesseract + +#endif diff --git a/tesseract/src/classify/outfeat.cpp b/tesseract/src/classify/outfeat.cpp new file mode 100644 index 00000000..f4746372 --- /dev/null +++ b/tesseract/src/classify/outfeat.cpp @@ -0,0 +1,168 @@ +/****************************************************************************** + ** Filename: outfeat.c + ** Purpose: Definition of outline-features. 
+ ** Author: Dan Johnson + ** + ** (c) Copyright Hewlett-Packard Company, 1988. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + ******************************************************************************/ + +#include "outfeat.h" + +#include "classify.h" +#include "featdefs.h" +#include "mfoutline.h" +#include "ocrfeatures.h" + +#include <cstdio> + +namespace tesseract { + +/*---------------------------------------------------------------------------- + Public Code +----------------------------------------------------------------------------*/ + +/** + * Convert each segment in the outline to a feature + * and return the features. + * @param Blob blob to extract pico-features from + * @return Outline-features for Blob. 
+ * @note Globals: none + */ +FEATURE_SET Classify::ExtractOutlineFeatures(TBLOB *Blob) { + LIST Outlines; + LIST RemainingOutlines; + MFOUTLINE Outline; + FEATURE_SET FeatureSet; + float XScale, YScale; + + FeatureSet = NewFeatureSet (MAX_OUTLINE_FEATURES); + if (Blob == nullptr) + return (FeatureSet); + + Outlines = ConvertBlob (Blob); + + NormalizeOutlines(Outlines, &XScale, &YScale); + RemainingOutlines = Outlines; + iterate(RemainingOutlines) { + Outline = static_cast<MFOUTLINE>first_node (RemainingOutlines); + ConvertToOutlineFeatures(Outline, FeatureSet); + } + if (classify_norm_method == baseline) + NormalizeOutlineX(FeatureSet); + FreeOutlines(Outlines); + return (FeatureSet); +} /* ExtractOutlineFeatures */ + +/*---------------------------------------------------------------------------- + Private Code +----------------------------------------------------------------------------*/ +/*---------------------------------------------------------------------------*/ +/** + * This routine computes the midpoint between Start and + * End to obtain the x,y position of the outline-feature. It + * also computes the direction from Start to End as the + * direction of the outline-feature and the distance from + * Start to End as the length of the outline-feature. + * This feature is then + * inserted into the next feature slot in FeatureSet. 
+ * @param Start starting point of outline-feature + * @param End ending point of outline-feature + * @param FeatureSet set to add outline-feature to + */ +void AddOutlineFeatureToSet(FPOINT *Start, + FPOINT *End, + FEATURE_SET FeatureSet) { + FEATURE Feature; + + Feature = NewFeature(&OutlineFeatDesc); + Feature->Params[OutlineFeatDir] = NormalizedAngleFrom(Start, End, 1.0); + Feature->Params[OutlineFeatX] = AverageOf(Start->x, End->x); + Feature->Params[OutlineFeatY] = AverageOf(Start->y, End->y); + Feature->Params[OutlineFeatLength] = DistanceBetween(*Start, *End); + AddFeature(FeatureSet, Feature); + +} /* AddOutlineFeatureToSet */ + + +/*---------------------------------------------------------------------------*/ +/** + * This routine steps converts each section in the specified + * outline to a feature described by its x,y position, length + * and angle. + * Results are returned in FeatureSet. + * @param Outline outline to extract outline-features from + * @param FeatureSet set of features to add outline-features to + */ +void ConvertToOutlineFeatures(MFOUTLINE Outline, FEATURE_SET FeatureSet) { + MFOUTLINE Next; + MFOUTLINE First; + FPOINT FeatureStart; + FPOINT FeatureEnd; + + if (DegenerateOutline (Outline)) + return; + + First = Outline; + Next = First; + do { + FeatureStart = PointAt(Next)->Point; + Next = NextPointAfter(Next); + + /* note that an edge is hidden if the ending point of the edge is + marked as hidden. This situation happens because the order of + the outlines is reversed when they are converted from the old + format. In the old format, a hidden edge is marked by the + starting point for that edge. 
*/ + if (!PointAt(Next)->Hidden) { + FeatureEnd = PointAt(Next)->Point; + AddOutlineFeatureToSet(&FeatureStart, &FeatureEnd, FeatureSet); + } + } + while (Next != First); +} /* ConvertToOutlineFeatures */ + + +/*---------------------------------------------------------------------------*/ +/** + * This routine computes the weighted average x position + * over all of the outline-features in FeatureSet and then + * renormalizes the outline-features to force this average + * to be the x origin (i.e. x=0). + * FeatureSet is changed. + * @param FeatureSet outline-features to be normalized + */ +void NormalizeOutlineX(FEATURE_SET FeatureSet) { + int i; + FEATURE Feature; + float Length; + float TotalX = 0.0; + float TotalWeight = 0.0; + float Origin; + + if (FeatureSet->NumFeatures <= 0) + return; + + for (i = 0; i < FeatureSet->NumFeatures; i++) { + Feature = FeatureSet->Features[i]; + Length = Feature->Params[OutlineFeatLength]; + TotalX += Feature->Params[OutlineFeatX] * Length; + TotalWeight += Length; + } + Origin = TotalX / TotalWeight; + + for (i = 0; i < FeatureSet->NumFeatures; i++) { + Feature = FeatureSet->Features[i]; + Feature->Params[OutlineFeatX] -= Origin; + } +} /* NormalizeOutlineX */ + +} // namespace tesseract diff --git a/tesseract/src/classify/outfeat.h b/tesseract/src/classify/outfeat.h new file mode 100644 index 00000000..eefde2e1 --- /dev/null +++ b/tesseract/src/classify/outfeat.h @@ -0,0 +1,49 @@ +/****************************************************************************** + ** Filename: outfeat.h + ** Purpose: Definition of outline features. + ** Author: Dan Johnson + ** + ** (c) Copyright Hewlett-Packard Company, 1988. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. 
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 ******************************************************************************/

#ifndef OUTFEAT_H
#define OUTFEAT_H

#include "ocrfeatures.h"
#include "fpoint.h"
#include "mfoutline.h"

namespace tesseract {

// Parameter indices of an outline feature: midpoint position (x, y),
// segment length, and segment direction.
typedef enum {
  OutlineFeatX,
  OutlineFeatY,
  OutlineFeatLength,
  OutlineFeatDir
} OUTLINE_FEAT_PARAM_NAME;

// Capacity of the feature set built by ExtractOutlineFeatures.
#define MAX_OUTLINE_FEATURES (100)

/*---------------------------------------------------------------------------
          Private Function Prototypes
----------------------------------------------------------------------------*/
void AddOutlineFeatureToSet(FPOINT *Start,
                            FPOINT *End,
                            FEATURE_SET FeatureSet);

void ConvertToOutlineFeatures(MFOUTLINE Outline, FEATURE_SET FeatureSet);

void NormalizeOutlineX(FEATURE_SET FeatureSet);

} // namespace tesseract

#endif
diff --git a/tesseract/src/classify/picofeat.cpp b/tesseract/src/classify/picofeat.cpp new file mode 100644 index 00000000..17f5e66d --- /dev/null +++ b/tesseract/src/classify/picofeat.cpp @@ -0,0 +1,264 @@
/******************************************************************************
 ** Filename: picofeat.c
 ** Purpose: Definition of pico-features.
+ ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + ******************************************************************************/ + +#include "picofeat.h" + +#include "classify.h" +#include "featdefs.h" +#include "fpoint.h" +#include "mfoutline.h" +#include "ocrfeatures.h" +#include "params.h" +#include "trainingsample.h" + +#include <cmath> +#include <cstdio> + +namespace tesseract { + +/*--------------------------------------------------------------------------- + Variables +----------------------------------------------------------------------------*/ + +double_VAR(classify_pico_feature_length, 0.05, "Pico Feature Length"); + +/*--------------------------------------------------------------------------- + Private Function Prototypes +----------------------------------------------------------------------------*/ +void ConvertSegmentToPicoFeat(FPOINT *Start, + FPOINT *End, + FEATURE_SET FeatureSet); + +void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet); + +void NormalizePicoX(FEATURE_SET FeatureSet); + +/*---------------------------------------------------------------------------- + Public Code +----------------------------------------------------------------------------*/ +/*---------------------------------------------------------------------------*/ +/** + * Operation: Dummy for now. + * + * Globals: + * - classify_norm_method normalization method currently specified + * @param Blob blob to extract pico-features from + * @return Pico-features for Blob. 
+ */ +FEATURE_SET Classify::ExtractPicoFeatures(TBLOB *Blob) { + LIST Outlines; + LIST RemainingOutlines; + MFOUTLINE Outline; + FEATURE_SET FeatureSet; + float XScale, YScale; + + FeatureSet = NewFeatureSet(MAX_PICO_FEATURES); + Outlines = ConvertBlob(Blob); + NormalizeOutlines(Outlines, &XScale, &YScale); + RemainingOutlines = Outlines; + iterate(RemainingOutlines) { + Outline = static_cast<MFOUTLINE>first_node (RemainingOutlines); + ConvertToPicoFeatures2(Outline, FeatureSet); + } + if (classify_norm_method == baseline) + NormalizePicoX(FeatureSet); + FreeOutlines(Outlines); + return (FeatureSet); + +} /* ExtractPicoFeatures */ + +/*---------------------------------------------------------------------------- + Private Code +----------------------------------------------------------------------------*/ +/*---------------------------------------------------------------------------*/ +/** + * This routine converts an entire segment of an outline + * into a set of pico features which are added to + * FeatureSet. The length of the segment is rounded to the + * nearest whole number of pico-features. The pico-features + * are spaced evenly over the entire segment. + * Results are placed in FeatureSet. 
+ * Globals: + * - classify_pico_feature_length length of a single pico-feature + * @param Start starting point of pico-feature + * @param End ending point of pico-feature + * @param FeatureSet set to add pico-feature to + */ +void ConvertSegmentToPicoFeat(FPOINT *Start, + FPOINT *End, + FEATURE_SET FeatureSet) { + FEATURE Feature; + float Angle; + float Length; + int NumFeatures; + FPOINT Center; + FPOINT Delta; + int i; + + Angle = NormalizedAngleFrom (Start, End, 1.0); + Length = DistanceBetween (*Start, *End); + NumFeatures = static_cast<int>(floor (Length / classify_pico_feature_length + 0.5)); + if (NumFeatures < 1) + NumFeatures = 1; + + /* compute vector for one pico feature */ + Delta.x = XDelta (*Start, *End) / NumFeatures; + Delta.y = YDelta (*Start, *End) / NumFeatures; + + /* compute position of first pico feature */ + Center.x = Start->x + Delta.x / 2.0; + Center.y = Start->y + Delta.y / 2.0; + + /* compute each pico feature in segment and add to feature set */ + for (i = 0; i < NumFeatures; i++) { + Feature = NewFeature (&PicoFeatDesc); + Feature->Params[PicoFeatDir] = Angle; + Feature->Params[PicoFeatX] = Center.x; + Feature->Params[PicoFeatY] = Center.y; + AddFeature(FeatureSet, Feature); + + Center.x += Delta.x; + Center.y += Delta.y; + } +} /* ConvertSegmentToPicoFeat */ + + +/*---------------------------------------------------------------------------*/ +/** + * This routine steps through the specified outline and cuts it + * up into pieces of equal length. These pieces become the + * desired pico-features. Each segment in the outline + * is converted into an integral number of pico-features. + * Results are returned in FeatureSet. 
+ * + * Globals: + * - classify_pico_feature_length length of features to be extracted + * @param Outline outline to extract micro-features from + * @param FeatureSet set of features to add pico-features to + */ +void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet) { + MFOUTLINE Next; + MFOUTLINE First; + MFOUTLINE Current; + + if (DegenerateOutline(Outline)) + return; + + First = Outline; + Current = First; + Next = NextPointAfter(Current); + do { + /* note that an edge is hidden if the ending point of the edge is + marked as hidden. This situation happens because the order of + the outlines is reversed when they are converted from the old + format. In the old format, a hidden edge is marked by the + starting point for that edge. */ + if (!(PointAt(Next)->Hidden)) + ConvertSegmentToPicoFeat (&(PointAt(Current)->Point), + &(PointAt(Next)->Point), FeatureSet); + + Current = Next; + Next = NextPointAfter(Current); + } + while (Current != First); + +} /* ConvertToPicoFeatures2 */ + + +/*---------------------------------------------------------------------------*/ +/** + * This routine computes the average x position over all + * of the pico-features in FeatureSet and then renormalizes + * the pico-features to force this average to be the x origin + * (i.e. x=0). + * FeatureSet is changed. 
+ * @param FeatureSet pico-features to be normalized + */ +void NormalizePicoX(FEATURE_SET FeatureSet) { + int i; + FEATURE Feature; + float Origin = 0.0; + + for (i = 0; i < FeatureSet->NumFeatures; i++) { + Feature = FeatureSet->Features[i]; + Origin += Feature->Params[PicoFeatX]; + } + Origin /= FeatureSet->NumFeatures; + + for (i = 0; i < FeatureSet->NumFeatures; i++) { + Feature = FeatureSet->Features[i]; + Feature->Params[PicoFeatX] -= Origin; + } +} /* NormalizePicoX */ + +/*---------------------------------------------------------------------------*/ +/** + * @param blob blob to extract features from + * @param fx_info + * @return Integer character-normalized features for blob. + */ +FEATURE_SET Classify::ExtractIntCNFeatures( + const TBLOB& blob, const INT_FX_RESULT_STRUCT& fx_info) { + INT_FX_RESULT_STRUCT local_fx_info(fx_info); + std::vector<INT_FEATURE_STRUCT> bl_features; + tesseract::TrainingSample* sample = tesseract::BlobToTrainingSample( + blob, false, &local_fx_info, &bl_features); + if (sample == nullptr) return nullptr; + + uint32_t num_features = sample->num_features(); + const INT_FEATURE_STRUCT* features = sample->features(); + FEATURE_SET feature_set = NewFeatureSet(num_features); + for (uint32_t f = 0; f < num_features; ++f) { + FEATURE feature = NewFeature(&IntFeatDesc); + + feature->Params[IntX] = features[f].X; + feature->Params[IntY] = features[f].Y; + feature->Params[IntDir] = features[f].Theta; + AddFeature(feature_set, feature); + } + delete sample; + + return feature_set; +} /* ExtractIntCNFeatures */ + +/*---------------------------------------------------------------------------*/ +/** + * @param blob blob to extract features from + * @param fx_info + * @return Geometric (top/bottom/width) features for blob. 
+ */ +FEATURE_SET Classify::ExtractIntGeoFeatures( + const TBLOB& blob, const INT_FX_RESULT_STRUCT& fx_info) { + INT_FX_RESULT_STRUCT local_fx_info(fx_info); + std::vector<INT_FEATURE_STRUCT> bl_features; + tesseract::TrainingSample* sample = tesseract::BlobToTrainingSample( + blob, false, &local_fx_info, &bl_features); + if (sample == nullptr) return nullptr; + + FEATURE_SET feature_set = NewFeatureSet(1); + FEATURE feature = NewFeature(&IntFeatDesc); + + feature->Params[GeoBottom] = sample->geo_feature(GeoBottom); + feature->Params[GeoTop] = sample->geo_feature(GeoTop); + feature->Params[GeoWidth] = sample->geo_feature(GeoWidth); + AddFeature(feature_set, feature); + delete sample; + + return feature_set; +} /* ExtractIntGeoFeatures */ + +} // namespace tesseract. diff --git a/tesseract/src/classify/picofeat.h b/tesseract/src/classify/picofeat.h new file mode 100644 index 00000000..d5e7786e --- /dev/null +++ b/tesseract/src/classify/picofeat.h @@ -0,0 +1,65 @@ +/****************************************************************************** + ** Filename: picofeat.h + ** Purpose: Definition of pico features. + ** Author: Dan Johnson + ** History: 9/4/90, DSJ, Created. + ** + ** (c) Copyright Hewlett-Packard Company, 1988. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ ******************************************************************************/ + +#ifndef PICOFEAT_H +#define PICOFEAT_H + +#include "ocrfeatures.h" +#include "params.h" + +namespace tesseract { + +// Enum for the order/type of params in IntFeatDesc. +enum IntParams { + IntX, // x-position (0-255). + IntY, // y-position (0-255). + IntDir // Direction (0-255, circular). +}; + +// Enum for the order/type of params in GeoFeatDesc. +enum GeoParams { + GeoBottom, // Bounding box bottom in baseline space (0-255). + GeoTop, // Bounding box top in baseline space (0-255). + GeoWidth, // Bounding box width in baseline space (0-255). + + GeoCount // Number of geo features. +}; + +typedef enum { PicoFeatY, PicoFeatDir, PicoFeatX } PICO_FEAT_PARAM_NAME; + +#define MAX_PICO_FEATURES (1000) + +/*--------------------------------------------------------------------------- + Variables +----------------------------------------------------------------------------*/ + +extern double_VAR_H(classify_pico_feature_length, 0.05, "Pico Feature Length"); + +/**---------------------------------------------------------------------------- + Public Function Prototypes +----------------------------------------------------------------------------**/ +#define GetPicoFeatureLength() (PicoFeatureLength) + +/**---------------------------------------------------------------------------- + Global Data Definitions and Declarations +----------------------------------------------------------------------------**/ +extern TESS_API float PicoFeatureLength; + +} // namespace tesseract + +#endif diff --git a/tesseract/src/classify/protos.cpp b/tesseract/src/classify/protos.cpp new file mode 100644 index 00000000..5cbe4b2e --- /dev/null +++ b/tesseract/src/classify/protos.cpp @@ -0,0 +1,178 @@ +/****************************************************************************** + * + * File: protos.cpp (Formerly protos.c) + * Author: Mark Seaman, OCR Technology + * + * (c) Copyright 1987, Hewlett-Packard Company. 
+ ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + *****************************************************************************/ +/*---------------------------------------------------------------------- + I n c l u d e s +----------------------------------------------------------------------*/ +#define _USE_MATH_DEFINES // for M_PI + +#include "protos.h" + +#include "tprintf.h" +#include "classify.h" +#include "params.h" +#include "intproto.h" + +#include <cmath> // for M_PI +#include <cstdio> + +namespace tesseract { + +#define PROTO_INCREMENT 32 +#define CONFIG_INCREMENT 16 + +/*---------------------------------------------------------------------- + F u n c t i o n s +----------------------------------------------------------------------*/ +/** + * @name AddConfigToClass + * + * Add a new config to this class. Malloc new space and copy the + * old configs if necessary. Return the config id for the new config. 
+ * + * @param Class The class to add to + */ +int AddConfigToClass(CLASS_TYPE Class) { + int NewNumConfigs; + int NewConfig; + int MaxNumProtos; + BIT_VECTOR Config; + + MaxNumProtos = Class->MaxNumProtos; + ASSERT_HOST(MaxNumProtos <= MAX_NUM_PROTOS); + + if (Class->NumConfigs >= Class->MaxNumConfigs) { + /* add configs in CONFIG_INCREMENT chunks at a time */ + NewNumConfigs = (((Class->MaxNumConfigs + CONFIG_INCREMENT) / + CONFIG_INCREMENT) * CONFIG_INCREMENT); + + Class->Configurations = + static_cast<CONFIGS>(realloc (Class->Configurations, + sizeof (BIT_VECTOR) * NewNumConfigs)); + + Class->MaxNumConfigs = NewNumConfigs; + } + NewConfig = Class->NumConfigs++; + Config = NewBitVector(MAX_NUM_PROTOS); + Class->Configurations[NewConfig] = Config; + zero_all_bits (Config, WordsInVectorOfSize(MAX_NUM_PROTOS)); + + return (NewConfig); +} + + +/** + * @name AddProtoToClass + * + * Add a new proto to this class. Malloc new space and copy the + * old protos if necessary. Return the proto id for the new proto. + * + * @param Class The class to add to + */ +int AddProtoToClass(CLASS_TYPE Class) { + if (Class->NumProtos >= Class->MaxNumProtos) { + /* add protos in PROTO_INCREMENT chunks at a time */ + int NewNumProtos = (((Class->MaxNumProtos + PROTO_INCREMENT) / + PROTO_INCREMENT) * PROTO_INCREMENT); + + Class->Prototypes = static_cast<PROTO>(realloc (Class->Prototypes, + sizeof (PROTO_STRUCT) * + NewNumProtos)); + + Class->MaxNumProtos = NewNumProtos; + ASSERT_HOST(NewNumProtos <= MAX_NUM_PROTOS); + } + int NewProto = Class->NumProtos++; + ASSERT_HOST(Class->NumProtos <= MAX_NUM_PROTOS); + return (NewProto); +} + + +/********************************************************************** + * FillABC + * + * Fill in Protos A, B, C fields based on the X, Y, Angle fields. 
+ **********************************************************************/ +void FillABC(PROTO Proto) { + float Slope, Intercept, Normalizer; + + Slope = tan(Proto->Angle * 2.0 * M_PI); + Intercept = Proto->Y - Slope * Proto->X; + Normalizer = 1.0 / sqrt (Slope * Slope + 1.0); + Proto->A = Slope * Normalizer; + Proto->B = -Normalizer; + Proto->C = Intercept * Normalizer; +} + + +/********************************************************************** + * FreeClass + * + * Deallocate the memory consumed by the specified class. + **********************************************************************/ +void FreeClass(CLASS_TYPE Class) { + if (Class) { + FreeClassFields(Class); + delete Class; + } +} + + +/********************************************************************** + * FreeClassFields + * + * Deallocate the memory consumed by subfields of the specified class. + **********************************************************************/ +void FreeClassFields(CLASS_TYPE Class) { + int i; + + if (Class) { + if (Class->MaxNumProtos > 0) free(Class->Prototypes); + if (Class->MaxNumConfigs > 0) { + for (i = 0; i < Class->NumConfigs; i++) + FreeBitVector (Class->Configurations[i]); + free(Class->Configurations); + } + } +} + +/********************************************************************** + * NewClass + * + * Allocate a new class with enough memory to hold the specified number + * of prototypes and configurations. 
+ **********************************************************************/ +CLASS_TYPE NewClass(int NumProtos, int NumConfigs) { + CLASS_TYPE Class; + + Class = new CLASS_STRUCT; + + if (NumProtos > 0) + Class->Prototypes = static_cast<PROTO>(malloc (NumProtos * sizeof (PROTO_STRUCT))); + + if (NumConfigs > 0) + Class->Configurations = static_cast<CONFIGS>(malloc (NumConfigs * + sizeof (BIT_VECTOR))); + Class->MaxNumProtos = NumProtos; + Class->MaxNumConfigs = NumConfigs; + Class->NumProtos = 0; + Class->NumConfigs = 0; + return (Class); + +} + +} // namespace tesseract diff --git a/tesseract/src/classify/protos.h b/tesseract/src/classify/protos.h new file mode 100644 index 00000000..ae35b194 --- /dev/null +++ b/tesseract/src/classify/protos.h @@ -0,0 +1,107 @@ +/****************************************************************************** + * + * File: protos.h + * Author: Mark Seaman, SW Productivity + * + * (c) Copyright 1987, Hewlett-Packard Company. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ * + *****************************************************************************/ + +#ifndef PROTOS_H +#define PROTOS_H + +#include "bitvec.h" +#include "params.h" +#include "unicity_table.h" + +#include <tesseract/unichar.h> + +namespace tesseract { + +using CONFIGS = BIT_VECTOR*; + +typedef struct { + float A; + float B; + float C; + float X; + float Y; + float Angle; + float Length; +} PROTO_STRUCT; +using PROTO = PROTO_STRUCT*; + +struct CLASS_STRUCT { + CLASS_STRUCT() + : NumProtos(0), + MaxNumProtos(0), + NumConfigs(0), + MaxNumConfigs(0), + Prototypes(nullptr), + Configurations(nullptr) {} + int16_t NumProtos; + int16_t MaxNumProtos; + int16_t NumConfigs; + int16_t MaxNumConfigs; + PROTO Prototypes; + CONFIGS Configurations; + UnicityTable<int> font_set; +}; +using CLASS_TYPE = CLASS_STRUCT*; +using CLASSES = CLASS_STRUCT*; + +/*---------------------------------------------------------------------- + M a c r o s +----------------------------------------------------------------------*/ +/** + * AddProtoToConfig + * + * Set a single proto bit in the specified configuration. + */ + +#define AddProtoToConfig(Pid, Config) (SET_BIT(Config, Pid)) + +/** + * ProtoIn + * + * Choose the selected prototype in this class record. Return the + * pointer to it (type PROTO). 
+ */ + +#define ProtoIn(Class, Pid) (&(Class)->Prototypes[Pid]) + +/*---------------------------------------------------------------------- + F u n c t i o n s +----------------------------------------------------------------------*/ +TESS_API +int AddConfigToClass(CLASS_TYPE Class); + +TESS_API +int AddProtoToClass(CLASS_TYPE Class); + +TESS_API +void FillABC(PROTO Proto); + +TESS_API +void FreeClass(CLASS_TYPE Class); + +TESS_API +void FreeClassFields(CLASS_TYPE Class); + +void InitPrototypes(); + +TESS_API +CLASS_TYPE NewClass(int NumProtos, int NumConfigs); + +} // namespace tesseract + +#endif diff --git a/tesseract/src/classify/shapeclassifier.cpp b/tesseract/src/classify/shapeclassifier.cpp new file mode 100644 index 00000000..b1091a53 --- /dev/null +++ b/tesseract/src/classify/shapeclassifier.cpp @@ -0,0 +1,234 @@ +// Copyright 2011 Google Inc. All Rights Reserved. +// Author: rays@google.com (Ray Smith) +/////////////////////////////////////////////////////////////////////// +// File: shapeclassifier.cpp +// Description: Base interface class for classifiers that return a +// shape index. +// Author: Ray Smith +// +// (C) Copyright 2011, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +/////////////////////////////////////////////////////////////////////// + +#ifdef HAVE_CONFIG_H +#include "config_auto.h" +#endif + +#include "shapeclassifier.h" + +#include "scrollview.h" +#include "shapetable.h" +#include "svmnode.h" +#include "trainingsample.h" +#include "tprintf.h" + +#include "genericvector.h" + +namespace tesseract { + +// Classifies the given [training] sample, writing to results. +// See shapeclassifier.h for a full description. +// Default implementation calls the ShapeRating version. +int ShapeClassifier::UnicharClassifySample( + const TrainingSample& sample, Pix* page_pix, int debug, + UNICHAR_ID keep_this, std::vector<UnicharRating>* results) { + results->clear(); + std::vector<ShapeRating> shape_results; + int num_shape_results = ClassifySample(sample, page_pix, debug, keep_this, + &shape_results); + const ShapeTable* shapes = GetShapeTable(); + GenericVector<int> unichar_map; + unichar_map.init_to_size(shapes->unicharset().size(), -1); + for (int r = 0; r < num_shape_results; ++r) { + shapes->AddShapeToResults(shape_results[r], &unichar_map, results); + } + return results->size(); +} + +// Classifies the given [training] sample, writing to results. +// See shapeclassifier.h for a full description. +// Default implementation aborts. +int ShapeClassifier::ClassifySample(const TrainingSample& sample, Pix* page_pix, + int debug, int keep_this, + std::vector<ShapeRating>* results) { + ASSERT_HOST("Must implement ClassifySample!" == nullptr); + return 0; +} + +// Returns the shape that contains unichar_id that has the best result. +// If result is not nullptr, it is set with the shape_id and rating. +// Does not need to be overridden if ClassifySample respects the keep_this +// rule. 
+int ShapeClassifier::BestShapeForUnichar(const TrainingSample& sample, + Pix* page_pix, UNICHAR_ID unichar_id, + ShapeRating* result) { + std::vector<ShapeRating> results; + const ShapeTable* shapes = GetShapeTable(); + int num_results = ClassifySample(sample, page_pix, 0, unichar_id, &results); + for (int r = 0; r < num_results; ++r) { + if (shapes->GetShape(results[r].shape_id).ContainsUnichar(unichar_id)) { + if (result != nullptr) + *result = results[r]; + return results[r].shape_id; + } + } + return -1; +} + +// Provides access to the UNICHARSET that this classifier works with. +// Only needs to be overridden if GetShapeTable() can return nullptr. +const UNICHARSET& ShapeClassifier::GetUnicharset() const { + return GetShapeTable()->unicharset(); +} + +#ifndef GRAPHICS_DISABLED + +// Visual debugger classifies the given sample, displays the results and +// solicits user input to display other classifications. Returns when +// the user has finished with debugging the sample. +// Probably doesn't need to be overridden if the subclass provides +// DisplayClassifyAs. +void ShapeClassifier::DebugDisplay(const TrainingSample& sample, + Pix* page_pix, + UNICHAR_ID unichar_id) { + static ScrollView* terminator = nullptr; + if (terminator == nullptr) { + terminator = new ScrollView("XIT", 0, 0, 50, 50, 50, 50, true); + } + ScrollView* debug_win = CreateFeatureSpaceWindow("ClassifierDebug", 0, 0); + // Provide a right-click menu to choose the class. + auto* popup_menu = new SVMenuNode(); + popup_menu->AddChild("Choose class to debug", 0, "x", "Class to debug"); + popup_menu->BuildMenu(debug_win, false); + // Display the features in green. + const INT_FEATURE_STRUCT* features = sample.features(); + uint32_t num_features = sample.num_features(); + for (uint32_t f = 0; f < num_features; ++f) { + RenderIntFeature(debug_win, &features[f], ScrollView::GREEN); + } + debug_win->Update(); + std::vector<UnicharRating> results; + // Debug classification until the user quits. 
+ const UNICHARSET& unicharset = GetUnicharset(); + SVEvent* ev; + SVEventType ev_type; + do { + PointerVector<ScrollView> windows; + if (unichar_id >= 0) { + tprintf("Debugging class %d = %s\n", + unichar_id, unicharset.id_to_unichar(unichar_id)); + UnicharClassifySample(sample, page_pix, 1, unichar_id, &results); + DisplayClassifyAs(sample, page_pix, unichar_id, 1, &windows); + } else { + tprintf("Invalid unichar_id: %d\n", unichar_id); + UnicharClassifySample(sample, page_pix, 1, -1, &results); + } + if (unichar_id >= 0) { + tprintf("Debugged class %d = %s\n", + unichar_id, unicharset.id_to_unichar(unichar_id)); + } + tprintf("Right-click in ClassifierDebug window to choose debug class,"); + tprintf(" Left-click or close window to quit...\n"); + UNICHAR_ID old_unichar_id; + do { + old_unichar_id = unichar_id; + ev = debug_win->AwaitEvent(SVET_ANY); + ev_type = ev->type; + if (ev_type == SVET_POPUP) { + if (unicharset.contains_unichar(ev->parameter)) { + unichar_id = unicharset.unichar_to_id(ev->parameter); + } else { + tprintf("Char class '%s' not found in unicharset", ev->parameter); + } + } + delete ev; + } while (unichar_id == old_unichar_id && + ev_type != SVET_CLICK && ev_type != SVET_DESTROY); + } while (ev_type != SVET_CLICK && ev_type != SVET_DESTROY); + delete debug_win; +} + +#endif // !GRAPHICS_DISABLED + +// Displays classification as the given shape_id. Creates as many windows +// as it feels fit, using index as a guide for placement. Adds any created +// windows to the windows output and returns a new index that may be used +// by any subsequent classifiers. Caller waits for the user to view and +// then destroys the windows by clearing the vector. +int ShapeClassifier::DisplayClassifyAs( + const TrainingSample& sample, Pix* page_pix, + UNICHAR_ID unichar_id, int index, + PointerVector<ScrollView>* windows) { + // Does nothing in the default implementation. + return index; +} + +// Prints debug information on the results. 
+void ShapeClassifier::UnicharPrintResults( + const char* context, const std::vector<UnicharRating>& results) const { + tprintf("%s\n", context); + for (int i = 0; i < results.size(); ++i) { + tprintf("%g: c_id=%d=%s", results[i].rating, results[i].unichar_id, + GetUnicharset().id_to_unichar(results[i].unichar_id)); + if (!results[i].fonts.empty()) { + tprintf(" Font Vector:"); + for (int f = 0; f < results[i].fonts.size(); ++f) { + tprintf(" %d", results[i].fonts[f].fontinfo_id); + } + } + tprintf("\n"); + } +} +void ShapeClassifier::PrintResults( + const char* context, const std::vector<ShapeRating>& results) const { + tprintf("%s\n", context); + for (int i = 0; i < results.size(); ++i) { + tprintf("%g:", results[i].rating); + if (results[i].joined) + tprintf("[J]"); + if (results[i].broken) + tprintf("[B]"); + tprintf(" %s\n", GetShapeTable()->DebugStr(results[i].shape_id).c_str()); + } +} + +// Removes any result that has all its unichars covered by a better choice, +// regardless of font. +void ShapeClassifier::FilterDuplicateUnichars( + std::vector<ShapeRating>* results) const { + std::vector<ShapeRating> filtered_results; + // Copy results to filtered results and knock out duplicate unichars. + const ShapeTable* shapes = GetShapeTable(); + for (int r = 0; r < results->size(); ++r) { + if (r > 0) { + const Shape& shape_r = shapes->GetShape((*results)[r].shape_id); + int c; + for (c = 0; c < shape_r.size(); ++c) { + int unichar_id = shape_r[c].unichar_id; + int s; + for (s = 0; s < r; ++s) { + const Shape& shape_s = shapes->GetShape((*results)[s].shape_id); + if (shape_s.ContainsUnichar(unichar_id)) + break; // We found unichar_id. + } + if (s == r) + break; // We didn't find unichar_id. + } + if (c == shape_r.size()) + continue; // We found all the unichar ids in previous answers. + } + filtered_results.push_back((*results)[r]); + } + *results = filtered_results; +} + +} // namespace tesseract. 
diff --git a/tesseract/src/classify/shapeclassifier.h b/tesseract/src/classify/shapeclassifier.h new file mode 100644 index 00000000..776880fc --- /dev/null +++ b/tesseract/src/classify/shapeclassifier.h @@ -0,0 +1,121 @@ +/////////////////////////////////////////////////////////////////////// +// File: shapeclassifier.h +// Description: Base interface class for classifiers that return a +// shape index. +// Author: Ray Smith +// +// (C) Copyright 2011, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#ifndef TESSERACT_CLASSIFY_SHAPECLASSIFIER_H_ +#define TESSERACT_CLASSIFY_SHAPECLASSIFIER_H_ + +#include <tesseract/unichar.h> + +struct Pix; + +namespace tesseract { + +class ScrollView; +class UNICHARSET; + +template <typename T> class PointerVector; +struct ShapeRating; +class ShapeTable; +class TrainingSample; +class TrainingSampleSet; +struct UnicharRating; + +// Interface base class for classifiers that produce ShapeRating results. +class TESS_API ShapeClassifier { + public: + virtual ~ShapeClassifier() = default; + + // Classifies the given [training] sample, writing to results. + // If page_pix is not nullptr, the overriding function may call + // sample.GetSamplePix(padding, page_pix) to get an image of the sample + // padded (with real image data) by the given padding to extract features + // from the image of the character. 
Other members of TrainingSample: + // features(), micro_features(), cn_feature(), geo_feature() may be used + // to get the appropriate tesseract features. + // If debug is non-zero, then various degrees of classifier dependent debug + // information is provided. + // If keep_this (a UNICHAR_ID) is >= 0, then the results should always + // contain keep_this, and (if possible) anything of intermediate confidence. + // (Used for answering "Why didn't it get that right?" questions.) It must + // be a UNICHAR_ID as the callers have no clue how to choose the best shape + // that may contain a desired answer. + // The return value is the number of classes saved in results. + // NOTE that overriding functions MUST clear and sort the results by + // descending rating unless the classifier is working with a team of such + // classifiers. + // NOTE: Neither overload of ClassifySample is pure, but at least one must + // be overridden by a classifier in order for it to do anything. + virtual int UnicharClassifySample(const TrainingSample& sample, Pix* page_pix, + int debug, UNICHAR_ID keep_this, + std::vector<UnicharRating>* results); + + protected: + virtual int ClassifySample(const TrainingSample& sample, Pix* page_pix, + int debug, UNICHAR_ID keep_this, + std::vector<ShapeRating>* results); + + public: + // Returns the shape that contains unichar_id that has the best result. + // If result is not nullptr, it is set with the shape_id and rating. + // Returns -1 if ClassifySample fails to provide any result containing + // unichar_id. BestShapeForUnichar does not need to be overridden if + // ClassifySample respects the keep_this rule. + virtual int BestShapeForUnichar(const TrainingSample& sample, Pix* page_pix, + UNICHAR_ID unichar_id, ShapeRating* result); + + // Provides access to the ShapeTable that this classifier works with. + virtual const ShapeTable* GetShapeTable() const = 0; + // Provides access to the UNICHARSET that this classifier works with. 
+ // Must be overridden IFF GetShapeTable() returns nullptr. + virtual const UNICHARSET& GetUnicharset() const; + + // Visual debugger classifies the given sample, displays the results and + // solicits user input to display other classifications. Returns when + // the user has finished with debugging the sample. + // Probably doesn't need to be overridden if the subclass provides + // DisplayClassifyAs. + void DebugDisplay(const TrainingSample& sample, Pix* page_pix, + UNICHAR_ID unichar_id); + + + // Displays classification as the given unichar_id. Creates as many windows + // as it feels fit, using index as a guide for placement. Adds any created + // windows to the windows output and returns a new index that may be used + // by any subsequent classifiers. Caller waits for the user to view and + // then destroys the windows by clearing the vector. + virtual int DisplayClassifyAs(const TrainingSample& sample, Pix* page_pix, + UNICHAR_ID unichar_id, int index, + PointerVector<ScrollView>* windows); + + // Prints debug information on the results. context is some introductory/title + // message. + virtual void UnicharPrintResults( + const char* context, const std::vector<UnicharRating>& results) const; + virtual void PrintResults(const char* context, + const std::vector<ShapeRating>& results) const; + + protected: + // Removes any result that has all its unichars covered by a better choice, + // regardless of font. + void FilterDuplicateUnichars(std::vector<ShapeRating>* results) const; +}; + +} // namespace tesseract. + +#endif // TESSERACT_CLASSIFY_SHAPECLASSIFIER_H_ diff --git a/tesseract/src/classify/shapetable.cpp b/tesseract/src/classify/shapetable.cpp new file mode 100644 index 00000000..c68f5d82 --- /dev/null +++ b/tesseract/src/classify/shapetable.cpp @@ -0,0 +1,727 @@ +// Copyright 2010 Google Inc. All Rights Reserved. 
+// Author: rays@google.com (Ray Smith) +/////////////////////////////////////////////////////////////////////// +// File: shapetable.cpp +// Description: Class to map a classifier shape index to unicharset +// indices and font indices. +// Author: Ray Smith +// Created: Tue Nov 02 15:31:32 PDT 2010 +// +// (C) Copyright 2010, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#include "shapetable.h" + +#include "bitvector.h" +#include "fontinfo.h" +#include "intfeaturespace.h" +#include "strngs.h" +#include "unicharset.h" +#include "unicity_table.h" + +#include <algorithm> + +namespace tesseract { + +// Helper function to get the index of the first result with the required +// unichar_id. If the results are sorted by rating, this will also be the +// best result with the required unichar_id. +// Returns -1 if the unichar_id is not found +int ShapeRating::FirstResultWithUnichar( + const GenericVector<ShapeRating>& results, + const ShapeTable& shape_table, + UNICHAR_ID unichar_id) { + for (int r = 0; r < results.size(); ++r) { + const int shape_id = results[r].shape_id; + const Shape& shape = shape_table.GetShape(shape_id); + if (shape.ContainsUnichar(unichar_id)) { + return r; + } + } + return -1; +} + +// Helper function to get the index of the first result with the required +// unichar_id. 
If the results are sorted by rating, this will also be the +// best result with the required unichar_id. +// Returns -1 if the unichar_id is not found +int UnicharRating::FirstResultWithUnichar( + const GenericVector<UnicharRating>& results, + UNICHAR_ID unichar_id) { + for (int r = 0; r < results.size(); ++r) { + if (results[r].unichar_id == unichar_id) + return r; + } + return -1; +} + +// Writes to the given file. Returns false in case of error. +bool UnicharAndFonts::Serialize(FILE* fp) const { + return tesseract::Serialize(fp, &unichar_id) && font_ids.Serialize(fp); +} +// Reads from the given file. Returns false in case of error. + +bool UnicharAndFonts::DeSerialize(TFile* fp) { + return fp->DeSerialize(&unichar_id) && font_ids.DeSerialize(fp); +} + +// Sort function to sort a pair of UnicharAndFonts by unichar_id. +int UnicharAndFonts::SortByUnicharId(const void* v1, const void* v2) { + const auto* p1 = static_cast<const UnicharAndFonts*>(v1); + const auto* p2 = static_cast<const UnicharAndFonts*>(v2); + return p1->unichar_id - p2->unichar_id; +} + +// Writes to the given file. Returns false in case of error. +bool Shape::Serialize(FILE* fp) const { + uint8_t sorted = unichars_sorted_; + return tesseract::Serialize(fp, &sorted) && unichars_.SerializeClasses(fp); +} +// Reads from the given file. Returns false in case of error. + +bool Shape::DeSerialize(TFile* fp) { + uint8_t sorted; + if (!fp->DeSerialize(&sorted)) return false; + unichars_sorted_ = sorted != 0; + return unichars_.DeSerializeClasses(fp); +} + +// Adds a font_id for the given unichar_id. If the unichar_id is not +// in the shape, it is added. +void Shape::AddToShape(int unichar_id, int font_id) { + for (int c = 0; c < unichars_.size(); ++c) { + if (unichars_[c].unichar_id == unichar_id) { + // Found the unichar in the shape table. 
+ GenericVector<int>& font_list = unichars_[c].font_ids; + for (int f = 0; f < font_list.size(); ++f) { + if (font_list[f] == font_id) + return; // Font is already there. + } + font_list.push_back(font_id); + return; + } + } + // Unichar_id is not in shape, so add it to shape. + unichars_.push_back(UnicharAndFonts(unichar_id, font_id)); + unichars_sorted_ = unichars_.size() <= 1; +} + +// Adds everything in other to this. +void Shape::AddShape(const Shape& other) { + for (int c = 0; c < other.unichars_.size(); ++c) { + for (int f = 0; f < other.unichars_[c].font_ids.size(); ++f) { + AddToShape(other.unichars_[c].unichar_id, + other.unichars_[c].font_ids[f]); + } + } + unichars_sorted_ = unichars_.size() <= 1; +} + +// Returns true if the shape contains the given unichar_id, font_id pair. +bool Shape::ContainsUnicharAndFont(int unichar_id, int font_id) const { + for (int c = 0; c < unichars_.size(); ++c) { + if (unichars_[c].unichar_id == unichar_id) { + // Found the unichar, so look for the font. + auto &font_list = unichars_[c].font_ids; + for (int f = 0; f < font_list.size(); ++f) { + if (font_list[f] == font_id) + return true; + } + return false; + } + } + return false; +} + +// Returns true if the shape contains the given unichar_id, ignoring font. +bool Shape::ContainsUnichar(int unichar_id) const { + for (int c = 0; c < unichars_.size(); ++c) { + if (unichars_[c].unichar_id == unichar_id) { + return true; + } + } + return false; +} + +// Returns true if the shape contains the given font, ignoring unichar_id. +bool Shape::ContainsFont(int font_id) const { + for (int c = 0; c < unichars_.size(); ++c) { + auto &font_list = unichars_[c].font_ids; + for (int f = 0; f < font_list.size(); ++f) { + if (font_list[f] == font_id) + return true; + } + } + return false; +} +// Returns true if the shape contains the given font properties, ignoring +// unichar_id. 
+bool Shape::ContainsFontProperties(const FontInfoTable& font_table, + uint32_t properties) const { + for (int c = 0; c < unichars_.size(); ++c) { + auto &font_list = unichars_[c].font_ids; + for (int f = 0; f < font_list.size(); ++f) { + if (font_table.get(font_list[f]).properties == properties) + return true; + } + } + return false; +} +// Returns true if the shape contains multiple different font properties, +// ignoring unichar_id. +bool Shape::ContainsMultipleFontProperties( + const FontInfoTable& font_table) const { + uint32_t properties = font_table.get(unichars_[0].font_ids[0]).properties; + for (int c = 0; c < unichars_.size(); ++c) { + auto &font_list = unichars_[c].font_ids; + for (int f = 0; f < font_list.size(); ++f) { + if (font_table.get(font_list[f]).properties != properties) + return true; + } + } + return false; +} + +// Returns true if this shape is equal to other (ignoring order of unichars +// and fonts). +bool Shape::operator==(const Shape& other) const { + return IsSubsetOf(other) && other.IsSubsetOf(*this); +} + +// Returns true if this is a subset (including equal) of other. +bool Shape::IsSubsetOf(const Shape& other) const { + for (int c = 0; c < unichars_.size(); ++c) { + int unichar_id = unichars_[c].unichar_id; + const GenericVector<int>& font_list = unichars_[c].font_ids; + for (int f = 0; f < font_list.size(); ++f) { + if (!other.ContainsUnicharAndFont(unichar_id, font_list[f])) + return false; + } + } + return true; +} + +// Returns true if the lists of unichar ids are the same in this and other, +// ignoring fonts. +// NOT const, as it will sort the unichars on demand. 
+bool Shape::IsEqualUnichars(Shape* other) { + if (unichars_.size() != other->unichars_.size()) return false; + if (!unichars_sorted_) SortUnichars(); + if (!other->unichars_sorted_) other->SortUnichars(); + for (int c = 0; c < unichars_.size(); ++c) { + if (unichars_[c].unichar_id != other->unichars_[c].unichar_id) + return false; + } + return true; +} + +// Sorts the unichars_ vector by unichar. +void Shape::SortUnichars() { + unichars_.sort(UnicharAndFonts::SortByUnicharId); + unichars_sorted_ = true; +} + +ShapeTable::ShapeTable() : unicharset_(nullptr), num_fonts_(0) { +} +ShapeTable::ShapeTable(const UNICHARSET& unicharset) + : unicharset_(&unicharset), num_fonts_(0) { +} + +// Writes to the given file. Returns false in case of error. +bool ShapeTable::Serialize(FILE* fp) const { + return shape_table_.Serialize(fp); +} +// Reads from the given file. Returns false in case of error. + +bool ShapeTable::DeSerialize(TFile* fp) { + if (!shape_table_.DeSerialize(fp)) return false; + num_fonts_ = 0; + return true; +} + +// Returns the number of fonts used in this ShapeTable, computing it if +// necessary. +int ShapeTable::NumFonts() const { + if (num_fonts_ <= 0) { + for (int shape_id = 0; shape_id < shape_table_.size(); ++shape_id) { + const Shape& shape = *shape_table_[shape_id]; + for (int c = 0; c < shape.size(); ++c) { + for (int f = 0; f < shape[c].font_ids.size(); ++f) { + if (shape[c].font_ids[f] >= num_fonts_) + num_fonts_ = shape[c].font_ids[f] + 1; + } + } + } + } + return num_fonts_; +} + +// Re-indexes the class_ids in the shapetable according to the given map. +// Useful in conjunction with set_unicharset. 
+void ShapeTable::ReMapClassIds(const GenericVector<int>& unicharset_map) { + for (int shape_id = 0; shape_id < shape_table_.size(); ++shape_id) { + Shape* shape = shape_table_[shape_id]; + for (int c = 0; c < shape->size(); ++c) { + shape->SetUnicharId(c, unicharset_map[(*shape)[c].unichar_id]); + } + } +} + +// Returns a string listing the classes/fonts in a shape. +STRING ShapeTable::DebugStr(int shape_id) const { + if (shape_id < 0 || shape_id >= shape_table_.size()) + return STRING("INVALID_UNICHAR_ID"); + const Shape& shape = GetShape(shape_id); + STRING result; + result.add_str_int("Shape", shape_id); + if (shape.size() > 100) { + result.add_str_int(" Num unichars=", shape.size()); + return result; + } + for (int c = 0; c < shape.size(); ++c) { + result.add_str_int(" c_id=", shape[c].unichar_id); + result += "="; + result += unicharset_->id_to_unichar(shape[c].unichar_id); + if (shape.size() < 10) { + result.add_str_int(", ", shape[c].font_ids.size()); + result += " fonts ="; + int num_fonts = shape[c].font_ids.size(); + if (num_fonts > 10) { + result.add_str_int(" ", shape[c].font_ids[0]); + result.add_str_int(" ... ", shape[c].font_ids[num_fonts - 1]); + } else { + for (int f = 0; f < num_fonts; ++f) { + result.add_str_int(" ", shape[c].font_ids[f]); + } + } + } + } + return result; +} + +// Returns a debug string summarizing the table. 
+STRING ShapeTable::SummaryStr() const { + int max_unichars = 0; + int num_multi_shapes = 0; + int num_master_shapes = 0; + for (int s = 0; s < shape_table_.size(); ++s) { + if (MasterDestinationIndex(s) != s) continue; + ++num_master_shapes; + int shape_size = GetShape(s).size(); + if (shape_size > 1) + ++num_multi_shapes; + if (shape_size > max_unichars) + max_unichars = shape_size; + } + STRING result; + result.add_str_int("Number of shapes = ", num_master_shapes); + result.add_str_int(" max unichars = ", max_unichars); + result.add_str_int(" number with multiple unichars = ", num_multi_shapes); + return result; +} + + +// Adds a new shape starting with the given unichar_id and font_id. +// Returns the assigned index. +int ShapeTable::AddShape(int unichar_id, int font_id) { + int index = shape_table_.size(); + auto* shape = new Shape; + shape->AddToShape(unichar_id, font_id); + shape_table_.push_back(shape); + num_fonts_ = std::max(num_fonts_, font_id + 1); + return index; +} + +// Adds a copy of the given shape unless it is already present. +// Returns the assigned index or index of existing shape if already present. +int ShapeTable::AddShape(const Shape& other) { + int index; + for (index = 0; index < shape_table_.size() && + !(other == *shape_table_[index]); ++index) + continue; + if (index == shape_table_.size()) { + auto* shape = new Shape(other); + shape_table_.push_back(shape); + } + num_fonts_ = 0; + return index; +} + +// Removes the shape given by the shape index. +void ShapeTable::DeleteShape(int shape_id) { + delete shape_table_[shape_id]; + shape_table_[shape_id] = nullptr; + shape_table_.remove(shape_id); +} + +// Adds a font_id to the given existing shape index for the given +// unichar_id. If the unichar_id is not in the shape, it is added. 
+void ShapeTable::AddToShape(int shape_id, int unichar_id, int font_id) { + Shape& shape = *shape_table_[shape_id]; + shape.AddToShape(unichar_id, font_id); + num_fonts_ = std::max(num_fonts_, font_id + 1); +} + +// Adds the given shape to the existing shape with the given index. +void ShapeTable::AddShapeToShape(int shape_id, const Shape& other) { + Shape& shape = *shape_table_[shape_id]; + shape.AddShape(other); + num_fonts_ = 0; +} + +// Returns the id of the shape that contains the given unichar and font. +// If not found, returns -1. +// If font_id < 0, the font_id is ignored and the first shape that matches +// the unichar_id is returned. +int ShapeTable::FindShape(int unichar_id, int font_id) const { + for (int s = 0; s < shape_table_.size(); ++s) { + const Shape& shape = GetShape(s); + for (int c = 0; c < shape.size(); ++c) { + if (shape[c].unichar_id == unichar_id) { + if (font_id < 0) + return s; // We don't care about the font. + for (int f = 0; f < shape[c].font_ids.size(); ++f) { + if (shape[c].font_ids[f] == font_id) + return s; + } + } + } + } + return -1; +} + +// Returns the first unichar_id and font_id in the given shape. +void ShapeTable::GetFirstUnicharAndFont(int shape_id, + int* unichar_id, int* font_id) const { + const UnicharAndFonts& unichar_and_fonts = (*shape_table_[shape_id])[0]; + *unichar_id = unichar_and_fonts.unichar_id; + *font_id = unichar_and_fonts.font_ids[0]; +} + +// Expands all the classes/fonts in the shape individually to build +// a ShapeTable. 
+int ShapeTable::BuildFromShape(const Shape& shape, + const ShapeTable& master_shapes) { + BitVector shape_map(master_shapes.NumShapes()); + for (int u_ind = 0; u_ind < shape.size(); ++u_ind) { + for (int f_ind = 0; f_ind < shape[u_ind].font_ids.size(); ++f_ind) { + int c = shape[u_ind].unichar_id; + int f = shape[u_ind].font_ids[f_ind]; + int master_id = master_shapes.FindShape(c, f); + if (master_id >= 0) { + shape_map.SetBit(master_id); + } else if (FindShape(c, f) < 0) { + AddShape(c, f); + } + } + } + int num_masters = 0; + for (int s = 0; s < master_shapes.NumShapes(); ++s) { + if (shape_map[s]) { + AddShape(master_shapes.GetShape(s)); + ++num_masters; + } + } + return num_masters; +} + +// Returns true if the shapes are already merged. +bool ShapeTable::AlreadyMerged(int shape_id1, int shape_id2) const { + return MasterDestinationIndex(shape_id1) == MasterDestinationIndex(shape_id2); +} + +// Returns true if any shape contains multiple unichars. +bool ShapeTable::AnyMultipleUnichars() const { + int num_shapes = NumShapes(); + for (int s1 = 0; s1 < num_shapes; ++s1) { + if (MasterDestinationIndex(s1) != s1) continue; + if (GetShape(s1).size() > 1) + return true; + } + return false; +} + +// Returns the maximum number of unichars over all shapes. +int ShapeTable::MaxNumUnichars() const { + int max_num_unichars = 0; + int num_shapes = NumShapes(); + for (int s = 0; s < num_shapes; ++s) { + if (GetShape(s).size() > max_num_unichars) + max_num_unichars = GetShape(s).size(); + } + return max_num_unichars; +} + + +// Merges shapes with a common unichar over the [start, end) interval. +// Assumes single unichar per shape. 
+void ShapeTable::ForceFontMerges(int start, int end) { + for (int s1 = start; s1 < end; ++s1) { + if (MasterDestinationIndex(s1) == s1 && GetShape(s1).size() == 1) { + int unichar_id = GetShape(s1)[0].unichar_id; + for (int s2 = s1 + 1; s2 < end; ++s2) { + if (MasterDestinationIndex(s2) == s2 && GetShape(s2).size() == 1 && + unichar_id == GetShape(s2)[0].unichar_id) { + MergeShapes(s1, s2); + } + } + } + } + ShapeTable compacted(*unicharset_); + compacted.AppendMasterShapes(*this, nullptr); + *this = compacted; +} + +// Returns the number of unichars in the master shape. +int ShapeTable::MasterUnicharCount(int shape_id) const { + int master_id = MasterDestinationIndex(shape_id); + return GetShape(master_id).size(); +} + +// Returns the sum of the font counts in the master shape. +int ShapeTable::MasterFontCount(int shape_id) const { + int master_id = MasterDestinationIndex(shape_id); + const Shape& shape = GetShape(master_id); + int font_count = 0; + for (int c = 0; c < shape.size(); ++c) { + font_count += shape[c].font_ids.size(); + } + return font_count; +} + +// Returns the number of unichars that would result from merging the shapes. +int ShapeTable::MergedUnicharCount(int shape_id1, int shape_id2) const { + // Do it the easy way for now. + int master_id1 = MasterDestinationIndex(shape_id1); + int master_id2 = MasterDestinationIndex(shape_id2); + Shape combined_shape(*shape_table_[master_id1]); + combined_shape.AddShape(*shape_table_[master_id2]); + return combined_shape.size(); +} + +// Merges two shape_ids, leaving shape_id2 marked as merged. +void ShapeTable::MergeShapes(int shape_id1, int shape_id2) { + int master_id1 = MasterDestinationIndex(shape_id1); + int master_id2 = MasterDestinationIndex(shape_id2); + // Point master_id2 (and all merged shapes) to master_id1. + shape_table_[master_id2]->set_destination_index(master_id1); + // Add all the shapes of master_id2 to master_id1. 
+ shape_table_[master_id1]->AddShape(*shape_table_[master_id2]); +} + +// Swaps two shape_ids. +void ShapeTable::SwapShapes(int shape_id1, int shape_id2) { + Shape* tmp = shape_table_[shape_id1]; + shape_table_[shape_id1] = shape_table_[shape_id2]; + shape_table_[shape_id2] = tmp; +} + +// Returns the destination of this shape, (if merged), taking into account +// the fact that the destination may itself have been merged. +int ShapeTable::MasterDestinationIndex(int shape_id) const { + int dest_id = shape_table_[shape_id]->destination_index(); + if (dest_id == shape_id || dest_id < 0) + return shape_id; // Is master already. + int master_id = shape_table_[dest_id]->destination_index(); + if (master_id == dest_id || master_id < 0) + return dest_id; // Dest is the master and shape_id points to it. + master_id = MasterDestinationIndex(master_id); + return master_id; +} + +// Returns false if the unichars in neither shape is a subset of the other. +bool ShapeTable::SubsetUnichar(int shape_id1, int shape_id2) const { + const Shape& shape1 = GetShape(shape_id1); + const Shape& shape2 = GetShape(shape_id2); + int c1, c2; + for (c1 = 0; c1 < shape1.size(); ++c1) { + int unichar_id1 = shape1[c1].unichar_id; + if (!shape2.ContainsUnichar(unichar_id1)) + break; + } + for (c2 = 0; c2 < shape2.size(); ++c2) { + int unichar_id2 = shape2[c2].unichar_id; + if (!shape1.ContainsUnichar(unichar_id2)) + break; + } + return c1 == shape1.size() || c2 == shape2.size(); +} + +// Returns false if the unichars in neither shape is a subset of the other. 
+bool ShapeTable::MergeSubsetUnichar(int merge_id1, int merge_id2, + int shape_id) const { + const Shape& merge1 = GetShape(merge_id1); + const Shape& merge2 = GetShape(merge_id2); + const Shape& shape = GetShape(shape_id); + int cm1, cm2, cs; + for (cs = 0; cs < shape.size(); ++cs) { + int unichar_id = shape[cs].unichar_id; + if (!merge1.ContainsUnichar(unichar_id) && + !merge2.ContainsUnichar(unichar_id)) + break; // Shape is not a subset of the merge. + } + for (cm1 = 0; cm1 < merge1.size(); ++cm1) { + int unichar_id1 = merge1[cm1].unichar_id; + if (!shape.ContainsUnichar(unichar_id1)) + break; // Merge is not a subset of shape + } + for (cm2 = 0; cm2 < merge2.size(); ++cm2) { + int unichar_id2 = merge2[cm2].unichar_id; + if (!shape.ContainsUnichar(unichar_id2)) + break; // Merge is not a subset of shape + } + return cs == shape.size() || (cm1 == merge1.size() && cm2 == merge2.size()); +} + +// Returns true if the unichar sets are equal between the shapes. +bool ShapeTable::EqualUnichars(int shape_id1, int shape_id2) const { + const Shape& shape1 = GetShape(shape_id1); + const Shape& shape2 = GetShape(shape_id2); + for (int c1 = 0; c1 < shape1.size(); ++c1) { + int unichar_id1 = shape1[c1].unichar_id; + if (!shape2.ContainsUnichar(unichar_id1)) + return false; + } + for (int c2 = 0; c2 < shape2.size(); ++c2) { + int unichar_id2 = shape2[c2].unichar_id; + if (!shape1.ContainsUnichar(unichar_id2)) + return false; + } + return true; +} + +// Returns true if the unichar sets are equal between the shapes. +bool ShapeTable::MergeEqualUnichars(int merge_id1, int merge_id2, + int shape_id) const { + const Shape& merge1 = GetShape(merge_id1); + const Shape& merge2 = GetShape(merge_id2); + const Shape& shape = GetShape(shape_id); + for (int cs = 0; cs < shape.size(); ++cs) { + int unichar_id = shape[cs].unichar_id; + if (!merge1.ContainsUnichar(unichar_id) && + !merge2.ContainsUnichar(unichar_id)) + return false; // Shape has a unichar that appears in neither merge. 
+ } + for (int cm1 = 0; cm1 < merge1.size(); ++cm1) { + int unichar_id1 = merge1[cm1].unichar_id; + if (!shape.ContainsUnichar(unichar_id1)) + return false; // Merge has a unichar that is not in shape. + } + for (int cm2 = 0; cm2 < merge2.size(); ++cm2) { + int unichar_id2 = merge2[cm2].unichar_id; + if (!shape.ContainsUnichar(unichar_id2)) + return false; // Merge has a unichar that is not in shape. + } + return true; +} + +// Returns true if there is a common unichar between the shapes. +bool ShapeTable::CommonUnichars(int shape_id1, int shape_id2) const { + const Shape& shape1 = GetShape(shape_id1); + const Shape& shape2 = GetShape(shape_id2); + for (int c1 = 0; c1 < shape1.size(); ++c1) { + int unichar_id1 = shape1[c1].unichar_id; + if (shape2.ContainsUnichar(unichar_id1)) + return true; + } + return false; +} + +// Returns true if there is a common font id between the shapes. +bool ShapeTable::CommonFont(int shape_id1, int shape_id2) const { + const Shape& shape1 = GetShape(shape_id1); + const Shape& shape2 = GetShape(shape_id2); + for (int c1 = 0; c1 < shape1.size(); ++c1) { + const GenericVector<int>& font_list1 = shape1[c1].font_ids; + for (int f = 0; f < font_list1.size(); ++f) { + if (shape2.ContainsFont(font_list1[f])) + return true; + } + } + return false; +} + +// Appends the master shapes from other to this. +// If not nullptr, shape_map is set to map other shape_ids to this's shape_ids. +void ShapeTable::AppendMasterShapes(const ShapeTable& other, + GenericVector<int>* shape_map) { + if (shape_map != nullptr) + shape_map->init_to_size(other.NumShapes(), -1); + for (int s = 0; s < other.shape_table_.size(); ++s) { + if (other.shape_table_[s]->destination_index() < 0) { + int index = AddShape(*other.shape_table_[s]); + if (shape_map != nullptr) + (*shape_map)[s] = index; + } + } +} + +// Returns the number of master shapes remaining after merging. 
+int ShapeTable::NumMasterShapes() const { + int num_shapes = 0; + for (int s = 0; s < shape_table_.size(); ++s) { + if (shape_table_[s]->destination_index() < 0) + ++num_shapes; + } + return num_shapes; +} + + +// Adds the unichars of the given shape_id to the vector of results. Any +// unichar_id that is already present just has the fonts added to the +// font set for that result without adding a new entry in the vector. +// NOTE: it is assumed that the results are given to this function in order +// of decreasing rating. +// The unichar_map vector indicates the index of the results entry containing +// each unichar, or -1 if the unichar is not yet included in results. +void ShapeTable::AddShapeToResults(const ShapeRating& shape_rating, + GenericVector<int>* unichar_map, + std::vector<UnicharRating>* results) const { + if (shape_rating.joined) { + AddUnicharToResults(UNICHAR_JOINED, shape_rating.rating, unichar_map, + results); + } + if (shape_rating.broken) { + AddUnicharToResults(UNICHAR_BROKEN, shape_rating.rating, unichar_map, + results); + } + const Shape& shape = GetShape(shape_rating.shape_id); + for (int u = 0; u < shape.size(); ++u) { + int result_index = AddUnicharToResults(shape[u].unichar_id, + shape_rating.rating, + unichar_map, results); + for (int f = 0; f < shape[u].font_ids.size(); ++f) { + (*results)[result_index].fonts.push_back( + ScoredFont(shape[u].font_ids[f], + IntCastRounded(shape_rating.rating * INT16_MAX))); + } + } +} + +// Adds the given unichar_id to the results if needed, updating unichar_map +// and returning the index of unichar in results. 
+int ShapeTable::AddUnicharToResults( + int unichar_id, float rating, GenericVector<int>* unichar_map, + std::vector<UnicharRating>* results) const { + int result_index = unichar_map->get(unichar_id); + if (result_index < 0) { + UnicharRating result(unichar_id, rating); + result_index = results->size(); + results->push_back(result); + (*unichar_map)[unichar_id] = result_index; + } + return result_index; +} + + +} // namespace tesseract diff --git a/tesseract/src/classify/shapetable.h b/tesseract/src/classify/shapetable.h new file mode 100644 index 00000000..5a551401 --- /dev/null +++ b/tesseract/src/classify/shapetable.h @@ -0,0 +1,379 @@ +// Copyright 2010 Google Inc. All Rights Reserved. +// Author: rays@google.com (Ray Smith) +/////////////////////////////////////////////////////////////////////// +// File: shapetable.h +// Description: Class to map a classifier shape index to unicharset +// indices and font indices. +// Author: Ray Smith +// +// (C) Copyright 2010, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +/////////////////////////////////////////////////////////////////////// + +#ifndef TESSERACT_CLASSIFY_SHAPETABLE_H_ +#define TESSERACT_CLASSIFY_SHAPETABLE_H_ + +#include "bitvector.h" +#include "fontinfo.h" +#include "genericheap.h" +#include "intmatcher.h" + +#include "genericvector.h" + +namespace tesseract { + +class STRING; +class UNICHARSET; +class ShapeTable; + +// Simple struct to hold a single classifier unichar selection, a corresponding +// rating, and a list of appropriate fonts. +struct UnicharRating { + UnicharRating() + : unichar_id(0), rating(0.0f), adapted(false), config(0), + feature_misses(0) {} + UnicharRating(int u, float r) + : unichar_id(u), rating(r), adapted(false), config(0), feature_misses(0) {} + + // Print debug info. + void Print() const { + tprintf("Unichar-id=%d, rating=%g, adapted=%d, config=%d, misses=%u," + " %zu fonts\n", unichar_id, rating, adapted, config, feature_misses, + fonts.size()); + } + + // Helper function to get the index of the first result with the required + // unichar_id. If the results are sorted by rating, this will also be the + // best result with the required unichar_id. + // Returns -1 if the unichar_id is not found + static int FirstResultWithUnichar(const GenericVector<UnicharRating>& results, + UNICHAR_ID unichar_id); + + // Index into some UNICHARSET table indicates the class of the answer. + UNICHAR_ID unichar_id; + // Rating from classifier with 1.0 perfect and 0.0 impossible. + // Call it a probability if you must. + float rating; + // True if this result is from the adaptive classifier. + bool adapted; + // Index of best matching font configuration of result. + uint8_t config; + // Number of features that were total misses - were liked by no classes. + uint16_t feature_misses; + // Unsorted collection of fontinfo ids and scores. Note that a raw result + // from the IntegerMatch will contain config ids, that require transforming + // to fontinfo ids via fontsets and (possibly) shapetable. 
+ std::vector<ScoredFont> fonts; +}; + +// Classifier result from a low-level classification is an index into some +// ShapeTable and a rating. +struct ShapeRating { + ShapeRating() + : shape_id(0), rating(0.0f), raw(0.0f), font(0.0f), + joined(false), broken(false) {} + ShapeRating(int s, float r) + : shape_id(s), rating(r), raw(1.0f), font(0.0f), + joined(false), broken(false) {} + + // Helper function to get the index of the first result with the required + // unichar_id. If the results are sorted by rating, this will also be the + // best result with the required unichar_id. + // Returns -1 if the unichar_id is not found + static int FirstResultWithUnichar(const GenericVector<ShapeRating>& results, + const ShapeTable& shape_table, + UNICHAR_ID unichar_id); + + // Index into some shape table indicates the class of the answer. + int shape_id; + // Rating from classifier with 1.0 perfect and 0.0 impossible. + // Call it a probability if you must. + float rating; + // Subsidiary rating that a classifier may use internally. + float raw; + // Subsidiary rating that a classifier may use internally. + float font; + // Flag indicating that the input may be joined. + bool joined; + // Flag indicating that the input may be broken (a fragment). + bool broken; +}; + +// Simple struct to hold an entry for a heap-based priority queue of +// ShapeRating. +struct ShapeQueueEntry { + ShapeQueueEntry() : result(ShapeRating(0, 0.0f)), level(0) {} + ShapeQueueEntry(const ShapeRating& rating, int level0) + : result(rating), level(level0) {} + + // Sort by decreasing rating and decreasing level for equal rating. + bool operator<(const ShapeQueueEntry& other) const { + if (result.rating > other.result.rating) return true; + if (result.rating == other.result.rating) + return level > other.level; + return false; + } + + // Output from classifier. + ShapeRating result; + // Which level in the tree did this come from? 
+ int level; +}; +using ShapeQueue = GenericHeap<ShapeQueueEntry>; + +// Simple struct to hold a set of fonts associated with a single unichar-id. +// A vector of UnicharAndFonts makes a shape. +struct UnicharAndFonts { + UnicharAndFonts() : unichar_id(0) { + } + UnicharAndFonts(int uni_id, int font_id) : unichar_id(uni_id) { + font_ids.push_back(font_id); + } + + // Writes to the given file. Returns false in case of error. + bool Serialize(FILE* fp) const; + // Reads from the given file. Returns false in case of error. + bool DeSerialize(TFile* fp); + + // Sort function to sort a pair of UnicharAndFonts by unichar_id. + static int SortByUnicharId(const void* v1, const void* v2); + + GenericVector<int32_t> font_ids; + int32_t unichar_id; +}; + +// A Shape is a collection of unichar-ids and a list of fonts associated with +// each, organized as a vector of UnicharAndFonts. Conceptually a Shape is +// a classifiable unit, and represents a group of characters or parts of +// characters that have a similar or identical shape. Shapes/ShapeTables may +// be organized hierarchically from identical shapes at the leaves to vaguely +// similar shapes near the root. +class TESS_API Shape { + public: + Shape() : destination_index_(-1) {} + + // Writes to the given file. Returns false in case of error. + bool Serialize(FILE* fp) const; + // Reads from the given file. Returns false in case of error. + bool DeSerialize(TFile* fp); + + int destination_index() const { + return destination_index_; + } + void set_destination_index(int index) { + destination_index_ = index; + } + int size() const { + return unichars_.size(); + } + // Returns a UnicharAndFonts entry for the given index, which must be + // in the range [0, size()). + const UnicharAndFonts& operator[](int index) const { + return unichars_[index]; + } + // Sets the unichar_id of the given index to the new unichar_id. 
+ void SetUnicharId(int index, int unichar_id) { + unichars_[index].unichar_id = unichar_id; + } + // Adds a font_id for the given unichar_id. If the unichar_id is not + // in the shape, it is added. + void AddToShape(int unichar_id, int font_id); + // Adds everything in other to this. + void AddShape(const Shape& other); + // Returns true if the shape contains the given unichar_id, font_id pair. + bool ContainsUnicharAndFont(int unichar_id, int font_id) const; + // Returns true if the shape contains the given unichar_id, ignoring font. + bool ContainsUnichar(int unichar_id) const; + // Returns true if the shape contains the given font, ignoring unichar_id. + bool ContainsFont(int font_id) const; + // Returns true if the shape contains the given font properties, ignoring + // unichar_id. + bool ContainsFontProperties(const FontInfoTable& font_table, + uint32_t properties) const; + // Returns true if the shape contains multiple different font properties, + // ignoring unichar_id. + bool ContainsMultipleFontProperties(const FontInfoTable& font_table) const; + // Returns true if this shape is equal to other (ignoring order of unichars + // and fonts). + bool operator==(const Shape& other) const; + // Returns true if this is a subset (including equal) of other. + bool IsSubsetOf(const Shape& other) const; + // Returns true if the lists of unichar ids are the same in this and other, + // ignoring fonts. + // NOT const, as it will sort the unichars on demand. + bool IsEqualUnichars(Shape* other); + + private: + // Sorts the unichars_ vector by unichar. + void SortUnichars(); + + // Flag indicates that the unichars are sorted, allowing faster set + // operations with another shape. + bool unichars_sorted_ = false; + // If this Shape is part of a ShapeTable the destiation_index_ is the index + // of some other shape in the ShapeTable with which this shape is merged. + int destination_index_ = 0; + // Array of unichars, each with a set of fonts. 
Each unichar has at most + // one entry in the vector. + GenericVector<UnicharAndFonts> unichars_; +}; + +// ShapeTable is a class to encapsulate the triple indirection that is +// used here. +// ShapeTable is a vector of shapes. +// Each shape is a vector of UnicharAndFonts representing the set of unichars +// that the shape represents. +// Each UnicharAndFonts also lists the fonts of the unichar_id that were +// mapped to the shape during training. +class TESS_API ShapeTable { + public: + ShapeTable(); + // The UNICHARSET reference supplied here, or in set_unicharset below must + // exist for the entire life of the ShapeTable. It is used only by DebugStr. + explicit ShapeTable(const UNICHARSET& unicharset); + + // Writes to the given file. Returns false in case of error. + bool Serialize(FILE* fp) const; + // Reads from the given file. Returns false in case of error. + bool DeSerialize(TFile* fp); + + // Accessors. + int NumShapes() const { + return shape_table_.size(); + } + const UNICHARSET& unicharset() const { + return *unicharset_; + } + // Returns the number of fonts used in this ShapeTable, computing it if + // necessary. + int NumFonts() const; + // Shapetable takes a pointer to the UNICHARSET, so it must persist for the + // entire life of the ShapeTable. + void set_unicharset(const UNICHARSET& unicharset) { + unicharset_ = &unicharset; + } + // Re-indexes the class_ids in the shapetable according to the given map. + // Useful in conjunction with set_unicharset. + void ReMapClassIds(const GenericVector<int>& unicharset_map); + // Returns a string listing the classes/fonts in a shape. + STRING DebugStr(int shape_id) const; + // Returns a debug string summarizing the table. + STRING SummaryStr() const; + + // Adds a new shape starting with the given unichar_id and font_id. + // Returns the assigned index. + int AddShape(int unichar_id, int font_id); + // Adds a copy of the given shape unless it is already present. 
+ // Returns the assigned index or index of existing shape if already present. + int AddShape(const Shape& other); + // Removes the shape given by the shape index. All indices above are changed! + void DeleteShape(int shape_id); + // Adds a font_id to the given existing shape index for the given + // unichar_id. If the unichar_id is not in the shape, it is added. + void AddToShape(int shape_id, int unichar_id, int font_id); + // Adds the given shape to the existing shape with the given index. + void AddShapeToShape(int shape_id, const Shape& other); + // Returns the id of the shape that contains the given unichar and font. + // If not found, returns -1. + // If font_id < 0, the font_id is ignored and the first shape that matches + // the unichar_id is returned. + int FindShape(int unichar_id, int font_id) const; + // Returns the first unichar_id and font_id in the given shape. + void GetFirstUnicharAndFont(int shape_id, + int* unichar_id, int* font_id) const; + + // Accessors for the Shape with the given shape_id. + const Shape& GetShape(int shape_id) const { + return *shape_table_[shape_id]; + } + Shape* MutableShape(int shape_id) { + return shape_table_[shape_id]; + } + + // Expands all the classes/fonts in the shape individually to build + // a ShapeTable. + int BuildFromShape(const Shape& shape, const ShapeTable& master_shapes); + + // Returns true if the shapes are already merged. + bool AlreadyMerged(int shape_id1, int shape_id2) const; + // Returns true if any shape contains multiple unichars. + bool AnyMultipleUnichars() const; + // Returns the maximum number of unichars over all shapes. + int MaxNumUnichars() const; + // Merges shapes with a common unichar over the [start, end) interval. + // Assumes single unichar per shape. + void ForceFontMerges(int start, int end); + // Returns the number of unichars in the master shape. + int MasterUnicharCount(int shape_id) const; + // Returns the sum of the font counts in the master shape. 
+ int MasterFontCount(int shape_id) const; + // Returns the number of unichars that would result from merging the shapes. + int MergedUnicharCount(int shape_id1, int shape_id2) const; + // Merges two shape_ids, leaving shape_id2 marked as merged. + void MergeShapes(int shape_id1, int shape_id2); + // Swaps two shape_ids. + void SwapShapes(int shape_id1, int shape_id2); + // Appends the master shapes from other to this. + // Used to create a clean ShapeTable from a merged one, or to create a + // copy of a ShapeTable. + // If not nullptr, shape_map is set to map other shape_ids to this's shape_ids. + void AppendMasterShapes(const ShapeTable& other, + GenericVector<int>* shape_map); + // Returns the number of master shapes remaining after merging. + int NumMasterShapes() const; + // Returns the destination of this shape, (if merged), taking into account + // the fact that the destination may itself have been merged. + // For a non-merged shape, returns the input shape_id. + int MasterDestinationIndex(int shape_id) const; + + // Returns false if the unichars in neither shape is a subset of the other.. + bool SubsetUnichar(int shape_id1, int shape_id2) const; + // Returns false if the unichars in neither shape is a subset of the other.. + bool MergeSubsetUnichar(int merge_id1, int merge_id2, int shape_id) const; + // Returns true if the unichar sets are equal between the shapes. + bool EqualUnichars(int shape_id1, int shape_id2) const; + bool MergeEqualUnichars(int merge_id1, int merge_id2, int shape_id) const; + // Returns true if there is a common unichar between the shapes. + bool CommonUnichars(int shape_id1, int shape_id2) const; + // Returns true if there is a common font id between the shapes. + bool CommonFont(int shape_id1, int shape_id2) const; + + // Adds the unichars of the given shape_id to the vector of results. 
Any + // unichar_id that is already present just has the fonts added to the + // font set for that result without adding a new entry in the vector. + // NOTE: it is assumed that the results are given to this function in order + // of decreasing rating. + // The unichar_map vector indicates the index of the results entry containing + // each unichar, or -1 if the unichar is not yet included in results. + void AddShapeToResults(const ShapeRating& shape_rating, + GenericVector<int>* unichar_map, + std::vector<UnicharRating>* results) const; + + private: + // Adds the given unichar_id to the results if needed, updating unichar_map + // and returning the index of unichar in results. + int AddUnicharToResults(int unichar_id, float rating, + GenericVector<int>* unichar_map, + std::vector<UnicharRating>* results) const; + + // Pointer to a provided unicharset used only by the Debugstr member. + const UNICHARSET* unicharset_; + // Vector of pointers to the Shapes in this ShapeTable. + PointerVector<Shape> shape_table_; + + // Cached data calculated on demand. + mutable int num_fonts_; +}; + +} // namespace tesseract. + +#endif // TESSERACT_CLASSIFY_SHAPETABLE_H_ diff --git a/tesseract/src/classify/tessclassifier.cpp b/tesseract/src/classify/tessclassifier.cpp new file mode 100644 index 00000000..c7819d66 --- /dev/null +++ b/tesseract/src/classify/tessclassifier.cpp @@ -0,0 +1,84 @@ +/////////////////////////////////////////////////////////////////////// +// File: tessclassifier.cpp +// Description: Tesseract implementation of a ShapeClassifier. +// Author: Ray Smith +// +// (C) Copyright 2011, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#include "tessclassifier.h" + +#include "classify.h" +#include "trainingsample.h" + +namespace tesseract { + +// Classifies the given [training] sample, writing to results. +// See ShapeClassifier for a full description. +int TessClassifier::UnicharClassifySample( + const TrainingSample& sample, Pix* page_pix, int debug, + UNICHAR_ID keep_this, std::vector<UnicharRating>* results) { + const int old_matcher_level = classify_->matcher_debug_level; + const int old_matcher_flags = classify_->matcher_debug_flags; + const int old_classify_level = classify_->classify_debug_level; + if (debug) { + // Explicitly set values of various control parameters to generate debug + // output if required, restoring the old values after classifying. + classify_->matcher_debug_level.set_value(2); + classify_->matcher_debug_flags.set_value(25); + classify_->classify_debug_level.set_value(3); + } + classify_->CharNormTrainingSample(pruner_only_, keep_this, sample, results); + if (debug) { + classify_->matcher_debug_level.set_value(old_matcher_level); + classify_->matcher_debug_flags.set_value(old_matcher_flags); + classify_->classify_debug_level.set_value(old_classify_level); + } + return results->size(); +} + +// Provides access to the ShapeTable that this classifier works with. +const ShapeTable* TessClassifier::GetShapeTable() const { + return classify_->shape_table(); +} +// Provides access to the UNICHARSET that this classifier works with. 
+// Only needs to be overridden if GetShapeTable() can return nullptr. +const UNICHARSET& TessClassifier::GetUnicharset() const { + return classify_->unicharset; +} + +// Displays classification as the given shape_id. Creates as many windows +// as it feels fit, using index as a guide for placement. Adds any created +// windows to the windows output and returns a new index that may be used +// by any subsequent classifiers. Caller waits for the user to view and +// then destroys the windows by clearing the vector. +int TessClassifier::DisplayClassifyAs( + const TrainingSample& sample, Pix* page_pix, int unichar_id, int index, + PointerVector<ScrollView>* windows) { + int shape_id = unichar_id; + // TODO(rays) Fix this so it works with both flat and real shapetables. + // if (GetShapeTable() != nullptr) + // shape_id = BestShapeForUnichar(sample, page_pix, unichar_id, nullptr); + if (shape_id < 0) return index; + if (UnusedClassIdIn(classify_->PreTrainedTemplates, shape_id)) { + tprintf("No built-in templates for class/shape %d\n", shape_id); + return index; + } +#ifndef GRAPHICS_DISABLED + classify_->ShowBestMatchFor(shape_id, sample.features(), + sample.num_features()); +#endif + return index; +} + +} // namespace tesseract diff --git a/tesseract/src/classify/tessclassifier.h b/tesseract/src/classify/tessclassifier.h new file mode 100644 index 00000000..a8b3f753 --- /dev/null +++ b/tesseract/src/classify/tessclassifier.h @@ -0,0 +1,72 @@ +// Copyright 2011 Google Inc. All Rights Reserved. +// Author: rays@google.com (Ray Smith) +/////////////////////////////////////////////////////////////////////// +// File: tessclassifier.h +// Description: Tesseract implementation of a ShapeClassifier. +// Author: Ray Smith +// Created: Tue Nov 22 14:10:45 PST 2011 +// +// (C) Copyright 2011, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#ifndef THIRD_PARTY_TESSERACT_CLASSIFY_TESSCLASSIFIER_H_ +#define THIRD_PARTY_TESSERACT_CLASSIFY_TESSCLASSIFIER_H_ + +#include "shapeclassifier.h" + +namespace tesseract { + +class Classify; +class TrainingSample; + +// Tesseract implementation of a ShapeClassifier. +// Due to limitations in the content of TrainingSample, this currently +// only works for the static classifier and only works if the ShapeTable +// in classify is not nullptr. +class TESS_API TessClassifier : public ShapeClassifier { + public: + TessClassifier(bool pruner_only, tesseract::Classify* classify) + : pruner_only_(pruner_only), classify_(classify) {} + ~TessClassifier() override = default; + + // Classifies the given [training] sample, writing to results. + // See ShapeClassifier for a full description. + int UnicharClassifySample(const TrainingSample& sample, Pix* page_pix, + int debug, UNICHAR_ID keep_this, + std::vector<UnicharRating>* results) override; + // Provides access to the ShapeTable that this classifier works with. + const ShapeTable* GetShapeTable() const override; + // Provides access to the UNICHARSET that this classifier works with. + // Only needs to be overridden if GetShapeTable() can return nullptr. + const UNICHARSET& GetUnicharset() const override; + + // Displays classification as the given shape_id. Creates as many windows + // as it feels fit, using index as a guide for placement. 
Adds any created + // windows to the windows output and returns a new index that may be used + // by any subsequent classifiers. Caller waits for the user to view and + // then destroys the windows by clearing the vector. + int DisplayClassifyAs(const TrainingSample& sample, Pix* page_pix, + int unichar_id, int index, + PointerVector<ScrollView>* windows) override; + + private: + // Indicates that this classifier is to use just the ClassPruner, or the + // full classifier if false. + bool pruner_only_; + // Borrowed pointer to the actual Tesseract classifier. + tesseract::Classify* classify_; +}; + +} // namespace tesseract + +#endif /* THIRD_PARTY_TESSERACT_CLASSIFY_TESSCLASSIFIER_H_ */ diff --git a/tesseract/src/classify/trainingsample.cpp b/tesseract/src/classify/trainingsample.cpp new file mode 100644 index 00000000..003fb97b --- /dev/null +++ b/tesseract/src/classify/trainingsample.cpp @@ -0,0 +1,339 @@ +// Copyright 2010 Google Inc. All Rights Reserved. +// Author: rays@google.com (Ray Smith) +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#define _USE_MATH_DEFINES // for M_PI +// Include automatically generated configuration file if running autoconf. 
+#ifdef HAVE_CONFIG_H +#include "config_auto.h" +#endif + +#include "trainingsample.h" + +#include "intfeaturespace.h" +#include "helpers.h" +#include "normfeat.h" +#include "shapetable.h" + +#include "allheaders.h" + +#include <cmath> // for M_PI + +namespace tesseract { + +ELISTIZE(TrainingSample) + +// Center of randomizing operations. +const int kRandomizingCenter = 128; + +// Randomizing factors. +const int TrainingSample::kYShiftValues[kSampleYShiftSize] = { + 6, 3, -3, -6, 0 +}; +const double TrainingSample::kScaleValues[kSampleScaleSize] = { + 1.0625, 0.9375, 1.0 +}; + +TrainingSample::~TrainingSample() { + delete [] features_; + delete [] micro_features_; +} + +// WARNING! Serialize/DeSerialize do not save/restore the "cache" data +// members, which is mostly the mapped features, and the weight. +// It is assumed these can all be reconstructed from what is saved. +// Writes to the given file. Returns false in case of error. +bool TrainingSample::Serialize(FILE* fp) const { + if (fwrite(&class_id_, sizeof(class_id_), 1, fp) != 1) return false; + if (fwrite(&font_id_, sizeof(font_id_), 1, fp) != 1) return false; + if (fwrite(&page_num_, sizeof(page_num_), 1, fp) != 1) return false; + if (!bounding_box_.Serialize(fp)) return false; + if (fwrite(&num_features_, sizeof(num_features_), 1, fp) != 1) return false; + if (fwrite(&num_micro_features_, sizeof(num_micro_features_), 1, fp) != 1) + return false; + if (fwrite(&outline_length_, sizeof(outline_length_), 1, fp) != 1) + return false; + if (fwrite(features_, sizeof(*features_), num_features_, fp) != num_features_) + return false; + if (fwrite(micro_features_, sizeof(*micro_features_), num_micro_features_, + fp) != num_micro_features_) + return false; + if (fwrite(cn_feature_, sizeof(*cn_feature_), kNumCNParams, fp) != + kNumCNParams) return false; + if (fwrite(geo_feature_, sizeof(*geo_feature_), GeoCount, fp) != GeoCount) + return false; + return true; +} + +// Creates from the given file. 
Returns nullptr in case of error. +// If swap is true, assumes a big/little-endian swap is needed. +TrainingSample* TrainingSample::DeSerializeCreate(bool swap, FILE* fp) { + auto* sample = new TrainingSample; + if (sample->DeSerialize(swap, fp)) return sample; + delete sample; + return nullptr; +} + +// Reads from the given file. Returns false in case of error. +// If swap is true, assumes a big/little-endian swap is needed. +bool TrainingSample::DeSerialize(bool swap, FILE* fp) { + if (fread(&class_id_, sizeof(class_id_), 1, fp) != 1) return false; + if (fread(&font_id_, sizeof(font_id_), 1, fp) != 1) return false; + if (fread(&page_num_, sizeof(page_num_), 1, fp) != 1) return false; + if (!bounding_box_.DeSerialize(swap, fp)) return false; + if (fread(&num_features_, sizeof(num_features_), 1, fp) != 1) return false; + if (fread(&num_micro_features_, sizeof(num_micro_features_), 1, fp) != 1) + return false; + if (fread(&outline_length_, sizeof(outline_length_), 1, fp) != 1) + return false; + if (swap) { + ReverseN(&class_id_, sizeof(class_id_)); + ReverseN(&num_features_, sizeof(num_features_)); + ReverseN(&num_micro_features_, sizeof(num_micro_features_)); + ReverseN(&outline_length_, sizeof(outline_length_)); + } + // Arbitrarily limit the number of elements to protect against bad data. 
+ if (num_features_ > UINT16_MAX) return false; + if (num_micro_features_ > UINT16_MAX) return false; + delete [] features_; + features_ = new INT_FEATURE_STRUCT[num_features_]; + if (fread(features_, sizeof(*features_), num_features_, fp) + != num_features_) + return false; + delete [] micro_features_; + micro_features_ = new MicroFeature[num_micro_features_]; + if (fread(micro_features_, sizeof(*micro_features_), num_micro_features_, + fp) != num_micro_features_) + return false; + if (fread(cn_feature_, sizeof(*cn_feature_), kNumCNParams, fp) != + kNumCNParams) return false; + if (fread(geo_feature_, sizeof(*geo_feature_), GeoCount, fp) != GeoCount) + return false; + return true; +} + +// Saves the given features into a TrainingSample. +TrainingSample* TrainingSample::CopyFromFeatures( + const INT_FX_RESULT_STRUCT& fx_info, + const TBOX& bounding_box, + const INT_FEATURE_STRUCT* features, + int num_features) { + auto* sample = new TrainingSample; + sample->num_features_ = num_features; + sample->features_ = new INT_FEATURE_STRUCT[num_features]; + sample->outline_length_ = fx_info.Length; + memcpy(sample->features_, features, num_features * sizeof(features[0])); + sample->geo_feature_[GeoBottom] = bounding_box.bottom(); + sample->geo_feature_[GeoTop] = bounding_box.top(); + sample->geo_feature_[GeoWidth] = bounding_box.width(); + + // Generate the cn_feature_ from the fx_info. + sample->cn_feature_[CharNormY] = + MF_SCALE_FACTOR * (fx_info.Ymean - kBlnBaselineOffset); + sample->cn_feature_[CharNormLength] = + MF_SCALE_FACTOR * fx_info.Length / LENGTH_COMPRESSION; + sample->cn_feature_[CharNormRx] = MF_SCALE_FACTOR * fx_info.Rx; + sample->cn_feature_[CharNormRy] = MF_SCALE_FACTOR * fx_info.Ry; + + sample->features_are_indexed_ = false; + sample->features_are_mapped_ = false; + return sample; +} + +// Returns the cn_feature as a FEATURE_STRUCT* needed by cntraining. 
+FEATURE_STRUCT* TrainingSample::GetCNFeature() const { + FEATURE feature = NewFeature(&CharNormDesc); + for (int i = 0; i < kNumCNParams; ++i) + feature->Params[i] = cn_feature_[i]; + return feature; +} + +// Constructs and returns a copy randomized by the method given by +// the randomizer index. If index is out of [0, kSampleRandomSize) then +// an exact copy is returned. +TrainingSample* TrainingSample::RandomizedCopy(int index) const { + TrainingSample* sample = Copy(); + if (index >= 0 && index < kSampleRandomSize) { + ++index; // Remove the first combination. + const int yshift = kYShiftValues[index / kSampleScaleSize]; + double scaling = kScaleValues[index % kSampleScaleSize]; + for (uint32_t i = 0; i < num_features_; ++i) { + double result = (features_[i].X - kRandomizingCenter) * scaling; + result += kRandomizingCenter; + sample->features_[i].X = ClipToRange<int>(result + 0.5, 0, UINT8_MAX); + result = (features_[i].Y - kRandomizingCenter) * scaling; + result += kRandomizingCenter + yshift; + sample->features_[i].Y = ClipToRange<int>(result + 0.5, 0, UINT8_MAX); + } + } + return sample; +} + +// Constructs and returns an exact copy. 
+TrainingSample* TrainingSample::Copy() const { + auto* sample = new TrainingSample; + sample->class_id_ = class_id_; + sample->font_id_ = font_id_; + sample->weight_ = weight_; + sample->sample_index_ = sample_index_; + sample->num_features_ = num_features_; + if (num_features_ > 0) { + sample->features_ = new INT_FEATURE_STRUCT[num_features_]; + memcpy(sample->features_, features_, num_features_ * sizeof(features_[0])); + } + sample->num_micro_features_ = num_micro_features_; + if (num_micro_features_ > 0) { + sample->micro_features_ = new MicroFeature[num_micro_features_]; + memcpy(sample->micro_features_, micro_features_, + num_micro_features_ * sizeof(micro_features_[0])); + } + memcpy(sample->cn_feature_, cn_feature_, sizeof(*cn_feature_) * kNumCNParams); + memcpy(sample->geo_feature_, geo_feature_, sizeof(*geo_feature_) * GeoCount); + return sample; +} + +// Extracts the needed information from the CHAR_DESC_STRUCT. +void TrainingSample::ExtractCharDesc(int int_feature_type, + int micro_type, + int cn_type, + int geo_type, + CHAR_DESC_STRUCT* char_desc) { + // Extract the INT features. + delete[] features_; + FEATURE_SET_STRUCT* char_features = char_desc->FeatureSets[int_feature_type]; + if (char_features == nullptr) { + tprintf("Error: no features to train on of type %s\n", + kIntFeatureType); + num_features_ = 0; + features_ = nullptr; + } else { + num_features_ = char_features->NumFeatures; + features_ = new INT_FEATURE_STRUCT[num_features_]; + for (uint32_t f = 0; f < num_features_; ++f) { + features_[f].X = + static_cast<uint8_t>(char_features->Features[f]->Params[IntX]); + features_[f].Y = + static_cast<uint8_t>(char_features->Features[f]->Params[IntY]); + features_[f].Theta = + static_cast<uint8_t>(char_features->Features[f]->Params[IntDir]); + features_[f].CP_misses = 0; + } + } + // Extract the Micro features. 
+ delete[] micro_features_; + char_features = char_desc->FeatureSets[micro_type]; + if (char_features == nullptr) { + tprintf("Error: no features to train on of type %s\n", + kMicroFeatureType); + num_micro_features_ = 0; + micro_features_ = nullptr; + } else { + num_micro_features_ = char_features->NumFeatures; + micro_features_ = new MicroFeature[num_micro_features_]; + for (uint32_t f = 0; f < num_micro_features_; ++f) { + for (int d = 0; d < MFCount; ++d) { + micro_features_[f][d] = char_features->Features[f]->Params[d]; + } + } + } + // Extract the CN feature. + char_features = char_desc->FeatureSets[cn_type]; + if (char_features == nullptr) { + tprintf("Error: no CN feature to train on.\n"); + } else { + ASSERT_HOST(char_features->NumFeatures == 1); + cn_feature_[CharNormY] = char_features->Features[0]->Params[CharNormY]; + cn_feature_[CharNormLength] = + char_features->Features[0]->Params[CharNormLength]; + cn_feature_[CharNormRx] = char_features->Features[0]->Params[CharNormRx]; + cn_feature_[CharNormRy] = char_features->Features[0]->Params[CharNormRy]; + } + // Extract the Geo feature. + char_features = char_desc->FeatureSets[geo_type]; + if (char_features == nullptr) { + tprintf("Error: no Geo feature to train on.\n"); + } else { + ASSERT_HOST(char_features->NumFeatures == 1); + geo_feature_[GeoBottom] = char_features->Features[0]->Params[GeoBottom]; + geo_feature_[GeoTop] = char_features->Features[0]->Params[GeoTop]; + geo_feature_[GeoWidth] = char_features->Features[0]->Params[GeoWidth]; + } + features_are_indexed_ = false; + features_are_mapped_ = false; +} + +// Sets the mapped_features_ from the features_ using the provided +// feature_space to the indexed versions of the features. 
+void TrainingSample::IndexFeatures(const IntFeatureSpace& feature_space) { + GenericVector<int> indexed_features; + feature_space.IndexAndSortFeatures(features_, num_features_, + &mapped_features_); + features_are_indexed_ = true; + features_are_mapped_ = false; +} + +// Returns a pix representing the sample. (Int features only.) +Pix* TrainingSample::RenderToPix(const UNICHARSET* unicharset) const { + Pix* pix = pixCreate(kIntFeatureExtent, kIntFeatureExtent, 1); + for (uint32_t f = 0; f < num_features_; ++f) { + int start_x = features_[f].X; + int start_y = kIntFeatureExtent - features_[f].Y; + double dx = cos((features_[f].Theta / 256.0) * 2.0 * M_PI - M_PI); + double dy = -sin((features_[f].Theta / 256.0) * 2.0 * M_PI - M_PI); + for (int i = 0; i <= 5; ++i) { + int x = static_cast<int>(start_x + dx * i); + int y = static_cast<int>(start_y + dy * i); + if (x >= 0 && x < 256 && y >= 0 && y < 256) + pixSetPixel(pix, x, y, 1); + } + } + if (unicharset != nullptr) + pixSetText(pix, unicharset->id_to_unichar(class_id_)); + return pix; +} + +#ifndef GRAPHICS_DISABLED + +// Displays the features in the given window with the given color. +void TrainingSample::DisplayFeatures(ScrollView::Color color, + ScrollView* window) const { + for (uint32_t f = 0; f < num_features_; ++f) { + RenderIntFeature(window, &features_[f], color); + } +} + +#endif // !GRAPHICS_DISABLED + +// Returns a pix of the original sample image. The pix is padded all round +// by padding wherever possible. +// The returned Pix must be pixDestroyed after use. +// If the input page_pix is nullptr, nullptr is returned. 
+Pix* TrainingSample::GetSamplePix(int padding, Pix* page_pix) const { + if (page_pix == nullptr) + return nullptr; + int page_width = pixGetWidth(page_pix); + int page_height = pixGetHeight(page_pix); + TBOX padded_box = bounding_box(); + padded_box.pad(padding, padding); + // Clip the padded_box to the limits of the page + TBOX page_box(0, 0, page_width, page_height); + padded_box &= page_box; + Box* box = boxCreate(page_box.left(), page_height - page_box.top(), + page_box.width(), page_box.height()); + Pix* sample_pix = pixClipRectangle(page_pix, box, nullptr); + boxDestroy(&box); + return sample_pix; +} + +} // namespace tesseract diff --git a/tesseract/src/classify/trainingsample.h b/tesseract/src/classify/trainingsample.h new file mode 100644 index 00000000..0ac2cc4f --- /dev/null +++ b/tesseract/src/classify/trainingsample.h @@ -0,0 +1,252 @@ +// Copyright 2010 Google Inc. All Rights Reserved. +// Author: rays@google.com (Ray Smith) +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +/////////////////////////////////////////////////////////////////////// + +#ifndef TESSERACT_TRAINING_TRAININGSAMPLE_H_ +#define TESSERACT_TRAINING_TRAININGSAMPLE_H_ + +#include "elst.h" +#include "featdefs.h" +#include "intfx.h" +#include "intmatcher.h" +#include "matrix.h" +#include "mf.h" +#include "picofeat.h" +#include "shapetable.h" +#include "unicharset.h" + +struct Pix; + +namespace tesseract { + +class IntFeatureMap; +class IntFeatureSpace; +class ShapeTable; + +// Number of elements of cn_feature_. +static const int kNumCNParams = 4; +// Number of ways to shift the features when randomizing. +static const int kSampleYShiftSize = 5; +// Number of ways to scale the features when randomizing. +static const int kSampleScaleSize = 3; +// Total number of different ways to manipulate the features when randomizing. +// The first and last combinations are removed to avoid an excessive +// top movement (first) and an identity transformation (last). +// WARNING: To avoid patterned duplication of samples, be sure to keep +// kSampleRandomSize prime! +// Eg with current values (kSampleYShiftSize = 5 and TkSampleScaleSize = 3) +// kSampleRandomSize is 13, which is prime. +static const int kSampleRandomSize = kSampleYShiftSize * kSampleScaleSize - 2; +// ASSERT_IS_PRIME(kSampleRandomSize) !! + +class TESS_API TrainingSample : public ELIST_LINK { + public: + TrainingSample() + : class_id_(INVALID_UNICHAR_ID), font_id_(0), page_num_(0), + num_features_(0), num_micro_features_(0), outline_length_(0), + features_(nullptr), micro_features_(nullptr), weight_(1.0), + max_dist_(0.0), sample_index_(0), + features_are_indexed_(false), features_are_mapped_(false), + is_error_(false) { + } + ~TrainingSample(); + + // Saves the given features into a TrainingSample. The features are copied, + // so may be deleted afterwards. Delete the return value after use. 
+ static TrainingSample* CopyFromFeatures(const INT_FX_RESULT_STRUCT& fx_info, + const TBOX& bounding_box, + const INT_FEATURE_STRUCT* features, + int num_features); + // Returns the cn_feature as a FEATURE_STRUCT* needed by cntraining. + FEATURE_STRUCT* GetCNFeature() const; + // Constructs and returns a copy "randomized" by the method given by + // the randomizer index. If index is out of [0, kSampleRandomSize) then + // an exact copy is returned. + TrainingSample* RandomizedCopy(int index) const; + // Constructs and returns an exact copy. + TrainingSample* Copy() const; + + // WARNING! Serialize/DeSerialize do not save/restore the "cache" data + // members, which is mostly the mapped features, and the weight. + // It is assumed these can all be reconstructed from what is saved. + // Writes to the given file. Returns false in case of error. + bool Serialize(FILE* fp) const; + // Creates from the given file. Returns nullptr in case of error. + // If swap is true, assumes a big/little-endian swap is needed. + static TrainingSample* DeSerializeCreate(bool swap, FILE* fp); + // Reads from the given file. Returns false in case of error. + // If swap is true, assumes a big/little-endian swap is needed. + bool DeSerialize(bool swap, FILE* fp); + + // Extracts the needed information from the CHAR_DESC_STRUCT. + void ExtractCharDesc(int feature_type, int micro_type, + int cn_type, int geo_type, + CHAR_DESC_STRUCT* char_desc); + + // Sets the mapped_features_ from the features_ using the provided + // feature_space to the indexed versions of the features. + void IndexFeatures(const IntFeatureSpace& feature_space); + + // Returns a pix representing the sample. (Int features only.) + Pix* RenderToPix(const UNICHARSET* unicharset) const; + // Displays the features in the given window with the given color. + void DisplayFeatures(ScrollView::Color color, ScrollView* window) const; + + // Returns a pix of the original sample image. 
The pix is padded all round + // by padding wherever possible. + // The returned Pix must be pixDestroyed after use. + // If the input page_pix is nullptr, nullptr is returned. + Pix* GetSamplePix(int padding, Pix* page_pix) const; + + // Accessors. + UNICHAR_ID class_id() const { + return class_id_; + } + void set_class_id(int id) { + class_id_ = id; + } + int font_id() const { + return font_id_; + } + void set_font_id(int id) { + font_id_ = id; + } + int page_num() const { + return page_num_; + } + void set_page_num(int page) { + page_num_ = page; + } + const TBOX& bounding_box() const { + return bounding_box_; + } + void set_bounding_box(const TBOX& box) { + bounding_box_ = box; + } + uint32_t num_features() const { + return num_features_; + } + const INT_FEATURE_STRUCT* features() const { + return features_; + } + uint32_t num_micro_features() const { + return num_micro_features_; + } + const MicroFeature* micro_features() const { + return micro_features_; + } + int outline_length() const { + return outline_length_; + } + float cn_feature(int index) const { + return cn_feature_[index]; + } + int geo_feature(int index) const { + return geo_feature_[index]; + } + double weight() const { + return weight_; + } + void set_weight(double value) { + weight_ = value; + } + double max_dist() const { + return max_dist_; + } + void set_max_dist(double value) { + max_dist_ = value; + } + int sample_index() const { + return sample_index_; + } + void set_sample_index(int value) { + sample_index_ = value; + } + bool features_are_mapped() const { + return features_are_mapped_; + } + const GenericVector<int>& mapped_features() const { + ASSERT_HOST(features_are_mapped_); + return mapped_features_; + } + const GenericVector<int>& indexed_features() const { + ASSERT_HOST(features_are_indexed_); + return mapped_features_; + } + bool is_error() const { + return is_error_; + } + void set_is_error(bool value) { + is_error_ = value; + } + + private: + // Unichar id that this sample 
represents. There obviously must be a + // reference UNICHARSET somewhere. Usually in TrainingSampleSet. + UNICHAR_ID class_id_; + // Font id in which this sample was printed. Refers to a fontinfo_table_ in + // MasterTrainer. + int font_id_; + // Number of page that the sample came from. + int page_num_; + // Bounding box of sample in original image. + TBOX bounding_box_; + // Number of INT_FEATURE_STRUCT in features_ array. + uint32_t num_features_; + // Number of MicroFeature in micro_features_ array. + uint32_t num_micro_features_; + // Total length of outline in the baseline normalized coordinate space. + // See comment in WERD_RES class definition for a discussion of coordinate + // spaces. + int outline_length_; + // Array of features. + INT_FEATURE_STRUCT* features_; + // Array of features. + MicroFeature* micro_features_; + // The one and only CN feature. Indexed by NORM_PARAM_NAME enum. + float cn_feature_[kNumCNParams]; + // The one and only geometric feature. (Aims at replacing cn_feature_). + // Indexed by GeoParams enum in picofeat.h + int geo_feature_[GeoCount]; + + // Non-serialized cache data. + // Weight used for boosting training. + double weight_; + // Maximum distance to other samples of same class/font used in computing + // the canonical sample. + double max_dist_; + // Global index of this sample. + int sample_index_; +public: + // both are used in training tools + // hide after refactoring + + // Indexed/mapped features, as indicated by the bools below. + GenericVector<int> mapped_features_; + bool features_are_indexed_; + bool features_are_mapped_; +private: + // True if the last classification was an error by the current definition. + bool is_error_; + + // Randomizing factors. + static const int kYShiftValues[kSampleYShiftSize]; + static const double kScaleValues[kSampleScaleSize]; +}; + +ELISTIZEH(TrainingSample) + +} // namespace tesseract + +#endif // TESSERACT_TRAINING_TRAININGSAMPLE_H_ |