diff options
Diffstat (limited to 'leptonica/src/baseline.c')
-rw-r--r-- | leptonica/src/baseline.c | 600 |
1 files changed, 600 insertions, 0 deletions
diff --git a/leptonica/src/baseline.c b/leptonica/src/baseline.c new file mode 100644 index 00000000..bda8df78 --- /dev/null +++ b/leptonica/src/baseline.c @@ -0,0 +1,600 @@ +/*====================================================================* + - Copyright (C) 2001 Leptonica. All rights reserved. + - + - Redistribution and use in source and binary forms, with or without + - modification, are permitted provided that the following conditions + - are met: + - 1. Redistributions of source code must retain the above copyright + - notice, this list of conditions and the following disclaimer. + - 2. Redistributions in binary form must reproduce the above + - copyright notice, this list of conditions and the following + - disclaimer in the documentation and/or other materials + - provided with the distribution. + - + - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY + - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + *====================================================================*/ + +/*! + * \file baseline.c + * <pre> + * + * Locate text baselines in an image + * NUMA *pixFindBaselines() + * + * Projective transform to remove local skew + * PIX *pixDeskewLocal() + * + * Determine local skew + * l_int32 pixGetLocalSkewTransform() + * NUMA *pixGetLocalSkewAngles() + * + * We have two apparently different functions here: + * ~ finding baselines + * ~ finding a projective transform to remove keystone warping + * The function pixGetLocalSkewAngles() returns an array of angles, + * one for each raster line, and the baselines of the text lines + * should intersect the left edge of the image with that angle. + * </pre> + */ + +#ifdef HAVE_CONFIG_H +#include <config_auto.h> +#endif /* HAVE_CONFIG_H */ + +#include <math.h> +#include "allheaders.h" + + /* Min to travel after finding max before abandoning peak */ +static const l_int32 MinDistInPeak = 35; + + /* Thresholds for peaks and zeros, relative to the max peak */ +static const l_int32 PeakThresholdRatio = 20; +static const l_int32 ZeroThresholdRatio = 100; + + /* Default values for determining local skew */ +static const l_int32 DefaultSlices = 10; +static const l_int32 DefaultSweepReduction = 2; +static const l_int32 DefaultBsReduction = 1; +static const l_float32 DefaultSweepRange = 5.; /* degrees */ +static const l_float32 DefaultSweepDelta = 1.; /* degrees */ +static const l_float32 DefaultMinbsDelta = 0.01f; /* degrees */ + + /* Overlap slice fraction added to top and bottom of each slice */ +static const l_float32 OverlapFraction = 0.5; + + /* Minimum allowed confidence (ratio) for accepting a value */ +static const l_float32 MinAllowedConfidence = 3.0; + + +/*---------------------------------------------------------------------* + * Locate text baselines in an image * + *---------------------------------------------------------------------*/ +/*! + * \brief pixFindBaselines() + * + * \param[in] pixs 1 bpp, 300 ppi + * \param[out] ppta [optional] pairs of pts corresponding to + * approx. ends of each text line + * \param[in] pixadb for debug output; use NULL to skip + * \return na of baseline y values, or NULL on error + * + * <pre> + * Notes: + * (1) Input binary image must have text lines already aligned + * horizontally. This can be done by either rotating the + * image with pixDeskew(), or, if a projective transform + * is required, by doing pixDeskewLocal() first. + * (2) Input null for &pta if you don't want this returned. + * The pta will come in pairs of points (left and right end + * of each baseline). + * (3) Caution: this will not work properly on text with multiple + * columns, where the lines are not aligned between columns. + * If there are multiple columns, they should be extracted + * separately before finding the baselines. + * (4) This function constructs different types of output + * for baselines; namely, a set of raster line values and + * a set of end points of each baseline. + * (5) This function was designed to handle short and long text lines + * without using dangerous thresholds on the peak heights. It does + * this by combining the differential signal with a morphological + * analysis of the locations of the text lines. One can also + * combine this data to normalize the peak heights, by weighting + * the differential signal in the region of each baseline + * by the inverse of the width of the text line found there. + * </pre> + */ +NUMA * +pixFindBaselines(PIX *pixs, + PTA **ppta, + PIXA *pixadb) +{ +l_int32 h, i, j, nbox, val1, val2, ndiff, bx, by, bw, bh; +l_int32 imaxloc, peakthresh, zerothresh, inpeak; +l_int32 mintosearch, max, maxloc, nloc, locval; +l_int32 *array; +l_float32 maxval; +BOXA *boxa1, *boxa2, *boxa3; +GPLOT *gplot; +NUMA *nasum, *nadiff, *naloc, *naval; +PIX *pix1, *pix2; +PTA *pta; + + PROCNAME("pixFindBaselines"); + + if (ppta) *ppta = NULL; + if (!pixs || pixGetDepth(pixs) != 1) + return (NUMA *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); + + /* Close up the text characters, removing noise */ + pix1 = pixMorphSequence(pixs, "c25.1 + e15.1", 0); + + /* Estimate the resolution */ + if (pixadb) pixaAddPix(pixadb, pixScale(pix1, 0.25, 0.25), L_INSERT); + + /* Save the difference of adjacent row sums. + * The high positive-going peaks are the baselines */ + if ((nasum = pixCountPixelsByRow(pix1, NULL)) == NULL) { + pixDestroy(&pix1); + return (NUMA *)ERROR_PTR("nasum not made", procName, NULL); + } + h = pixGetHeight(pixs); + nadiff = numaCreate(h); + numaGetIValue(nasum, 0, &val2); + for (i = 0; i < h - 1; i++) { + val1 = val2; + numaGetIValue(nasum, i + 1, &val2); + numaAddNumber(nadiff, val1 - val2); + } + numaDestroy(&nasum); + + if (pixadb) { /* show the difference signal */ + lept_mkdir("lept/baseline"); + gplotSimple1(nadiff, GPLOT_PNG, "/tmp/lept/baseline/diff", "Diff Sig"); + pix2 = pixRead("/tmp/lept/baseline/diff.png"); + pixaAddPix(pixadb, pix2, L_INSERT); + } + + /* Use the zeroes of the profile to locate each baseline. */ + array = numaGetIArray(nadiff); + ndiff = numaGetCount(nadiff); + numaGetMax(nadiff, &maxval, &imaxloc); + numaDestroy(&nadiff); + + /* Use this to begin locating a new peak: */ + peakthresh = (l_int32)maxval / PeakThresholdRatio; + /* Use this to begin a region between peaks: */ + zerothresh = (l_int32)maxval / ZeroThresholdRatio; + + naloc = numaCreate(0); + naval = numaCreate(0); + inpeak = FALSE; + for (i = 0; i < ndiff; i++) { + if (inpeak == FALSE) { + if (array[i] > peakthresh) { /* transition to in-peak */ + inpeak = TRUE; + mintosearch = i + MinDistInPeak; /* accept no zeros + * between i and mintosearch */ + max = array[i]; + maxloc = i; + } + } else { /* inpeak == TRUE; look for max */ + if (array[i] > max) { + max = array[i]; + maxloc = i; + mintosearch = i + MinDistInPeak; + } else if (i > mintosearch && array[i] <= zerothresh) { /* leave */ + inpeak = FALSE; + numaAddNumber(naval, max); + numaAddNumber(naloc, maxloc); + } + } + } + LEPT_FREE(array); + + /* If array[ndiff-1] is max, eg. no descenders, baseline at bottom */ + if (inpeak) { + numaAddNumber(naval, max); + numaAddNumber(naloc, maxloc); + } + + if (pixadb) { /* show the raster locations for the peaks */ + gplot = gplotCreate("/tmp/lept/baseline/loc", GPLOT_PNG, "Peak locs", + "rasterline", "height"); + gplotAddPlot(gplot, naloc, naval, GPLOT_POINTS, "locs"); + gplotMakeOutput(gplot); + gplotDestroy(&gplot); + pix2 = pixRead("/tmp/lept/baseline/loc.png"); + pixaAddPix(pixadb, pix2, L_INSERT); + } + numaDestroy(&naval); + + /* Generate an approximate profile of text line width. + * First, filter the boxes of text, where there may be + * more than one box for a given textline. */ + pix2 = pixMorphSequence(pix1, "r11 + c20.1 + o30.1 +c1.3", 0); + if (pixadb) pixaAddPix(pixadb, pix2, L_COPY); + boxa1 = pixConnComp(pix2, NULL, 4); + pixDestroy(&pix1); + pixDestroy(&pix2); + if (boxaGetCount(boxa1) == 0) { + numaDestroy(&naloc); + boxaDestroy(&boxa1); + L_INFO("no compnents after filtering\n", procName); + return NULL; + } + boxa2 = boxaTransform(boxa1, 0, 0, 4., 4.); + boxa3 = boxaSort(boxa2, L_SORT_BY_Y, L_SORT_INCREASING, NULL); + boxaDestroy(&boxa1); + boxaDestroy(&boxa2); + + /* Optionally, find the baseline segments */ + pta = NULL; + if (ppta) { + pta = ptaCreate(0); + *ppta = pta; + } + if (pta) { + nloc = numaGetCount(naloc); + nbox = boxaGetCount(boxa3); + for (i = 0; i < nbox; i++) { + boxaGetBoxGeometry(boxa3, i, &bx, &by, &bw, &bh); + for (j = 0; j < nloc; j++) { + numaGetIValue(naloc, j, &locval); + if (L_ABS(locval - (by + bh)) > 25) + continue; + ptaAddPt(pta, bx, locval); + ptaAddPt(pta, bx + bw, locval); + break; + } + } + } + boxaDestroy(&boxa3); + + if (pixadb && pta) { /* display baselines */ + l_int32 npts, x1, y1, x2, y2; + pix1 = pixConvertTo32(pixs); + npts = ptaGetCount(pta); + for (i = 0; i < npts; i += 2) { + ptaGetIPt(pta, i, &x1, &y1); + ptaGetIPt(pta, i + 1, &x2, &y2); + pixRenderLineArb(pix1, x1, y1, x2, y2, 2, 255, 0, 0); + } + pixWriteDebug("/tmp/lept/baseline/baselines.png", pix1, IFF_PNG); + pixaAddPix(pixadb, pixScale(pix1, 0.25, 0.25), L_INSERT); + pixDestroy(&pix1); + } + + return naloc; +} + + +/*---------------------------------------------------------------------* + * Projective transform to remove local skew * + *---------------------------------------------------------------------*/ +/*! + * \brief pixDeskewLocal() + * + * \param[in] pixs 1 bpp + * \param[in] nslices the number of horizontal overlapping slices; + * must be larger than 1 and not exceed 20; + * use 0 for default + * \param[in] redsweep sweep reduction factor: 1, 2, 4 or 8; + * use 0 for default value + * \param[in] redsearch search reduction factor: 1, 2, 4 or 8, and + * not larger than redsweep; use 0 for default value + * \param[in] sweeprange half the full range, assumed about 0; in degrees; + * use 0.0 for default value + * \param[in] sweepdelta angle increment of sweep; in degrees; + * use 0.0 for default value + * \param[in] minbsdelta min binary search increment angle; in degrees; + * use 0.0 for default value + * \return pixd, or NULL on error + * + * <pre> + * Notes: + * (1) This function allows deskew of a page whose skew changes + * approximately linearly with vertical position. It uses + * a projective transform that in effect does a differential + * shear about the LHS of the page, and makes all text lines + * horizontal. + * (2) The origin of the keystoning can be either a cheap document + * feeder that rotates the page as it is passed through, or a + * camera image taken from either the left or right side + * of the vertical. + * (3) The image transformation is a projective warping, + * not a rotation. Apart from this function, the text lines + * must be properly aligned vertically with respect to each + * other. This can be done by pre-processing the page; e.g., + * by rotating or horizontally shearing it. + * Typically, this can be achieved by vertically aligning + * the page edge. + * </pre> + */ +PIX * +pixDeskewLocal(PIX *pixs, + l_int32 nslices, + l_int32 redsweep, + l_int32 redsearch, + l_float32 sweeprange, + l_float32 sweepdelta, + l_float32 minbsdelta) +{ +l_int32 ret; +PIX *pixd; +PTA *ptas, *ptad; + + PROCNAME("pixDeskewLocal"); + + if (!pixs || pixGetDepth(pixs) != 1) + return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); + + /* Skew array gives skew angle (deg) as fctn of raster line + * where it intersects the LHS of the image */ + ret = pixGetLocalSkewTransform(pixs, nslices, redsweep, redsearch, + sweeprange, sweepdelta, minbsdelta, + &ptas, &ptad); + if (ret != 0) + return (PIX *)ERROR_PTR("transform pts not found", procName, NULL); + + /* Use a projective transform */ + pixd = pixProjectiveSampledPta(pixs, ptad, ptas, L_BRING_IN_WHITE); + + ptaDestroy(&ptas); + ptaDestroy(&ptad); + return pixd; +} + + +/*---------------------------------------------------------------------* + * Determine the local skew * + *---------------------------------------------------------------------*/ +/*! + * \brief pixGetLocalSkewTransform() + * + * \param[in] pixs + * \param[in] nslices the number of horizontal overlapping slices; + * must be larger than 1 and not exceed 20; + * use 0 for default + * \param[in] redsweep sweep reduction factor: 1, 2, 4 or 8; + * use 0 for default value + * \param[in] redsearch search reduction factor: 1, 2, 4 or 8, and not + * larger than redsweep; use 0 for default value + * \param[in] sweeprange half the full range, assumed about 0; + * in degrees; use 0.0 for default value + * \param[in] sweepdelta angle increment of sweep; in degrees; + * use 0.0 for default value + * \param[in] minbsdelta min binary search increment angle; in degrees; + * use 0.0 for default value + * \param[out] pptas 4 points in the source + * \param[out] pptad the corresponding 4 pts in the dest + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) This generates two pairs of points in the src, each pair + * corresponding to a pair of points that would lie along + * the same raster line in a transformed (dewarped) image. + * (2) The sets of 4 src and 4 dest points returned by this function + * can then be used, in a projective or bilinear transform, + * to remove keystoning in the src. + * </pre> + */ +l_ok +pixGetLocalSkewTransform(PIX *pixs, + l_int32 nslices, + l_int32 redsweep, + l_int32 redsearch, + l_float32 sweeprange, + l_float32 sweepdelta, + l_float32 minbsdelta, + PTA **pptas, + PTA **pptad) +{ +l_int32 w, h, i; +l_float32 deg2rad, angr, angd, dely; +NUMA *naskew; +PTA *ptas, *ptad; + + PROCNAME("pixGetLocalSkewTransform"); + + if (!pptas || !pptad) + return ERROR_INT("&ptas and &ptad not defined", procName, 1); + *pptas = *pptad = NULL; + if (!pixs || pixGetDepth(pixs) != 1) + return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); + if (nslices < 2 || nslices > 20) + nslices = DefaultSlices; + if (redsweep < 1 || redsweep > 8) + redsweep = DefaultSweepReduction; + if (redsearch < 1 || redsearch > redsweep) + redsearch = DefaultBsReduction; + if (sweeprange == 0.0) + sweeprange = DefaultSweepRange; + if (sweepdelta == 0.0) + sweepdelta = DefaultSweepDelta; + if (minbsdelta == 0.0) + minbsdelta = DefaultMinbsDelta; + + naskew = pixGetLocalSkewAngles(pixs, nslices, redsweep, redsearch, + sweeprange, sweepdelta, minbsdelta, + NULL, NULL, 0); + if (!naskew) + return ERROR_INT("naskew not made", procName, 1); + + deg2rad = 3.14159265f / 180.f; + w = pixGetWidth(pixs); + h = pixGetHeight(pixs); + ptas = ptaCreate(4); + ptad = ptaCreate(4); + *pptas = ptas; + *pptad = ptad; + + /* Find i for skew line that intersects LHS at i and RHS at h / 20 */ + for (i = 0; i < h; i++) { + numaGetFValue(naskew, i, &angd); + angr = angd * deg2rad; + dely = w * tan(angr); + if (i - dely > 0.05 * h) + break; + } + ptaAddPt(ptas, 0, i); + ptaAddPt(ptas, w - 1, i - dely); + ptaAddPt(ptad, 0, i); + ptaAddPt(ptad, w - 1, i); + + /* Find i for skew line that intersects LHS at i and RHS at 19h / 20 */ + for (i = h - 1; i > 0; i--) { + numaGetFValue(naskew, i, &angd); + angr = angd * deg2rad; + dely = w * tan(angr); + if (i - dely < 0.95 * h) + break; + } + ptaAddPt(ptas, 0, i); + ptaAddPt(ptas, w - 1, i - dely); + ptaAddPt(ptad, 0, i); + ptaAddPt(ptad, w - 1, i); + + numaDestroy(&naskew); + return 0; +} + + +/*! + * \brief pixGetLocalSkewAngles() + * + * \param[in] pixs 1 bpp + * \param[in] nslices the number of horizontal overlapping slices; + * must be larger than 1 and not exceed 20; + * use 0 for default + * \param[in] redsweep sweep reduction factor: 1, 2, 4 or 8; + * use 0 for default value + * \param[in] redsearch search reduction factor: 1, 2, 4 or 8, and not + * larger than redsweep; use 0 for default value + * \param[in] sweeprange half the full range, assumed about 0; + * in degrees; use 0.0 for default value + * \param[in] sweepdelta angle increment of sweep; in degrees; + * use 0.0 for default value + * \param[in] minbsdelta min binary search increment angle; in degrees; + * use 0.0 for default value + * \param[out] pa [optional] slope of skew as fctn of y + * \param[out] pb [optional] intercept at y = 0 of skew, + 8 as a function of y + * \param[in] debug 1 for generating plot of skew angle vs. y; + * 0 otherwise + * \return naskew, or NULL on error + * + * <pre> + * Notes: + * (1) The local skew is measured in a set of overlapping strips. + * We then do a least square linear fit parameters to get + * the slope and intercept parameters a and b in + * skew-angle = a * y + b (degrees) + * for the local skew as a function of raster line y. + * This is then used to make naskew, which can be interpreted + * as the computed skew angle (in degrees) at the left edge + * of each raster line. + * (2) naskew can then be used to find the baselines of text, because + * each text line has a baseline that should intersect + * the left edge of the image with the angle given by this + * array, evaluated at the raster line of intersection. + * </pre> + */ +NUMA * +pixGetLocalSkewAngles(PIX *pixs, + l_int32 nslices, + l_int32 redsweep, + l_int32 redsearch, + l_float32 sweeprange, + l_float32 sweepdelta, + l_float32 minbsdelta, + l_float32 *pa, + l_float32 *pb, + l_int32 debug) +{ +l_int32 w, h, hs, i, ystart, yend, ovlap, npts; +l_float32 angle, conf, ycenter, a, b; +BOX *box; +GPLOT *gplot; +NUMA *naskew, *nax, *nay; +PIX *pix; +PTA *pta; + + PROCNAME("pixGetLocalSkewAngles"); + + if (!pixs || pixGetDepth(pixs) != 1) + return (NUMA *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); + if (nslices < 2 || nslices > 20) + nslices = DefaultSlices; + if (redsweep < 1 || redsweep > 8) + redsweep = DefaultSweepReduction; + if (redsearch < 1 || redsearch > redsweep) + redsearch = DefaultBsReduction; + if (sweeprange == 0.0) + sweeprange = DefaultSweepRange; + if (sweepdelta == 0.0) + sweepdelta = DefaultSweepDelta; + if (minbsdelta == 0.0) + minbsdelta = DefaultMinbsDelta; + + pixGetDimensions(pixs, &w, &h, NULL); + hs = h / nslices; + ovlap = (l_int32)(OverlapFraction * hs); + pta = ptaCreate(nslices); + for (i = 0; i < nslices; i++) { + ystart = L_MAX(0, hs * i - ovlap); + yend = L_MIN(h - 1, hs * (i + 1) + ovlap); + ycenter = (l_float32)(ystart + yend) / 2; + box = boxCreate(0, ystart, w, yend - ystart + 1); + pix = pixClipRectangle(pixs, box, NULL); + pixFindSkewSweepAndSearch(pix, &angle, &conf, redsweep, redsearch, + sweeprange, sweepdelta, minbsdelta); + if (conf > MinAllowedConfidence) + ptaAddPt(pta, ycenter, angle); + pixDestroy(&pix); + boxDestroy(&box); + } + + /* Do linear least squares fit */ + if ((npts = ptaGetCount(pta)) < 2) { + ptaDestroy(&pta); + return (NUMA *)ERROR_PTR("can't fit skew", procName, NULL); + } + ptaGetLinearLSF(pta, &a, &b, NULL); + if (pa) *pa = a; + if (pb) *pb = b; + + /* Make skew angle array as function of raster line */ + naskew = numaCreate(h); + for (i = 0; i < h; i++) { + angle = a * i + b; + numaAddNumber(naskew, angle); + } + + if (debug) { + lept_mkdir("lept/baseline"); + ptaGetArrays(pta, &nax, &nay); + gplot = gplotCreate("/tmp/lept/baseline/skew", GPLOT_PNG, + "skew as fctn of y", "y (in raster lines from top)", + "angle (in degrees)"); + gplotAddPlot(gplot, NULL, naskew, GPLOT_POINTS, "linear lsf"); + gplotAddPlot(gplot, nax, nay, GPLOT_POINTS, "actual data pts"); + gplotMakeOutput(gplot); + gplotDestroy(&gplot); + numaDestroy(&nax); + numaDestroy(&nay); + } + + ptaDestroy(&pta); + return naskew; +} |