diff options
Diffstat (limited to 'leptonica/src/boxfunc2.c')
-rw-r--r-- | leptonica/src/boxfunc2.c | 1933 |
1 files changed, 1933 insertions, 0 deletions
diff --git a/leptonica/src/boxfunc2.c b/leptonica/src/boxfunc2.c new file mode 100644 index 00000000..98f2808a --- /dev/null +++ b/leptonica/src/boxfunc2.c @@ -0,0 +1,1933 @@ +/*====================================================================* + - Copyright (C) 2001 Leptonica. All rights reserved. + - + - Redistribution and use in source and binary forms, with or without + - modification, are permitted provided that the following conditions + - are met: + - 1. Redistributions of source code must retain the above copyright + - notice, this list of conditions and the following disclaimer. + - 2. Redistributions in binary form must reproduce the above + - copyright notice, this list of conditions and the following + - disclaimer in the documentation and/or other materials + - provided with the distribution. + - + - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY + - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + *====================================================================*/ + +/*! + * \file boxfunc2.c + * <pre> + * + * Boxa/Box transform (shift, scale) and orthogonal rotation + * BOXA *boxaTransform() + * BOX *boxTransform() + * BOXA *boxaTransformOrdered() + * BOX *boxTransformOrdered() + * BOXA *boxaRotateOrth() + * BOX *boxRotateOrth() + * BOXA *boxaShiftWithPta() + * + * Boxa sort + * BOXA *boxaSort() + * BOXA *boxaBinSort() + * BOXA *boxaSortByIndex() + * BOXAA *boxaSort2d() + * BOXAA *boxaSort2dByIndex() + * + * Boxa statistics + * l_int32 boxaGetRankVals() + * l_int32 boxaGetMedianVals() + * l_int32 boxaGetAverageSize() + * + * Boxa array extraction + * l_int32 boxaExtractAsNuma() + * l_int32 boxaExtractAsPta() + * PTA *boxaExtractCorners() + * + * Other Boxaa functions + * l_int32 boxaaGetExtent() + * BOXA *boxaaFlattenToBoxa() + * BOXA *boxaaFlattenAligned() + * BOXAA *boxaEncapsulateAligned() + * BOXAA *boxaaTranspose() + * l_int32 boxaaAlignBox() + * </pre> + */ + +#ifdef HAVE_CONFIG_H +#include <config_auto.h> +#endif /* HAVE_CONFIG_H */ + +#include <math.h> +#include "allheaders.h" + + /* For more than this number of c.c. in a binarized image of + * semi-perimeter (w + h) about 5000 or less, the O(n) binsort + * is faster than the O(nlogn) shellsort. */ +static const l_int32 MinCompsForBinSort = 200; + +/*---------------------------------------------------------------------* + * Boxa/Box transform (shift, scale) and orthogonal rotation * + *---------------------------------------------------------------------*/ +/*! + * \brief boxaTransform() + * + * \param[in] boxas + * \param[in] shiftx + * \param[in] shifty + * \param[in] scalex + * \param[in] scaley + * \return boxad, or NULL on error + * + * <pre> + * Notes: + * (1) This is a very simple function that first shifts, then scales. + * (2) The UL corner coordinates of all boxes in the output %boxad + * (3) For the boxes in the output %boxad, the UL corner coordinates + * must be non-negative, and the width and height of valid + * boxes must be at least 1. + * </pre> + */ +BOXA * +boxaTransform(BOXA *boxas, + l_int32 shiftx, + l_int32 shifty, + l_float32 scalex, + l_float32 scaley) +{ +l_int32 i, n; +BOX *boxs, *boxd; +BOXA *boxad; + + PROCNAME("boxaTransform"); + + if (!boxas) + return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL); + n = boxaGetCount(boxas); + if ((boxad = boxaCreate(n)) == NULL) + return (BOXA *)ERROR_PTR("boxad not made", procName, NULL); + for (i = 0; i < n; i++) { + if ((boxs = boxaGetBox(boxas, i, L_CLONE)) == NULL) { + boxaDestroy(&boxad); + return (BOXA *)ERROR_PTR("boxs not found", procName, NULL); + } + boxd = boxTransform(boxs, shiftx, shifty, scalex, scaley); + boxDestroy(&boxs); + boxaAddBox(boxad, boxd, L_INSERT); + } + + return boxad; +} + + +/*! + * \brief boxTransform() + * + * \param[in] box + * \param[in] shiftx + * \param[in] shifty + * \param[in] scalex + * \param[in] scaley + * \return boxd, or NULL on error + * + * <pre> + * Notes: + * (1) This is a very simple function that first shifts, then scales. + * (2) If the box is invalid, a new invalid box is returned. + * (3) The UL corner coordinates must be non-negative, and the + * width and height of valid boxes must be at least 1. + * </pre> + */ +BOX * +boxTransform(BOX *box, + l_int32 shiftx, + l_int32 shifty, + l_float32 scalex, + l_float32 scaley) +{ + PROCNAME("boxTransform"); + + if (!box) + return (BOX *)ERROR_PTR("box not defined", procName, NULL); + if (box->w <= 0 || box->h <= 0) + return boxCreate(0, 0, 0, 0); + else + return boxCreate((l_int32)(L_MAX(0, scalex * (box->x + shiftx) + 0.5)), + (l_int32)(L_MAX(0, scaley * (box->y + shifty) + 0.5)), + (l_int32)(L_MAX(1.0, scalex * box->w + 0.5)), + (l_int32)(L_MAX(1.0, scaley * box->h + 0.5))); +} + + +/*! + * \brief boxaTransformOrdered() + * + * \param[in] boxas + * \param[in] shiftx + * \param[in] shifty + * \param[in] scalex + * \param[in] scaley + * \param[in] xcen, ycen center of rotation + * \param[in] angle in radians; clockwise is positive + * \param[in] order one of 6 combinations: L_TR_SC_RO, ... + * \return boxd, or NULL on error + * + * <pre> + * shift, scaling and rotation, and the order of the + * transforms is specified. + * (2) Although these operations appear to be on an infinite + * 2D plane, in practice the region of interest is clipped + * to a finite image. The center of rotation is usually taken + * with respect to the image (either the UL corner or the + * center). A translation can have two very different effects: + * (a) Moves the boxes across the fixed image region. + * (b) Moves the image origin, causing a change in the image + * region and an opposite effective translation of the boxes. + * This function should only be used for (a), where the image + * region is fixed on translation. If the image region is + * changed by the translation, use instead the functions + * in affinecompose.c, where the image region and rotation + * center can be computed from the actual clipping due to + * translation of the image origin. + * (3) See boxTransformOrdered() for usage and implementation details. + * </pre> + */ +BOXA * +boxaTransformOrdered(BOXA *boxas, + l_int32 shiftx, + l_int32 shifty, + l_float32 scalex, + l_float32 scaley, + l_int32 xcen, + l_int32 ycen, + l_float32 angle, + l_int32 order) +{ +l_int32 i, n; +BOX *boxs, *boxd; +BOXA *boxad; + + PROCNAME("boxaTransformOrdered"); + + if (!boxas) + return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL); + n = boxaGetCount(boxas); + if ((boxad = boxaCreate(n)) == NULL) + return (BOXA *)ERROR_PTR("boxad not made", procName, NULL); + for (i = 0; i < n; i++) { + if ((boxs = boxaGetBox(boxas, i, L_CLONE)) == NULL) { + boxaDestroy(&boxad); + return (BOXA *)ERROR_PTR("boxs not found", procName, NULL); + } + boxd = boxTransformOrdered(boxs, shiftx, shifty, scalex, scaley, + xcen, ycen, angle, order); + boxDestroy(&boxs); + boxaAddBox(boxad, boxd, L_INSERT); + } + + return boxad; +} + + +/*! + * \brief boxTransformOrdered() + * + * \param[in] boxs + * \param[in] shiftx + * \param[in] shifty + * \param[in] scalex + * \param[in] scaley + * \param[in] xcen, ycen center of rotation + * \param[in] angle in radians; clockwise is positive + * \param[in] order one of 6 combinations: L_TR_SC_RO, ... + * \return boxd, or NULL on error + * + * <pre> + * Notes: + * (1) This allows a sequence of linear transforms, composed of + * shift, scaling and rotation, where the order of the + * transforms is specified. + * (2) The rotation is taken about a point specified by (xcen, ycen). + * Let the components of the vector from the center of rotation + * to the box center be (xdif, ydif): + * xdif = (bx + 0.5 * bw) - xcen + * ydif = (by + 0.5 * bh) - ycen + * Then the box center after rotation has new components: + * bxcen = xcen + xdif * cosa + ydif * sina + * bycen = ycen + ydif * cosa - xdif * sina + * where cosa and sina are the cos and sin of the angle, + * and the enclosing box for the rotated box has size: + * rw = |bw * cosa| + |bh * sina| + * rh = |bh * cosa| + |bw * sina| + * where bw and bh are the unrotated width and height. + * Then the box UL corner (rx, ry) is + * rx = bxcen - 0.5 * rw + * ry = bycen - 0.5 * rh + * (3) The center of rotation specified by args %xcen and %ycen + * is the point BEFORE any translation or scaling. If the + * rotation is not the first operation, this function finds + * the actual center at the time of rotation. It does this + * by making the following assumptions: + * (1) Any scaling is with respect to the UL corner, so + * that the center location scales accordingly. + * (2) A translation does not affect the center of + * the image; it just moves the boxes. + * We always use assumption (1). However, assumption (2) + * will be incorrect if the apparent translation is due + * to a clipping operation that, in effect, moves the + * origin of the image. In that case, you should NOT use + * these simple functions. Instead, use the functions + * in affinecompose.c, where the rotation center can be + * computed from the actual clipping due to translation + * of the image origin. + * </pre> + */ +BOX * +boxTransformOrdered(BOX *boxs, + l_int32 shiftx, + l_int32 shifty, + l_float32 scalex, + l_float32 scaley, + l_int32 xcen, + l_int32 ycen, + l_float32 angle, + l_int32 order) +{ +l_int32 bx, by, bw, bh, tx, ty, tw, th; +l_int32 xcent, ycent; /* transformed center of rotation due to scaling */ +l_float32 sina, cosa, xdif, ydif, rx, ry, rw, rh; +BOX *boxd; + + PROCNAME("boxTransformOrdered"); + + if (!boxs) + return (BOX *)ERROR_PTR("boxs not defined", procName, NULL); + if (order != L_TR_SC_RO && order != L_SC_RO_TR && order != L_RO_TR_SC && + order != L_TR_RO_SC && order != L_RO_SC_TR && order != L_SC_TR_RO) + return (BOX *)ERROR_PTR("order invalid", procName, NULL); + + boxGetGeometry(boxs, &bx, &by, &bw, &bh); + if (bw <= 0 || bh <= 0) /* invalid */ + return boxCreate(0, 0, 0, 0); + if (angle != 0.0) { + sina = sin(angle); + cosa = cos(angle); + } + + if (order == L_TR_SC_RO) { + tx = (l_int32)(scalex * (bx + shiftx) + 0.5); + ty = (l_int32)(scaley * (by + shifty) + 0.5); + tw = (l_int32)(L_MAX(1.0, scalex * bw + 0.5)); + th = (l_int32)(L_MAX(1.0, scaley * bh + 0.5)); + xcent = (l_int32)(scalex * xcen + 0.5); + ycent = (l_int32)(scaley * ycen + 0.5); + if (angle == 0.0) { + boxd = boxCreate(tx, ty, tw, th); + } else { + xdif = tx + 0.5 * tw - xcent; + ydif = ty + 0.5 * th - ycent; + rw = L_ABS(tw * cosa) + L_ABS(th * sina); + rh = L_ABS(th * cosa) + L_ABS(tw * sina); + rx = xcent + xdif * cosa - ydif * sina - 0.5 * rw; + ry = ycent + ydif * cosa + xdif * sina - 0.5 * rh; + boxd = boxCreate((l_int32)rx, (l_int32)ry, (l_int32)rw, + (l_int32)rh); + } + } else if (order == L_SC_TR_RO) { + tx = (l_int32)(scalex * bx + shiftx + 0.5); + ty = (l_int32)(scaley * by + shifty + 0.5); + tw = (l_int32)(L_MAX(1.0, scalex * bw + 0.5)); + th = (l_int32)(L_MAX(1.0, scaley * bh + 0.5)); + xcent = (l_int32)(scalex * xcen + 0.5); + ycent = (l_int32)(scaley * ycen + 0.5); + if (angle == 0.0) { + boxd = boxCreate(tx, ty, tw, th); + } else { + xdif = tx + 0.5 * tw - xcent; + ydif = ty + 0.5 * th - ycent; + rw = L_ABS(tw * cosa) + L_ABS(th * sina); + rh = L_ABS(th * cosa) + L_ABS(tw * sina); + rx = xcent + xdif * cosa - ydif * sina - 0.5 * rw; + ry = ycent + ydif * cosa + xdif * sina - 0.5 * rh; + boxd = boxCreate((l_int32)rx, (l_int32)ry, (l_int32)rw, + (l_int32)rh); + } + } else if (order == L_RO_TR_SC) { + if (angle == 0.0) { + rx = bx; + ry = by; + rw = bw; + rh = bh; + } else { + xdif = bx + 0.5 * bw - xcen; + ydif = by + 0.5 * bh - ycen; + rw = L_ABS(bw * cosa) + L_ABS(bh * sina); + rh = L_ABS(bh * cosa) + L_ABS(bw * sina); + rx = xcen + xdif * cosa - ydif * sina - 0.5 * rw; + ry = ycen + ydif * cosa + xdif * sina - 0.5 * rh; + } + tx = (l_int32)(scalex * (rx + shiftx) + 0.5); + ty = (l_int32)(scaley * (ry + shifty) + 0.5); + tw = (l_int32)(L_MAX(1.0, scalex * rw + 0.5)); + th = (l_int32)(L_MAX(1.0, scaley * rh + 0.5)); + boxd = boxCreate(tx, ty, tw, th); + } else if (order == L_RO_SC_TR) { + if (angle == 0.0) { + rx = bx; + ry = by; + rw = bw; + rh = bh; + } else { + xdif = bx + 0.5 * bw - xcen; + ydif = by + 0.5 * bh - ycen; + rw = L_ABS(bw * cosa) + L_ABS(bh * sina); + rh = L_ABS(bh * cosa) + L_ABS(bw * sina); + rx = xcen + xdif * cosa - ydif * sina - 0.5 * rw; + ry = ycen + ydif * cosa + xdif * sina - 0.5 * rh; + } + tx = (l_int32)(scalex * rx + shiftx + 0.5); + ty = (l_int32)(scaley * ry + shifty + 0.5); + tw = (l_int32)(L_MAX(1.0, scalex * rw + 0.5)); + th = (l_int32)(L_MAX(1.0, scaley * rh + 0.5)); + boxd = boxCreate(tx, ty, tw, th); + } else if (order == L_TR_RO_SC) { + tx = bx + shiftx; + ty = by + shifty; + if (angle == 0.0) { + rx = tx; + ry = ty; + rw = bw; + rh = bh; + } else { + xdif = tx + 0.5 * bw - xcen; + ydif = ty + 0.5 * bh - ycen; + rw = L_ABS(bw * cosa) + L_ABS(bh * sina); + rh = L_ABS(bh * cosa) + L_ABS(bw * sina); + rx = xcen + xdif * cosa - ydif * sina - 0.5 * rw; + ry = ycen + ydif * cosa + xdif * sina - 0.5 * rh; + } + tx = (l_int32)(scalex * rx + 0.5); + ty = (l_int32)(scaley * ry + 0.5); + tw = (l_int32)(L_MAX(1.0, scalex * rw + 0.5)); + th = (l_int32)(L_MAX(1.0, scaley * rh + 0.5)); + boxd = boxCreate(tx, ty, tw, th); + } else { /* order == L_SC_RO_TR) */ + tx = (l_int32)(scalex * bx + 0.5); + ty = (l_int32)(scaley * by + 0.5); + tw = (l_int32)(L_MAX(1.0, scalex * bw + 0.5)); + th = (l_int32)(L_MAX(1.0, scaley * bh + 0.5)); + xcent = (l_int32)(scalex * xcen + 0.5); + ycent = (l_int32)(scaley * ycen + 0.5); + if (angle == 0.0) { + rx = tx; + ry = ty; + rw = tw; + rh = th; + } else { + xdif = tx + 0.5 * tw - xcent; + ydif = ty + 0.5 * th - ycent; + rw = L_ABS(tw * cosa) + L_ABS(th * sina); + rh = L_ABS(th * cosa) + L_ABS(tw * sina); + rx = xcent + xdif * cosa - ydif * sina - 0.5 * rw; + ry = ycent + ydif * cosa + xdif * sina - 0.5 * rh; + } + tx = (l_int32)(rx + shiftx + 0.5); + ty = (l_int32)(ry + shifty + 0.5); + tw = (l_int32)(rw + 0.5); + th = (l_int32)(rh + 0.5); + boxd = boxCreate(tx, ty, tw, th); + } + + return boxd; +} + + +/*! + * \brief boxaRotateOrth() + * + * \param[in] boxas + * \param[in] w, h of image in which the boxa is embedded + * \param[in] rotation 0 = noop, 1 = 90 deg, 2 = 180 deg, 3 = 270 deg; + * all rotations are clockwise + * \return boxad, or NULL on error + * + * <pre> + * Notes: + * (1) See boxRotateOrth() for details. + * </pre> + */ +BOXA * +boxaRotateOrth(BOXA *boxas, + l_int32 w, + l_int32 h, + l_int32 rotation) +{ +l_int32 i, n; +BOX *boxs, *boxd; +BOXA *boxad; + + PROCNAME("boxaRotateOrth"); + + if (!boxas) + return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL); + if (rotation < 0 || rotation > 3) + return (BOXA *)ERROR_PTR("rotation not in {0,1,2,3}", procName, NULL); + if (rotation == 0) + return boxaCopy(boxas, L_COPY); + + n = boxaGetCount(boxas); + if ((boxad = boxaCreate(n)) == NULL) + return (BOXA *)ERROR_PTR("boxad not made", procName, NULL); + for (i = 0; i < n; i++) { + if ((boxs = boxaGetBox(boxas, i, L_CLONE)) == NULL) { + boxaDestroy(&boxad); + return (BOXA *)ERROR_PTR("boxs not found", procName, NULL); + } + boxd = boxRotateOrth(boxs, w, h, rotation); + boxDestroy(&boxs); + boxaAddBox(boxad, boxd, L_INSERT); + } + + return boxad; +} + + +/*! + * \brief boxRotateOrth() + * + * \param[in] box + * \param[in] w, h of image in which the box is embedded + * \param[in] rotation 0 = noop, 1 = 90 deg, 2 = 180 deg, 3 = 270 deg; + * all rotations are clockwise + * \return boxd, or NULL on error + * + * <pre> + * Notes: + * (1) Rotate the image with the embedded box by the specified amount. + * (2) After rotation, the rotated box is always measured with + * respect to the UL corner of the image. + * </pre> + */ +BOX * +boxRotateOrth(BOX *box, + l_int32 w, + l_int32 h, + l_int32 rotation) +{ +l_int32 bx, by, bw, bh, xdist, ydist; + + PROCNAME("boxRotateOrth"); + + if (!box) + return (BOX *)ERROR_PTR("box not defined", procName, NULL); + if (rotation < 0 || rotation > 3) + return (BOX *)ERROR_PTR("rotation not in {0,1,2,3}", procName, NULL); + if (rotation == 0) + return boxCopy(box); + + boxGetGeometry(box, &bx, &by, &bw, &bh); + if (bw <= 0 || bh <= 0) /* invalid */ + return boxCreate(0, 0, 0, 0); + ydist = h - by - bh; /* below box */ + xdist = w - bx - bw; /* to right of box */ + if (rotation == 1) /* 90 deg cw */ + return boxCreate(ydist, bx, bh, bw); + else if (rotation == 2) /* 180 deg cw */ + return boxCreate(xdist, ydist, bw, bh); + else /* rotation == 3, 270 deg cw */ + return boxCreate(by, xdist, bh, bw); +} + + +/*! + * \brief boxaShiftWithPta() + * + * \param[in] boxas + * \param[in] pta aligned with the boxes; determines shift amount + * \param[in] dir +1 to shift by the values in pta; -1 to shift + * by the negative of the values in the pta. + * \return boxad, or NULL on error + * + * <pre> + * Notes: + * (1) In use, %pta may come from the UL corners of of a boxa, each + * of whose boxes contains the corresponding box of %boxas + * within it. The output %boxad is then a boxa in the (global) + * coordinates of the containing boxa. So the input %pta + * could come from boxaExtractCorners(). + * (2) The operations with %dir == 1 and %dir == -1 are inverses if + * called in order (1, -1). Starting with an input boxa and + * calling twice with these values of %dir results in a boxa + * identical to the input. However, because box parameters can + * never be negative, calling in the order (-1, 1) may result + * in clipping at the left side and the top. + * </pre> + */ +BOXA * +boxaShiftWithPta(BOXA *boxas, + PTA *pta, + l_int32 dir) +{ +l_int32 i, n, x, y, full; +BOX *box1, *box2; +BOXA *boxad; + + PROCNAME("boxaShiftWithPta"); + + if (!boxas) + return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL); + boxaIsFull(boxas, &full); + if (!full) + return (BOXA *)ERROR_PTR("boxas not full", procName, NULL); + if (!pta) + return (BOXA *)ERROR_PTR("pta not defined", procName, NULL); + if (dir != 1 && dir != -1) + return (BOXA *)ERROR_PTR("invalid dir", procName, NULL); + n = boxaGetCount(boxas); + if (n != ptaGetCount(pta)) + return (BOXA *)ERROR_PTR("boxas and pta not same size", procName, NULL); + + if ((boxad = boxaCreate(n)) == NULL) + return (BOXA *)ERROR_PTR("boxad not made", procName, NULL); + for (i = 0; i < n; i++) { + box1 = boxaGetBox(boxas, i, L_COPY); + ptaGetIPt(pta, i, &x, &y); + box2 = boxTransform(box1, dir * x, dir * y, 1.0, 1.0); + boxaAddBox(boxad, box2, L_INSERT); + boxDestroy(&box1); + } + return boxad; +} + + +/*---------------------------------------------------------------------* + * Boxa sort * + *---------------------------------------------------------------------*/ +/*! + * \brief boxaSort() + * + * \param[in] boxas + * \param[in] sorttype L_SORT_BY_X, L_SORT_BY_Y, + * L_SORT_BY_RIGHT, L_SORT_BY_BOT, + * L_SORT_BY_WIDTH, L_SORT_BY_HEIGHT, + * L_SORT_BY_MIN_DIMENSION, L_SORT_BY_MAX_DIMENSION, + * L_SORT_BY_PERIMETER, L_SORT_BY_AREA, + * L_SORT_BY_ASPECT_RATIO + * \param[in] sortorder L_SORT_INCREASING, L_SORT_DECREASING + * \param[out] pnaindex [optional] index of sorted order into + * original array + * \return boxad sorted version of boxas, or NULL on error + * + * <pre> + * Notes: + * (1) An empty boxa returns a copy, with a warning. + * </pre> + */ +BOXA * +boxaSort(BOXA *boxas, + l_int32 sorttype, + l_int32 sortorder, + NUMA **pnaindex) +{ +l_int32 i, n, x, y, w, h, size; +BOXA *boxad; +NUMA *na, *naindex; + + PROCNAME("boxaSort"); + + if (pnaindex) *pnaindex = NULL; + if (!boxas) + return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL); + if ((n = boxaGetCount(boxas)) == 0) { + L_WARNING("boxas is empty\n", procName); + return boxaCopy(boxas, L_COPY); + } + if (sorttype != L_SORT_BY_X && sorttype != L_SORT_BY_Y && + sorttype != L_SORT_BY_RIGHT && sorttype != L_SORT_BY_BOT && + sorttype != L_SORT_BY_WIDTH && sorttype != L_SORT_BY_HEIGHT && + sorttype != L_SORT_BY_MIN_DIMENSION && + sorttype != L_SORT_BY_MAX_DIMENSION && + sorttype != L_SORT_BY_PERIMETER && + sorttype != L_SORT_BY_AREA && + sorttype != L_SORT_BY_ASPECT_RATIO) + return (BOXA *)ERROR_PTR("invalid sort type", procName, NULL); + if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING) + return (BOXA *)ERROR_PTR("invalid sort order", procName, NULL); + + /* Use O(n) binsort if possible */ + if (n > MinCompsForBinSort && + ((sorttype == L_SORT_BY_X) || (sorttype == L_SORT_BY_Y) || + (sorttype == L_SORT_BY_WIDTH) || (sorttype == L_SORT_BY_HEIGHT) || + (sorttype == L_SORT_BY_PERIMETER))) + return boxaBinSort(boxas, sorttype, sortorder, pnaindex); + + /* Build up numa of specific data */ + if ((na = numaCreate(n)) == NULL) + return (BOXA *)ERROR_PTR("na not made", procName, NULL); + for (i = 0; i < n; i++) { + boxaGetBoxGeometry(boxas, i, &x, &y, &w, &h); + switch (sorttype) + { + case L_SORT_BY_X: + numaAddNumber(na, x); + break; + case L_SORT_BY_Y: + numaAddNumber(na, y); + break; + case L_SORT_BY_RIGHT: + numaAddNumber(na, x + w - 1); + break; + case L_SORT_BY_BOT: + numaAddNumber(na, y + h - 1); + break; + case L_SORT_BY_WIDTH: + numaAddNumber(na, w); + break; + case L_SORT_BY_HEIGHT: + numaAddNumber(na, h); + break; + case L_SORT_BY_MIN_DIMENSION: + size = L_MIN(w, h); + numaAddNumber(na, size); + break; + case L_SORT_BY_MAX_DIMENSION: + size = L_MAX(w, h); + numaAddNumber(na, size); + break; + case L_SORT_BY_PERIMETER: + size = w + h; + numaAddNumber(na, size); + break; + case L_SORT_BY_AREA: + size = w * h; + numaAddNumber(na, size); + break; + case L_SORT_BY_ASPECT_RATIO: + numaAddNumber(na, (l_float32)w / (l_float32)h); + break; + default: + L_WARNING("invalid sort type\n", procName); + } + } + + /* Get the sort index for data array */ + naindex = numaGetSortIndex(na, sortorder); + numaDestroy(&na); + if (!naindex) + return (BOXA *)ERROR_PTR("naindex not made", procName, NULL); + + /* Build up sorted boxa using sort index */ + boxad = boxaSortByIndex(boxas, naindex); + + if (pnaindex) + *pnaindex = naindex; + else + numaDestroy(&naindex); + return boxad; +} + + +/*! + * \brief boxaBinSort() + * + * \param[in] boxas + * \param[in] sorttype L_SORT_BY_X, L_SORT_BY_Y, L_SORT_BY_WIDTH, + * L_SORT_BY_HEIGHT, L_SORT_BY_PERIMETER + * \param[in] sortorder L_SORT_INCREASING, L_SORT_DECREASING + * \param[out] pnaindex [optional] index of sorted order into + * original array + * \return boxad sorted version of boxas, or NULL on error + * + * <pre> + * Notes: + * (1) For a large number of boxes (say, greater than 1000), this + * O(n) binsort is much faster than the O(nlogn) shellsort. + * For 5000 components, this is over 20x faster than boxaSort(). + * (2) Consequently, boxaSort() calls this function if it will + * likely go much faster. + * </pre> + */ +BOXA * +boxaBinSort(BOXA *boxas, + l_int32 sorttype, + l_int32 sortorder, + NUMA **pnaindex) +{ +l_int32 i, n, x, y, w, h; +BOXA *boxad; +NUMA *na, *naindex; + + PROCNAME("boxaBinSort"); + + if (pnaindex) *pnaindex = NULL; + if (!boxas) + return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL); + if ((n = boxaGetCount(boxas)) == 0) { + L_WARNING("boxas is empty\n", procName); + return boxaCopy(boxas, L_COPY); + } + if (sorttype != L_SORT_BY_X && sorttype != L_SORT_BY_Y && + sorttype != L_SORT_BY_WIDTH && sorttype != L_SORT_BY_HEIGHT && + sorttype != L_SORT_BY_PERIMETER) + return (BOXA *)ERROR_PTR("invalid sort type", procName, NULL); + if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING) + return (BOXA *)ERROR_PTR("invalid sort order", procName, NULL); + + /* Generate Numa of appropriate box dimensions */ + if ((na = numaCreate(n)) == NULL) + return (BOXA *)ERROR_PTR("na not made", procName, NULL); + for (i = 0; i < n; i++) { + boxaGetBoxGeometry(boxas, i, &x, &y, &w, &h); + switch (sorttype) + { + case L_SORT_BY_X: + numaAddNumber(na, x); + break; + case L_SORT_BY_Y: + numaAddNumber(na, y); + break; + case L_SORT_BY_WIDTH: + numaAddNumber(na, w); + break; + case L_SORT_BY_HEIGHT: + numaAddNumber(na, h); + break; + case L_SORT_BY_PERIMETER: + numaAddNumber(na, w + h); + break; + default: + L_WARNING("invalid sort type\n", procName); + } + } + + /* Get the sort index for data array */ + naindex = numaGetBinSortIndex(na, sortorder); + numaDestroy(&na); + if (!naindex) + return (BOXA *)ERROR_PTR("naindex not made", procName, NULL); + + /* Build up sorted boxa using the sort index */ + boxad = boxaSortByIndex(boxas, naindex); + + if (pnaindex) + *pnaindex = naindex; + else + numaDestroy(&naindex); + return boxad; +} + + +/*! + * \brief boxaSortByIndex() + * + * \param[in] boxas + * \param[in] naindex na that maps from the new boxa to the input boxa + * \return boxad sorted, or NULL on error + */ +BOXA * +boxaSortByIndex(BOXA *boxas, + NUMA *naindex) +{ +l_int32 i, n, index; +BOX *box; +BOXA *boxad; + + PROCNAME("boxaSortByIndex"); + + if (!boxas) + return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL); + if ((n = boxaGetCount(boxas)) == 0) { + L_WARNING("boxas is empty\n", procName); + return boxaCopy(boxas, L_COPY); + } + if (!naindex) + return (BOXA *)ERROR_PTR("naindex not defined", procName, NULL); + + boxad = boxaCreate(n); + for (i = 0; i < n; i++) { + numaGetIValue(naindex, i, &index); + box = boxaGetBox(boxas, index, L_COPY); + boxaAddBox(boxad, box, L_INSERT); + } + + return boxad; +} + + +/*! + * \brief boxaSort2d() + * + * \param[in] boxas + * \param[out] pnaad [optional] numaa with sorted indices + * whose values are the indices of the input array + * \param[in] delta1 min separation that permits aggregation of a box + * onto a boxa of horizontally-aligned boxes; pass 1 + * \param[in] delta2 min separation that permits aggregation of a box + * onto a boxa of horizontally-aligned boxes; pass 2 + * \param[in] minh1 components less than this height either join an + * existing boxa or are set aside for pass 2 + * \return baa 2d sorted version of boxa, or NULL on error + * + * <pre> + * Notes: + * (1) The final result is a sort where the 'fast scan' direction is + * left to right, and the 'slow scan' direction is from top + * to bottom. Each boxa in the baa represents a sorted set + * of boxes from left to right. + * (2) Three passes are used to aggregate the boxas, which can correspond + * to characters or words in a line of text. In pass 1, only + * taller components, which correspond to xheight or larger, + * are permitted to start a new boxa. In pass 2, the remaining + * vertically-challenged components are allowed to join an + * existing boxa or start a new one. In pass 3, boxa whose extent + * is overlapping are joined. After that, the boxes in each + * boxa are sorted horizontally, and finally the boxa are + * sorted vertically. + * (3) If %delta1 > 0, the first pass allows aggregation when + * boxes in the same boxa do not overlap vertically. In fact, + * %delta1 is the max distance by which they can miss and still + * be aggregated. If %delta1 < 0, the box must have vertical + * overlap of at least abs(%delta1) with the boxa before it + * can be merged. Similar for delta2 on the second pass. + * (4) On the first pass, any component of height less than minh1 + * cannot start a new boxa; it's put aside for later insertion. + * (5) On the second pass, any small component that doesn't align + * with an existing boxa can start a new one. + * (6) This can be used to identify lines of text from + * character or word bounding boxes. + * (7) Typical values for the input parameters on 300 ppi text are: + * delta1 ~ 0 + * delta2 ~ 0 + * minh1 ~ 5 + * </pre> + */ +BOXAA * +boxaSort2d(BOXA *boxas, + NUMAA **pnaad, + l_int32 delta1, + l_int32 delta2, + l_int32 minh1) +{ +l_int32 i, index, h, nt, ne, n, m, ival; +BOX *box; +BOXA *boxa, *boxae, *boxan, *boxa1, *boxa2, *boxa3, *boxav, *boxavs; +BOXAA *baa, *baa1, *baad; +NUMA *naindex, *nae, *nan, *nah, *nav, *na1, *na2, *nad, *namap; +NUMAA *naa, *naa1, *naad; + + PROCNAME("boxaSort2d"); + + if (pnaad) *pnaad = NULL; + if (!boxas) + return (BOXAA *)ERROR_PTR("boxas not defined", procName, NULL); + if (boxaGetCount(boxas) == 0) + return (BOXAA *)ERROR_PTR("boxas is empty", procName, NULL); + + /* Sort from left to right */ + if ((boxa = boxaSort(boxas, L_SORT_BY_X, L_SORT_INCREASING, &naindex)) + == NULL) + return (BOXAA *)ERROR_PTR("boxa not made", procName, NULL); + + /* First pass: assign taller boxes to boxa by row */ + nt = boxaGetCount(boxa); + baa = boxaaCreate(0); + naa = numaaCreate(0); + boxae = boxaCreate(0); /* save small height boxes here */ + nae = numaCreate(0); /* keep track of small height boxes */ + for (i = 0; i < nt; i++) { + box = boxaGetBox(boxa, i, L_CLONE); + boxGetGeometry(box, NULL, NULL, NULL, &h); + if (h < minh1) { /* save for 2nd pass */ + boxaAddBox(boxae, box, L_INSERT); + numaAddNumber(nae, i); + } else { + n = boxaaGetCount(baa); + boxaaAlignBox(baa, box, delta1, &index); + if (index < n) { /* append to an existing boxa */ + boxaaAddBox(baa, index, box, L_INSERT); + } else { /* doesn't align, need new boxa */ + boxan = boxaCreate(0); + boxaAddBox(boxan, box, L_INSERT); + boxaaAddBoxa(baa, boxan, L_INSERT); + nan = numaCreate(0); + numaaAddNuma(naa, nan, L_INSERT); + } + numaGetIValue(naindex, i, &ival); + numaaAddNumber(naa, index, ival); + } + } + boxaDestroy(&boxa); + numaDestroy(&naindex); + + /* Second pass: feed in small height boxes */ + ne = boxaGetCount(boxae); + for (i = 0; i < ne; i++) { + box = boxaGetBox(boxae, i, L_CLONE); + n = boxaaGetCount(baa); + boxaaAlignBox(baa, box, delta2, &index); + if (index < n) { /* append to an existing boxa */ + boxaaAddBox(baa, index, box, L_INSERT); + } else { /* doesn't align, need new boxa */ + boxan = boxaCreate(0); + boxaAddBox(boxan, box, L_INSERT); + boxaaAddBoxa(baa, boxan, L_INSERT); + nan = numaCreate(0); + numaaAddNuma(naa, nan, L_INSERT); + } + numaGetIValue(nae, i, &ival); /* location in original boxas */ + numaaAddNumber(naa, index, ival); + } + + /* Third pass: merge some boxa whose extent is overlapping. + * Think of these boxa as text lines, where the bounding boxes + * of the text lines can overlap, but likely won't have + * a huge overlap. + * First do a greedy find of pairs of overlapping boxa, where + * the two boxa overlap by at least 50% of the smaller, and + * the smaller is not more than half the area of the larger. + * For such pairs, call the larger one the primary boxa. The + * boxes in the smaller one are appended to those in the primary + * in pass 3a, and the primaries are extracted in pass 3b. + * In this way, all boxes in the original baa are saved. */ + n = boxaaGetCount(baa); + boxaaGetExtent(baa, NULL, NULL, NULL, &boxa3); + boxa1 = boxaHandleOverlaps(boxa3, L_REMOVE_SMALL, 1000, 0.5, 0.5, &namap); + boxaDestroy(&boxa1); + boxaDestroy(&boxa3); + for (i = 0; i < n; i++) { /* Pass 3a: join selected copies of boxa */ + numaGetIValue(namap, i, &ival); + if (ival >= 0) { /* join current to primary boxa[ival] */ + boxa1 = boxaaGetBoxa(baa, i, L_COPY); + boxa2 = boxaaGetBoxa(baa, ival, L_CLONE); + boxaJoin(boxa2, boxa1, 0, -1); + boxaDestroy(&boxa2); + boxaDestroy(&boxa1); + na1 = numaaGetNuma(naa, i, L_COPY); + na2 = numaaGetNuma(naa, ival, L_CLONE); + numaJoin(na2, na1, 0, -1); + numaDestroy(&na1); + numaDestroy(&na2); + } + } + baa1 = boxaaCreate(n); + naa1 = numaaCreate(n); + for (i = 0; i < n; i++) { /* Pass 3b: save primary boxa */ + numaGetIValue(namap, i, &ival); + if (ival == -1) { + boxa1 = boxaaGetBoxa(baa, i, L_CLONE); + boxaaAddBoxa(baa1, boxa1, L_INSERT); + na1 = numaaGetNuma(naa, i, L_CLONE); + numaaAddNuma(naa1, na1, L_INSERT); + } + } + numaDestroy(&namap); + boxaaDestroy(&baa); + baa = baa1; + numaaDestroy(&naa); + naa = naa1; + + /* Sort the boxes in each boxa horizontally */ + m = boxaaGetCount(baa); + for (i = 0; i < m; i++) { + boxa1 = boxaaGetBoxa(baa, i, L_CLONE); + boxa2 = boxaSort(boxa1, L_SORT_BY_X, L_SORT_INCREASING, &nah); + boxaaReplaceBoxa(baa, i, boxa2); + na1 = numaaGetNuma(naa, i, L_CLONE); + na2 = numaSortByIndex(na1, nah); + numaaReplaceNuma(naa, i, na2); + boxaDestroy(&boxa1); + numaDestroy(&na1); + numaDestroy(&nah); + } + + /* Sort the boxa vertically within boxaa, using the first box + * in each boxa. */ + m = boxaaGetCount(baa); + boxav = boxaCreate(m); /* holds first box in each boxa in baa */ + naad = numaaCreate(m); + if (pnaad) + *pnaad = naad; + baad = boxaaCreate(m); + for (i = 0; i < m; i++) { + boxa1 = boxaaGetBoxa(baa, i, L_CLONE); + box = boxaGetBox(boxa1, 0, L_CLONE); + boxaAddBox(boxav, box, L_INSERT); + boxaDestroy(&boxa1); + } + boxavs = boxaSort(boxav, L_SORT_BY_Y, L_SORT_INCREASING, &nav); + for (i = 0; i < m; i++) { + numaGetIValue(nav, i, &index); + boxa = boxaaGetBoxa(baa, index, L_CLONE); + boxaaAddBoxa(baad, boxa, L_INSERT); + nad = numaaGetNuma(naa, index, L_CLONE); + numaaAddNuma(naad, nad, L_INSERT); + } + + +/* lept_stderr("box count = %d, numaa count = %d\n", nt, + numaaGetNumberCount(naad)); */ + + boxaaDestroy(&baa); + boxaDestroy(&boxav); + boxaDestroy(&boxavs); + boxaDestroy(&boxae); + numaDestroy(&nav); + numaDestroy(&nae); + numaaDestroy(&naa); + if (!pnaad) + numaaDestroy(&naad); + + return baad; +} + + +/*! + * \brief boxaSort2dByIndex() + * + * \param[in] boxas + * \param[in] naa numaa that maps from the new baa to the input boxa + * \return baa sorted boxaa, or NULL on error + */ +BOXAA * +boxaSort2dByIndex(BOXA *boxas, + NUMAA *naa) +{ +l_int32 ntot, boxtot, i, j, n, nn, index; +BOX *box; +BOXA *boxa; +BOXAA *baa; +NUMA *na; + + PROCNAME("boxaSort2dByIndex"); + + if (!boxas) + return (BOXAA *)ERROR_PTR("boxas not defined", procName, NULL); + if ((boxtot = boxaGetCount(boxas)) == 0) + return (BOXAA *)ERROR_PTR("boxas is empty", procName, NULL); + if (!naa) + return (BOXAA *)ERROR_PTR("naindex not defined", procName, NULL); + + /* Check counts */ + ntot = numaaGetNumberCount(naa); + if (ntot != boxtot) + return (BOXAA *)ERROR_PTR("element count mismatch", procName, NULL); + + n = numaaGetCount(naa); + baa = boxaaCreate(n); + for (i = 0; i < n; i++) { + na = numaaGetNuma(naa, i, L_CLONE); + nn = numaGetCount(na); + boxa = boxaCreate(nn); + for (j = 0; j < nn; j++) { + numaGetIValue(na, i, &index); + box = boxaGetBox(boxas, index, L_COPY); + boxaAddBox(boxa, box, L_INSERT); + } + boxaaAddBoxa(baa, boxa, L_INSERT); + numaDestroy(&na); + } + + return baa; +} + + +/*---------------------------------------------------------------------* + * Boxa array extraction * + *---------------------------------------------------------------------*/ +/*! + * \brief boxaExtractAsNuma() + * + * \param[in] boxa + * \param[out] pnal [optional] array of left locations + * \param[out] pnat [optional] array of top locations + * \param[out] pnar [optional] array of right locations + * \param[out] pnab [optional] array of bottom locations + * \param[out] pnaw [optional] array of widths + * \param[out] pnah [optional] array of heights + * \param[in] keepinvalid 1 to keep invalid boxes; 0 to remove them + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) If you are counting or sorting values, such as determining + * rank order, you must remove invalid boxes. + * (2) If you are parametrizing the values, or doing an evaluation + * where the position in the boxa sequence is important, you + * must replace the invalid boxes with valid ones before + * doing the extraction. This is easily done with boxaFillSequence(). + * </pre> + */ +l_ok +boxaExtractAsNuma(BOXA *boxa, + NUMA **pnal, + NUMA **pnat, + NUMA **pnar, + NUMA **pnab, + NUMA **pnaw, + NUMA **pnah, + l_int32 keepinvalid) +{ +l_int32 i, n, left, top, right, bot, w, h; + + PROCNAME("boxaExtractAsNuma"); + + if (!pnal && !pnat && !pnar && !pnab && !pnaw && !pnah) + return ERROR_INT("no output requested", procName, 1); + if (pnal) *pnal = NULL; + if (pnat) *pnat = NULL; + if (pnar) *pnar = NULL; + if (pnab) *pnab = NULL; + if (pnaw) *pnaw = NULL; + if (pnah) *pnah = NULL; + if (!boxa) + return ERROR_INT("boxa not defined", procName, 1); + if (!keepinvalid && boxaGetValidCount(boxa) == 0) + return ERROR_INT("no valid boxes", procName, 1); + + n = boxaGetCount(boxa); + if (pnal) *pnal = numaCreate(n); + if (pnat) *pnat = numaCreate(n); + if (pnar) *pnar = numaCreate(n); + if (pnab) *pnab = numaCreate(n); + if (pnaw) *pnaw = numaCreate(n); + if (pnah) *pnah = numaCreate(n); + for (i = 0; i < n; i++) { + boxaGetBoxGeometry(boxa, i, &left, &top, &w, &h); + if (!keepinvalid && (w <= 0 || h <= 0)) + continue; + right = left + w - 1; + bot = top + h - 1; + if (pnal) numaAddNumber(*pnal, left); + if (pnat) numaAddNumber(*pnat, top); + if (pnar) numaAddNumber(*pnar, right); + if (pnab) numaAddNumber(*pnab, bot); + if (pnaw) numaAddNumber(*pnaw, w); + if (pnah) numaAddNumber(*pnah, h); + } + + return 0; +} + + +/*! + * \brief boxaExtractAsPta() + * + * \param[in] boxa + * \param[out] pptal [optional] array of left locations vs. index + * \param[out] pptat [optional] array of top locations vs. index + * \param[out] pptar [optional] array of right locations vs. index + * \param[out] pptab [optional] array of bottom locations vs. index + * \param[out] pptaw [optional] array of widths vs. index + * \param[out] pptah [optional] array of heights vs. index + * \param[in] keepinvalid 1 to keep invalid boxes; 0 to remove them + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) For most applications, such as counting, sorting, fitting + * to some parametrized form, plotting or filtering in general, + * you should remove the invalid boxes. Each pta saves the + * box index in the x array, so replacing invalid boxes by + * filling with boxaFillSequence(), which is required for + * boxaExtractAsNuma(), is not necessary. + * (2) If invalid boxes are retained, each one will result in + * entries (typically 0) in all selected output pta. + * (3) Other boxa --> pta functions are: + * * boxaExtractCorners(): extracts any of the four corners as a pta. + * * boxaConvertToPta(): extracts sufficient number of corners + * to allow reconstruction of the original boxa from the pta. + * </pre> + */ +l_ok +boxaExtractAsPta(BOXA *boxa, + PTA **pptal, + PTA **pptat, + PTA **pptar, + PTA **pptab, + PTA **pptaw, + PTA **pptah, + l_int32 keepinvalid) +{ +l_int32 i, n, left, top, right, bot, w, h; + + PROCNAME("boxaExtractAsPta"); + + if (!pptal && !pptar && !pptat && !pptab && !pptaw && !pptah) + return ERROR_INT("no output requested", procName, 1); + if (pptal) *pptal = NULL; + if (pptat) *pptat = NULL; + if (pptar) *pptar = NULL; + if (pptab) *pptab = NULL; + if (pptaw) *pptaw = NULL; + if (pptah) *pptah = NULL; + if (!boxa) + return ERROR_INT("boxa not defined", procName, 1); + if (!keepinvalid && boxaGetValidCount(boxa) == 0) + return ERROR_INT("no valid boxes", procName, 1); + + n = boxaGetCount(boxa); + if (pptal) *pptal = ptaCreate(n); + if (pptat) *pptat = ptaCreate(n); + if (pptar) *pptar = ptaCreate(n); + if (pptab) *pptab = ptaCreate(n); + if (pptaw) *pptaw = ptaCreate(n); + if (pptah) *pptah = ptaCreate(n); + for (i = 0; i < n; i++) { + boxaGetBoxGeometry(boxa, i, &left, &top, &w, &h); + if (!keepinvalid && (w <= 0 || h <= 0)) + continue; + right = left + w - 1; + bot = top + h - 1; + if (pptal) ptaAddPt(*pptal, i, left); + if (pptat) ptaAddPt(*pptat, i, top); + if (pptar) ptaAddPt(*pptar, i, right); + if (pptab) ptaAddPt(*pptab, i, bot); + if (pptaw) ptaAddPt(*pptaw, i, w); + if (pptah) ptaAddPt(*pptah, i, h); + } + + return 0; +} + + +/*! + * \brief boxaExtractCorners() + * + * \param[in] boxa + * \param[in] loc L_UPPER_LEFT, L_UPPER_RIGHT, L_LOWER_LEFT, + * L_LOWER_RIGHT, L_BOX_CENTER + * \return pta of requested coordinates, or NULL on error + * + * <pre> + * Notes: + * (1) Extracts (0,0) for invalid boxes. + * (2) Other boxa --> pta functions are: + * * boxaExtractAsPta(): allows extraction of any dimension + * and/or side location, with each in a separate pta. + * * boxaConvertToPta(): extracts sufficient number of corners + * to allow reconstruction of the original boxa from the pta. + * </pre> + */ +PTA * +boxaExtractCorners(BOXA *boxa, + l_int32 loc) +{ +l_int32 i, n, left, top, right, bot, w, h; +PTA *pta; + + PROCNAME("boxaExtractCorners"); + + if (!boxa) + return (PTA *)ERROR_PTR("boxa not defined", procName, NULL); + if (loc != L_UPPER_LEFT && loc != L_UPPER_RIGHT && loc != L_LOWER_LEFT && + loc != L_LOWER_RIGHT && loc != L_BOX_CENTER) + return (PTA *)ERROR_PTR("invalid location", procName, NULL); + + n = boxaGetCount(boxa); + if ((pta = ptaCreate(n)) == NULL) + return (PTA *)ERROR_PTR("pta not made", procName, NULL); + + for (i = 0; i < n; i++) { + boxaGetBoxGeometry(boxa, i, &left, &top, &w, &h); + right = left + w - 1; + bot = top + h - 1; + if (w == 0 || h == 0) { /* invalid */ + left = 0; + top = 0; + right = 0; + bot = 0; + } + if (loc == L_UPPER_LEFT) + ptaAddPt(pta, left, top); + else if (loc == L_UPPER_RIGHT) + ptaAddPt(pta, right, top); + else if (loc == L_LOWER_LEFT) + ptaAddPt(pta, left, bot); + else if (loc == L_LOWER_RIGHT) + ptaAddPt(pta, right, bot); + else if (loc == L_BOX_CENTER) + ptaAddPt(pta, (left + right) / 2, (top + bot) / 2); + } + + return pta; +} + + +/*---------------------------------------------------------------------* + * Boxa statistics * + *---------------------------------------------------------------------*/ +/*! + * \brief boxaGetRankVals() + * + * \param[in] boxa + * \param[in] fract use 0.0 for smallest, 1.0 for largest width and height + * \param[out] px [optional] rank value of x (left side) + * \param[out] py [optional] rank value of y (top side) + * \param[out] pr [optional] rank value of right side + * \param[out] pb [optional] rank value of bottom side + * \param[out] pw [optional] rank value of width + * \param[out] ph [optional] rank value of height + * \return 0 if OK, 1 on error or if the boxa is empty or has no valid boxes + * + * <pre> + * Notes: + * (1) This function does not assume that all boxes in the boxa are valid + * (2) The six box parameters are sorted independently. + * For rank order, the width and height are sorted in increasing + * order. But what does it mean to sort x and y in "rank order"? + * If the boxes are of comparable size and somewhat + * aligned (e.g., from multiple images), it makes some sense + * to give a "rank order" for x and y by sorting them in + * decreasing order. (By the same argument, we choose to sort + * the r and b sides in increasing order.) In general, the + * interpretation of a rank order on x and y (or on r and b) + * is highly application dependent. In summary: + * ~ x and y are sorted in decreasing order + * ~ r and b are sorted in increasing order + * ~ w and h are sorted in increasing order + * </pre> + */ +l_ok +boxaGetRankVals(BOXA *boxa, + l_float32 fract, + l_int32 *px, + l_int32 *py, + l_int32 *pr, + l_int32 *pb, + l_int32 *pw, + l_int32 *ph) +{ +l_float32 xval, yval, rval, bval, wval, hval; +NUMA *nax, *nay, *nar, *nab, *naw, *nah; + + PROCNAME("boxaGetRankVals"); + + if (px) *px = 0; + if (py) *py = 0; + if (pr) *pr = 0; + if (pb) *pb = 0; + if (pw) *pw = 0; + if (ph) *ph = 0; + if (!boxa) + return ERROR_INT("boxa not defined", procName, 1); + if (fract < 0.0 || fract > 1.0) + return ERROR_INT("fract not in [0.0 ... 1.0]", procName, 1); + if (boxaGetValidCount(boxa) == 0) + return ERROR_INT("no valid boxes in boxa", procName, 1); + + /* Use only the valid boxes */ + boxaExtractAsNuma(boxa, &nax, &nay, &nar, &nab, &naw, &nah, 0); + + if (px) { + numaGetRankValue(nax, 1.0 - fract, NULL, 1, &xval); + *px = (l_int32)xval; + } + if (py) { + numaGetRankValue(nay, 1.0 - fract, NULL, 1, &yval); + *py = (l_int32)yval; + } + if (pr) { + numaGetRankValue(nar, fract, NULL, 1, &rval); + *pr = (l_int32)rval; + } + if (pb) { + numaGetRankValue(nab, fract, NULL, 1, &bval); + *pb = (l_int32)bval; + } + if (pw) { + numaGetRankValue(naw, fract, NULL, 1, &wval); + *pw = (l_int32)wval; + } + if (ph) { + numaGetRankValue(nah, fract, NULL, 1, &hval); + *ph = (l_int32)hval; + } + numaDestroy(&nax); + numaDestroy(&nay); + numaDestroy(&nar); + numaDestroy(&nab); + numaDestroy(&naw); + numaDestroy(&nah); + return 0; +} + + +/*! + * \brief boxaGetMedianVals() + * + * \param[in] boxa + * \param[out] px [optional] median value of x (left side) + * \param[out] py [optional] median value of y (top side) + * \param[out] pr [optional] median value of right side + * \param[out] pb [optional] median value of bottom side + * \param[out] pw [optional] median value of width + * \param[out] ph [optional] median value of height + * \return 0 if OK, 1 on error or if the boxa is empty or has no valid boxes + * + * <pre> + * Notes: + * (1) See boxaGetRankVals() + * </pre> + */ +l_ok +boxaGetMedianVals(BOXA *boxa, + l_int32 *px, + l_int32 *py, + l_int32 *pr, + l_int32 *pb, + l_int32 *pw, + l_int32 *ph) +{ + PROCNAME("boxaGetMedianVals"); + + if (!boxa) + return ERROR_INT("boxa not defined", procName, 1); + if (boxaGetValidCount(boxa) == 0) + return ERROR_INT("no valid boxes in boxa", procName, 1); + + return boxaGetRankVals(boxa, 0.5, px, py, pr, pb, pw, ph); +} + + +/*! + * \brief boxaGetAverageSize() + * + * \param[in] boxa + * \param[out] pw [optional] average width + * \param[out] ph [optional] average height + * \return 0 if OK, 1 on error or if the boxa is empty + */ +l_ok +boxaGetAverageSize(BOXA *boxa, + l_float32 *pw, + l_float32 *ph) +{ +l_int32 i, n, bw, bh; +l_float32 sumw, sumh; + + PROCNAME("boxaGetAverageSize"); + + if (pw) *pw = 0.0; + if (ph) *ph = 0.0; + if (!boxa) + return ERROR_INT("boxa not defined", procName, 1); + if ((n = boxaGetCount(boxa)) == 0) + return ERROR_INT("boxa is empty", procName, 1); + + sumw = sumh = 0.0; + for (i = 0; i < n; i++) { + boxaGetBoxGeometry(boxa, i, NULL, NULL, &bw, &bh); + sumw += bw; + sumh += bh; + } + + if (pw) *pw = sumw / n; + if (ph) *ph = sumh / n; + return 0; +} + + +/*---------------------------------------------------------------------* + * Other Boxaa functions * + *---------------------------------------------------------------------*/ +/*! + * \brief boxaaGetExtent() + * + * \param[in] baa + * \param[out] pw [optional] width + * \param[out] ph [optional] height + * \param[out] pbox [optional] minimum box containing all boxa + * in boxaa + * \param[out] pboxa [optional] boxa containing all boxes in each + * boxa in the boxaa + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) The returned w and h are the minimum size image + * that would contain all boxes untranslated. + * (2) Each box in the returned boxa is the minimum box required to + * hold all the boxes in the respective boxa of baa. + * (3) If there are no valid boxes in a boxa, the box corresponding + * to its extent has all fields set to 0 (an invalid box). + * </pre> + */ +l_ok +boxaaGetExtent(BOXAA *baa, + l_int32 *pw, + l_int32 *ph, + BOX **pbox, + BOXA **pboxa) +{ +l_int32 i, n, x, y, w, h, xmax, ymax, xmin, ymin, found; +BOX *box1; +BOXA *boxa, *boxa1; + + PROCNAME("boxaaGetExtent"); + + if (!pw && !ph && !pbox && !pboxa) + return ERROR_INT("no ptrs defined", procName, 1); + if (pw) *pw = 0; + if (ph) *ph = 0; + if (pbox) *pbox = NULL; + if (pboxa) *pboxa = NULL; + if (!baa) + return ERROR_INT("baa not defined", procName, 1); + + n = boxaaGetCount(baa); + if (n == 0) + return ERROR_INT("no boxa in baa", procName, 1); + + boxa = boxaCreate(n); + xmax = ymax = 0; + xmin = ymin = 100000000; + found = FALSE; + for (i = 0; i < n; i++) { + boxa1 = boxaaGetBoxa(baa, i, L_CLONE); + boxaGetExtent(boxa1, NULL, NULL, &box1); + boxaDestroy(&boxa1); + boxGetGeometry(box1, &x, &y, &w, &h); + if (w > 0 && h > 0) { /* a valid extent box */ + found = TRUE; /* found at least one valid extent box */ + xmin = L_MIN(xmin, x); + ymin = L_MIN(ymin, y); + xmax = L_MAX(xmax, x + w); + ymax = L_MAX(ymax, y + h); + } + boxaAddBox(boxa, box1, L_INSERT); + } + if (found == FALSE) /* no valid extent boxes */ + xmin = ymin = 0; + + if (pw) *pw = xmax; + if (ph) *ph = ymax; + if (pbox) + *pbox = boxCreate(xmin, ymin, xmax - xmin, ymax - ymin); + if (pboxa) + *pboxa = boxa; + else + boxaDestroy(&boxa); + return 0; +} + + +/*! + * \brief boxaaFlattenToBoxa() + * + * \param[in] baa + * \param[out] pnaindex [optional] the boxa index in the baa + * \param[in] copyflag L_COPY or L_CLONE + * \return boxa, or NULL on error + * + * <pre> + * Notes: + * (1) This 'flattens' the baa to a boxa, taking the boxes in + * order in the first boxa, then the second, etc. + * (2) If a boxa is empty, we generate an invalid, placeholder box + * of zero size. This is useful when converting from a baa + * where each boxa has either 0 or 1 boxes, and it is necessary + * to maintain a 1:1 correspondence between the initial + * boxa array and the resulting box array. + * (3) If &naindex is defined, we generate a Numa that gives, for + * each box in the baa, the index of the boxa to which it belongs. + * </pre> + */ +BOXA * +boxaaFlattenToBoxa(BOXAA *baa, + NUMA **pnaindex, + l_int32 copyflag) +{ +l_int32 i, j, m, n; +BOXA *boxa, *boxat; +BOX *box; +NUMA *naindex; + + PROCNAME("boxaaFlattenToBoxa"); + + if (pnaindex) *pnaindex = NULL; + if (!baa) + return (BOXA *)ERROR_PTR("baa not defined", procName, NULL); + if (copyflag != L_COPY && copyflag != L_CLONE) + return (BOXA *)ERROR_PTR("invalid copyflag", procName, NULL); + if (pnaindex) { + naindex = numaCreate(0); + *pnaindex = naindex; + } + + n = boxaaGetCount(baa); + boxa = boxaCreate(n); + for (i = 0; i < n; i++) { + boxat = boxaaGetBoxa(baa, i, L_CLONE); + m = boxaGetCount(boxat); + if (m == 0) { /* placeholder box */ + box = boxCreate(0, 0, 0, 0); + boxaAddBox(boxa, box, L_INSERT); + if (pnaindex) + numaAddNumber(naindex, i); /* save 'row' number */ + } else { + for (j = 0; j < m; j++) { + box = boxaGetBox(boxat, j, copyflag); + boxaAddBox(boxa, box, L_INSERT); + if (pnaindex) + numaAddNumber(naindex, i); /* save 'row' number */ + } + } + boxaDestroy(&boxat); + } + + return boxa; +} + + +/*! + * \brief boxaaFlattenAligned() + * + * \param[in] baa + * \param[in] num number extracted from each + * \param[in] fillerbox [optional] that fills if necessary + * \param[in] copyflag L_COPY or L_CLONE + * \return boxa, or NULL on error + * + * <pre> + * Notes: + * (1) This 'flattens' the baa to a boxa, taking the first %num + * boxes from each boxa. + * (2) In each boxa, if there are less than %num boxes, we preserve + * the alignment between the input baa and the output boxa + * by inserting one or more fillerbox(es) or, if %fillerbox == NULL, + * one or more invalid placeholder boxes. + * </pre> + */ +BOXA * +boxaaFlattenAligned(BOXAA *baa, + l_int32 num, + BOX *fillerbox, + l_int32 copyflag) +{ +l_int32 i, j, m, n, mval, nshort; +BOXA *boxat, *boxad; +BOX *box; + + PROCNAME("boxaaFlattenAligned"); + + if (!baa) + return (BOXA *)ERROR_PTR("baa not defined", procName, NULL); + if (copyflag != L_COPY && copyflag != L_CLONE) + return (BOXA *)ERROR_PTR("invalid copyflag", procName, NULL); + + n = boxaaGetCount(baa); + boxad = boxaCreate(n); + for (i = 0; i < n; i++) { + boxat = boxaaGetBoxa(baa, i, L_CLONE); + m = boxaGetCount(boxat); + mval = L_MIN(m, num); + nshort = num - mval; + for (j = 0; j < mval; j++) { /* take the first %num if possible */ + box = boxaGetBox(boxat, j, copyflag); + boxaAddBox(boxad, box, L_INSERT); + } + for (j = 0; j < nshort; j++) { /* add fillers if necessary */ + if (fillerbox) { + boxaAddBox(boxad, fillerbox, L_COPY); + } else { + box = boxCreate(0, 0, 0, 0); /* invalid placeholder box */ + boxaAddBox(boxad, box, L_INSERT); + } + } + boxaDestroy(&boxat); + } + + return boxad; +} + + +/*! + * \brief boxaEncapsulateAligned() + * + * \param[in] boxa + * \param[in] num number put into each boxa in the baa + * \param[in] copyflag L_COPY or L_CLONE + * \return baa, or NULL on error + * + * <pre> + * Notes: + * (1) This puts %num boxes from the input %boxa into each of a + * set of boxa within an output baa. + * (2) This assumes that the boxes in %boxa are in sets of %num each. + * </pre> + */ +BOXAA * +boxaEncapsulateAligned(BOXA *boxa, + l_int32 num, + l_int32 copyflag) +{ +l_int32 i, j, n, nbaa, index; +BOX *box; +BOXA *boxat; +BOXAA *baa; + + PROCNAME("boxaEncapsulateAligned"); + + if (!boxa) + return (BOXAA *)ERROR_PTR("boxa not defined", procName, NULL); + if (copyflag != L_COPY && copyflag != L_CLONE) + return (BOXAA *)ERROR_PTR("invalid copyflag", procName, NULL); + + n = boxaGetCount(boxa); + nbaa = n / num; + if (num * nbaa != n) + L_ERROR("inconsistent alignment: num doesn't divide n\n", procName); + baa = boxaaCreate(nbaa); + for (i = 0, index = 0; i < nbaa; i++) { + boxat = boxaCreate(num); + for (j = 0; j < num; j++, index++) { + box = boxaGetBox(boxa, index, copyflag); + boxaAddBox(boxat, box, L_INSERT); + } + boxaaAddBoxa(baa, boxat, L_INSERT); + } + + return baa; +} + + +/*! + * \brief boxaaTranspose() + * + * \param[in] baas + * \return baad, or NULL on error + * + * <pre> + * Notes: + * (1) If you think of a boxaa as a 2D array of boxes that is accessed + * row major, then each row is represented by one of the boxa. + * This function creates a new boxaa related to the input boxaa + * as a column major traversal of the input boxaa. + * (2) For example, if %baas has 2 boxa, each with 10 boxes, then + * %baad will have 10 boxa, each with 2 boxes. + * (3) Require for this transpose operation that each boxa in + * %baas has the same number of boxes. This operation is useful + * when the i-th boxes in each boxa are meaningfully related. + * </pre> + */ +BOXAA * +boxaaTranspose(BOXAA *baas) +{ +l_int32 i, j, ny, nb, nbox; +BOX *box; +BOXA *boxa; +BOXAA *baad; + + PROCNAME("boxaaTranspose"); + + if (!baas) + return (BOXAA *)ERROR_PTR("baas not defined", procName, NULL); + if ((ny = boxaaGetCount(baas)) == 0) + return (BOXAA *)ERROR_PTR("baas empty", procName, NULL); + + /* Make sure that each boxa in baas has the same number of boxes */ + for (i = 0; i < ny; i++) { + if ((boxa = boxaaGetBoxa(baas, i, L_CLONE)) == NULL) + return (BOXAA *)ERROR_PTR("baas is missing a boxa", procName, NULL); + nb = boxaGetCount(boxa); + boxaDestroy(&boxa); + if (i == 0) + nbox = nb; + else if (nb != nbox) + return (BOXAA *)ERROR_PTR("boxa are not all the same size", + procName, NULL); + } + + /* baad[i][j] = baas[j][i] */ + baad = boxaaCreate(nbox); + for (i = 0; i < nbox; i++) { + boxa = boxaCreate(ny); + for (j = 0; j < ny; j++) { + box = boxaaGetBox(baas, j, i, L_COPY); + boxaAddBox(boxa, box, L_INSERT); + } + boxaaAddBoxa(baad, boxa, L_INSERT); + } + return baad; +} + + +/*! + * \brief boxaaAlignBox() + * + * \param[in] baa + * \param[in] box to be aligned with bext boxa in the baa, if possible + * \param[in] delta amount by which consecutive components can miss + * in overlap and still be included in the array + * \param[out] pindex index of boxa with best overlap, or if none match, + * this is the index of the next boxa to be generated + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) This is not greedy. It finds the boxa whose vertical + * extent has the closest overlap with the input box. + * </pre> + */ +l_ok +boxaaAlignBox(BOXAA *baa, + BOX *box, + l_int32 delta, + l_int32 *pindex) +{ +l_int32 i, n, m, y, yt, h, ht, ovlp, maxovlp, maxindex; +BOX *boxt; +BOXA *boxa; + + PROCNAME("boxaaAlignBox"); + + if (pindex) *pindex = 0; + if (!baa) + return ERROR_INT("baa not defined", procName, 1); + if (!box) + return ERROR_INT("box not defined", procName, 1); + if (!pindex) + return ERROR_INT("&index not defined", procName, 1); + + n = boxaaGetCount(baa); + boxGetGeometry(box, NULL, &y, NULL, &h); + maxovlp = -10000000; + for (i = 0; i < n; i++) { + boxa = boxaaGetBoxa(baa, i, L_CLONE); + if ((m = boxaGetCount(boxa)) == 0) { + boxaDestroy(&boxa); + L_WARNING("no boxes in boxa\n", procName); + continue; + } + boxaGetExtent(boxa, NULL, NULL, &boxt); + boxGetGeometry(boxt, NULL, &yt, NULL, &ht); + boxDestroy(&boxt); + boxaDestroy(&boxa); + + /* Overlap < 0 means the components do not overlap vertically */ + if (yt >= y) + ovlp = y + h - 1 - yt; + else + ovlp = yt + ht - 1 - y; + if (ovlp > maxovlp) { + maxovlp = ovlp; + maxindex = i; + } + } + + if (maxovlp + delta >= 0) + *pindex = maxindex; + else + *pindex = n; + return 0; +} |