1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
|
/*====================================================================*
- Copyright (C) 2001 Leptonica. All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions
- are met:
- 1. Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- 2. Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following
- disclaimer in the documentation and/or other materials
- provided with the distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
- CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*====================================================================*/
/*
* croptext.c
*
* Simple program that crops text pages to a given border
*
* Syntax:
* croptext dirin border dirout
* where
* border = number of pixels added on each side (e.g., 50)
*
* The output file name has the same tail as the input file name.
* If dirout is the same as dirin, you overwrite the input files.
*/
#ifdef HAVE_CONFIG_H
#include <config_auto.h>
#endif /* HAVE_CONFIG_H */
#include "allheaders.h"
int main(int argc,
char **argv)
{
char *dirin, *dirout, *infile, *outfile, *tail;
l_int32 i, nfiles, border, x, y, w, h, xb, yb, wb, hb;
BOX *box1, *box2;
BOXA *boxa1, *boxa2;
PIX *pixs, *pixt1, *pixd;
SARRAY *safiles;
static char mainName[] = "croptext";
if (argc != 4)
return ERROR_INT("Syntax: croptext dirin border dirout", mainName, 1);
dirin = argv[1];
border = atoi(argv[2]);
dirout = argv[3];
setLeptDebugOK(1);
safiles = getSortedPathnamesInDirectory(dirin, NULL, 0, 0);
nfiles = sarrayGetCount(safiles);
for (i = 0; i < nfiles; i++) {
infile = sarrayGetString(safiles, i, L_NOCOPY);
splitPathAtDirectory(infile, NULL, &tail);
outfile = genPathname(dirout, tail);
pixs = pixRead(infile);
pixt1 = pixMorphSequence(pixs, "r11 + c10.40 + o5.5 + x4", 0);
boxa1 = pixConnComp(pixt1, NULL, 8);
if (boxaGetCount(boxa1) == 0) {
lept_stderr("Warning: no components on page %s\n", tail);
continue;
}
boxa2 = boxaSort(boxa1, L_SORT_BY_AREA, L_SORT_DECREASING, NULL);
box1 = boxaGetBox(boxa2, 0, L_CLONE);
boxGetGeometry(box1, &x, &y, &w, &h);
xb = L_MAX(0, x - border);
yb = L_MAX(0, y - border);
wb = w + 2 * border;
hb = h + 2 * border;
box2 = boxCreate(xb, yb, wb, hb);
pixd = pixClipRectangle(pixs, box2, NULL);
pixWrite(outfile, pixd, IFF_TIFF_G4);
pixDestroy(&pixs);
pixDestroy(&pixt1);
pixDestroy(&pixd);
boxaDestroy(&boxa1);
boxaDestroy(&boxa2);
}
return 0;
}
|