summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'extract/src/extract-exe.c')
-rw-r--r--extract/src/extract-exe.c244
1 files changed, 244 insertions, 0 deletions
diff --git a/extract/src/extract-exe.c b/extract/src/extract-exe.c
new file mode 100644
index 00000000..d3ac81d0
--- /dev/null
+++ b/extract/src/extract-exe.c
@@ -0,0 +1,244 @@
+/* Command-line programme for extract_ API. */
+
+#include "../include/extract.h"
+#include "../include/extract_alloc.h"
+
+#include "memento.h"
+#include "outf.h"
+
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+
+/* Fetches the command-line argument that follows argv[*i].
+
+On success advances *i by one, stores the new argv[*i] in *out and returns 0.
+If argv[*i] is the last argument, prints a diagnostic, sets errno to EINVAL
+and returns -1; *i and *out are left unchanged.
+*/
+static int arg_next_string(char** argv, int argc, int* i, const char** out)
+{
+    const int next = *i + 1;
+
+    if (next < argc) {
+        *i = next;
+        *out = argv[next];
+        return 0;
+    }
+
+    printf("Expected arg after: %s\n", argv[*i]);
+    errno = EINVAL;
+    return -1;
+}
+
+/* Error-detecting equivalent to *out = atoi(argv[++i]).
+
+On success advances *i by one, stores the parsed value of the new argv[*i] in
+*out and returns 0.
+
+Returns -1, leaving *i and *out unchanged, if:
+    argv[*i] is the last argument (errno is set to EINVAL);
+    the next argument is not entirely a base-10 integer (errno is set to
+    EINVAL);
+    the value does not fit in an int (errno is set to ERANGE).
+
+A diagnostic is printed to stdout in each failure case.
+*/
+static int arg_next_int(char** argv, int argc, int* i, int* out)
+{
+    const char* text;
+    char*       end = NULL;
+    long        value;
+
+    if (*i + 1 >= argc) {
+        printf("Expected integer arg after: %s\n", argv[*i]);
+        errno = EINVAL;
+        return -1;
+    }
+    text = argv[*i + 1];
+
+    /* strtol() only reports overflow via errno, so clear it first. */
+    errno = 0;
+    value = strtol(text, &end, 10);
+    if (end == text || *end != '\0') {
+        /* No digits at all, or trailing junk such as "12x". */
+        printf("Expected integer arg after %s but got: %s\n", argv[*i], text);
+        errno = EINVAL;
+        return -1;
+    }
+    if (errno == ERANGE || value < INT_MIN || value > INT_MAX) {
+        printf("Integer arg after %s is out of range: %s\n", argv[*i], text);
+        errno = ERANGE;
+        return -1;
+    }
+
+    *i += 1;
+    *out = (int) value;
+    return 0;
+}
+
+/* Allocation callback handed to extract_alloc_create() by main().
+
+Checks (in assert-enabled builds) that <state> is the (void*) 123 cookie that
+main() registered, then forwards the request to the C library's realloc().
+*/
+static void* s_realloc(void* state, void* prev, size_t size)
+{
+    void* const expected_state = (void*) 123;
+    void*       result;
+
+    assert(state == expected_state);
+    /* Keep <state> and <expected_state> 'used' when NDEBUG removes the assert. */
+    (void) state;
+    (void) expected_state;
+
+    result = realloc(prev, size);
+    return result;
+}
+
+/* Prints the command-line help text to stdout. */
+static void print_usage(void)
+{
+    printf(
+            "Converts intermediate data from mupdf or gs into a docx file.\n"
+            "\n"
+            "We require a file containing XML output from one of these commands:\n"
+            " mutool draw -F xmltext ...\n"
+            " gs -sDEVICE=txtwrite -dTextFormat=4 ...\n"
+            "\n"
+            "We optionally use a template docx file; see -t.\n"
+            "\n"
+            "Args:\n"
+            " --alloc-exp-min <bytes>\n"
+            " Internal: set exponential allocation with minimum alloc size.\n"
+            " --autosplit 0|1\n"
+            " If 1, we initially split spans when y coordinate changes. This\n"
+            " stresses our handling of spans when input is from mupdf.\n"
+            " -i <intermediate-path>\n"
+            " Path of XML file containing intermediate text spans.\n"
+            " -o <docx-path>\n"
+            " If specified, we generate the specified docx file.\n"
+            " --o-content <path>\n"
+            " If specified, we write raw docx content to <path>; this is the\n"
+            " text that we embed inside the template word/document.xml file\n"
+            " when generating the docx file.\n"
+            " -p 0|1\n"
+            " If 1 and -t <docx-template> is specified, we preserve the\n"
+            " uncompressed <docx-path>.lib/ directory.\n"
+            " -r 0|1\n"
+            " If 1, we output rotated text inside a rotated drawing. Otherwise\n"
+            " output text is always horizontal.\n"
+            " -s 0|1\n"
+            " If 1, we insert extra vertical space between paragraphs and extra\n"
+            " vertical space between paragraphs that had different ctm matrices\n"
+            " in the original document.\n"
+            " -t <docx-template>\n"
+            " If specified we use <docx-template> as template. Otherwise we use\n"
+            " an internal template.\n"
+            " -v <verbose>\n"
+            " Set verbose level.\n"
+            " --v-alloc 0|1\n"
+            " If 1, show alloc stats.\n"
+            );
+}
+
+/* Entry point.
+
+Parses the command line, reads the intermediate XML named by -i, processes it,
+and optionally writes raw content (--o-content) and/or a docx file (-o, with
+or without a -t template).
+
+Returns 0 on success. On any failure prints a 'Failed (errno=...)' line and
+returns 1. All resources, including the allocator, are released on both
+paths.
+*/
+int main(int argc, char** argv)
+{
+    int e = -1;
+    int saved_errno = 0;
+    const char* docx_out_path = NULL;
+    const char* input_path = NULL;
+    const char* docx_template_path = NULL;
+    const char* content_path = NULL;
+    int preserve_dir = 0;
+    int spacing = 1;
+    int rotation = 1;
+    int autosplit = 0;
+    int images = 1;
+    int alloc_stats = 0;
+    int alloc_exp_min = 0;
+    int alloc_exp_min_set = 0;
+    int i;
+
+    extract_alloc_t* alloc = NULL;
+    extract_buffer_t* out_buffer = NULL;
+    extract_buffer_t* intermediate = NULL;
+    extract_t* extract = NULL;
+
+    /* Create an allocator so we test the allocation code. This is checked
+    explicitly, not with assert(), so the failure is still caught when
+    NDEBUG is defined. */
+    if (extract_alloc_create(s_realloc, (void*) 123, &alloc)) {
+        printf("Failed to create allocator.\n");
+        return 1;
+    }
+
+    for (i=1; i<argc; ++i) {
+        const char* arg = argv[i];
+        if (!strcmp(arg, "-h") || !strcmp(arg, "--help")) {
+            print_usage();
+            /* Help as the final arg means 'just show help'; otherwise we
+            carry on and process the remaining args. */
+            if (i + 1 == argc) {
+                e = 0;
+                goto end;
+            }
+        }
+        else if (!strcmp(arg, "--alloc-exp-min")) {
+            /* <extract> does not exist until extract_begin() is called
+            below, so we only record the value here and apply it later. */
+            if (arg_next_int(argv, argc, &i, &alloc_exp_min)) goto end;
+            if (alloc_exp_min < 0) {
+                printf("--alloc-exp-min must not be negative: %i\n", alloc_exp_min);
+                errno = EINVAL;
+                goto end;
+            }
+            alloc_exp_min_set = 1;
+        }
+        else if (!strcmp(arg, "--autosplit")) {
+            if (arg_next_int(argv, argc, &i, &autosplit)) goto end;
+        }
+        else if (!strcmp(arg, "-i")) {
+            if (arg_next_string(argv, argc, &i, &input_path)) goto end;
+        }
+        else if (!strcmp(arg, "-o")) {
+            if (arg_next_string(argv, argc, &i, &docx_out_path)) goto end;
+        }
+        else if (!strcmp(arg, "--o-content")) {
+            if (arg_next_string(argv, argc, &i, &content_path)) goto end;
+        }
+        else if (!strcmp(arg, "-p")) {
+            if (arg_next_int(argv, argc, &i, &preserve_dir)) goto end;
+        }
+        else if (!strcmp(arg, "-r")) {
+            if (arg_next_int(argv, argc, &i, &rotation)) goto end;
+        }
+        else if (!strcmp(arg, "-s")) {
+            if (arg_next_int(argv, argc, &i, &spacing)) goto end;
+        }
+        else if (!strcmp(arg, "-t")) {
+            if (arg_next_string(argv, argc, &i, &docx_template_path)) goto end;
+        }
+        else if (!strcmp(arg, "-v")) {
+            int verbose;
+            if (arg_next_int(argv, argc, &i, &verbose)) goto end;
+            outf_verbose_set(verbose);
+            outf("Have changed verbose to %i", verbose);
+        }
+        else if (!strcmp(arg, "--v-alloc")) {
+            if (arg_next_int(argv, argc, &i, &alloc_stats)) goto end;
+        }
+        else {
+            printf("Unrecognised arg: '%s'\n", arg);
+            errno = EINVAL;
+            goto end;
+        }
+
+        assert(i < argc);
+    }
+
+    if (!input_path) {
+        printf("-i <input-path> not specified.\n");
+        errno = EINVAL;
+        goto end;
+    }
+
+    if (extract_buffer_open_file(alloc, input_path, 0 /*writable*/, &intermediate)) {
+        printf("Failed to open intermediate file: %s\n", input_path);
+        goto end;
+    }
+
+    if (extract_begin(alloc, &extract)) goto end;
+    if (alloc_exp_min_set) {
+        /* Deferred from arg parsing; <extract> is now valid. */
+        outf("Calling alloc_set_min_alloc_size(%i)", alloc_exp_min);
+        extract_exp_min(extract, alloc_exp_min);
+    }
+    if (extract_read_intermediate(extract, intermediate, autosplit)) goto end;
+    if (extract_process(extract, spacing, rotation, images)) goto end;
+
+    if (content_path) {
+        if (extract_buffer_open_file(alloc, content_path, 1 /*writable*/, &out_buffer)) goto end;
+        if (extract_write_content(extract, out_buffer)) goto end;
+        if (extract_buffer_close(&out_buffer)) goto end;
+    }
+    if (docx_out_path) {
+        if (docx_template_path) {
+            if (extract_write_template(
+                    extract,
+                    docx_template_path,
+                    docx_out_path,
+                    preserve_dir
+                    )) {
+                printf("Failed to create docx file: %s\n", docx_out_path);
+                goto end;
+            }
+        }
+        else {
+            if (extract_buffer_open_file(alloc, docx_out_path, 1 /*writable*/, &out_buffer)) goto end;
+            if (extract_write(extract, out_buffer)) {
+                printf("Failed to create docx file: %s\n", docx_out_path);
+                goto end;
+            }
+            if (extract_buffer_close(&out_buffer)) goto end;
+        }
+    }
+
+    e = 0;
+    end:
+
+    /* Capture errno before cleanup, because the calls below may overwrite
+    it and we want to report the original failure. */
+    saved_errno = errno;
+
+    extract_buffer_close(&intermediate);
+    extract_buffer_close(&out_buffer);
+    extract_end(&extract);
+
+    if (e) {
+        printf("Failed (errno=%i): %s\n", saved_errno, strerror(saved_errno));
+    }
+
+    /* NOTE(review): extract_internal_end() was previously only called on the
+    success path; it is presumed safe to call after a failure too - confirm. */
+    extract_internal_end();
+
+    if (!e && alloc_stats) {
+        extract_alloc_stats_t* stats = extract_alloc_stats(alloc);
+        printf("Alloc stats: num_malloc=%i num_realloc=%i num_free=%i num_libc_realloc=%i\n",
+                stats->num_malloc,
+                stats->num_realloc,
+                stats->num_free,
+                stats->num_libc_realloc
+                );
+    }
+
+    /* Release the allocator on both success and failure paths. */
+    extract_alloc_destroy(&alloc);
+    assert(alloc == NULL);
+
+    if (e) {
+        return 1;
+    }
+
+    printf("Finished.\n");
+    return 0;
+}