--- /cise/tmp/ppadala/tidy/include/html.h Fri May 31 17:52:04 2002 +++ include/html.h Tue Jul 2 15:04:28 2002 @@ -653,6 +653,8 @@ Lexer *lexer, Node *node); void PPrintXMLTree(Out *fout, uint mode, uint indent, Lexer *lexer, Node *node); +void PrintSgml(Out *fout, uint mode, uint indent, + Lexer *lexer, Node *node); void PFlushLine(Out *out, uint indent); void PCondFlushLine(Out *out, uint indent); void PrintBody(Out *fout, Lexer *lexer, Node *root); /* Feature request #434940 - fix by Dave Raggett/Ignacio Vazquez-Abrams 21 Jun 01 */ @@ -908,6 +910,8 @@ extern Bool XmlOut; extern Bool xHTML; extern Bool HtmlOut; /* Yes means set explicitly. */ +extern Bool DbSgml; +extern Bool DbXml; extern Bool XmlPi; /* add */ extern Bool XmlPIs; /* assume PIs end with ?> as per XML */ extern Bool XmlSpace; --- /cise/tmp/ppadala/tidy/src/config.c Sun Jul 7 23:24:52 2002 +++ src/config.c Sat Jul 13 18:14:47 2002 @@ -81,6 +81,8 @@ Bool XmlOut = no; /* create output as XML */ Bool xHTML = no; /* output extensible HTML */ Bool HtmlOut = no; /* output plain-old HTML, even for XHTML input. Yes means set explicitly. 
*/ +Bool DbSgml = no; /* output docbook SGML */ +Bool DbXml = no; /* output docbook XML */ Bool XmlPi = no; /* add for XML docs */ Bool RawOut = no; /* avoid mapping values > 127 to entities: not used for anything yet */ Bool UpperCaseTags = no; /* output tags in upper not lower case */ --- /cise/tmp/ppadala/tidy/src/lexer.c Sun Jul 7 23:25:47 2002 +++ src/lexer.c Sat Jul 13 18:14:47 2002 @@ -1674,6 +1674,35 @@ return doctype; } +/* Fill the DOCTYPE node of root for DocBook output. Uses the node returned by FindDocType(), or creates one with NewXhtmlDocTypeNode() and returns no if that fails. The public and system identifiers are passed to AddStringLiteral() and the lexsize range they occupy is stored in doctype->start and doctype->end. The public identifier is doctype_str when doctype_mode is doctype_user and doctype_str is set; on every other path both identifiers are now empty strings, mirroring the initializers in SetXHTMLDocType (they used to be uninitialized pointers there). Returns no on every path. */ Bool SetSgmlDocType(Lexer *lexer, Node *root) +{ char *fpi = "", *sysid = ""; + Node *doctype; + + if (doctype_mode == doctype_user && doctype_str) + { + fpi = doctype_str; + sysid = ""; + } + doctype = FindDocType(root); + if(!doctype) /* The html file doesn't contain doctype */ + if ( !(doctype = NewXhtmlDocTypeNode( root )) ) + return no; + + lexer->txtstart = lexer->txtend = lexer->lexsize; + + /* add public identifier */ + AddStringLiteral(lexer, fpi); + /* add system identifier */ + AddStringLiteral(lexer, sysid); + + lexer->txtend = lexer->lexsize; + + doctype->start = lexer->txtstart; + doctype->end = lexer->txtend; + + return no; +} + Bool SetXHTMLDocType(Lexer *lexer, Node *root) { char *fpi = "", *sysid = "", *dtdsub, *name_space = XHTML_NAMESPACE; /* #578005 - fix by Anonymous 05 Jul 02 */ --- /cise/tmp/ppadala/tidy/src/localize.c Sun Jul 7 23:26:39 2002 +++ src/localize.c Sat Jul 13 18:14:47 2002 @@ -1054,6 +1054,8 @@ tidy_out(out, " -asxml to convert HTML to well formed XHTML\n"); tidy_out(out, " -asxhtml to convert HTML to well formed XHTML\n"); tidy_out(out, " -ashtml to force XHTML to well formed HTML\n"); + tidy_out(out, " -dbsgml to convert HTML to Docbook SGML\n"); + tidy_out(out, " -dbxml to convert HTML to Docbook XML\n"); tidy_out(out, " -slides to burst into slides on H2 elements\n"); /* TRT */ --- /cise/tmp/ppadala/tidy/src/parser.c Sun Jul 7 23:27:35 2002 +++ src/parser.c Sat Jul 13 18:14:47 2002 @@ -501,7 +501,7 @@ TrimTrailingSpace(lexer, element, text); } -static Bool DescendantOf(Node *element, Dict *tag) +Bool 
DescendantOf(Node *element, Dict *tag) { Node *parent; --- /cise/tmp/ppadala/tidy/src/pprint.c Thu Jul 18 14:21:27 2002 +++ src/pprint.c Tue Jul 30 13:50:53 2002 @@ -36,6 +36,13 @@ static void PPrintPhp(Out *fout, uint indent, Lexer *lexer, Node *node); +/* Tag types to distinguish printing */ +typedef enum { + SgmlTagStart, + SgmlTagEnd +}SgmlTagType; + +extern Bool DescendantOf(Node *element, Dict *tag); #define NORMAL 0 #define PREFORMATTED 1 @@ -1769,6 +1776,634 @@ } } +/* Write a fixed diagnostic to stderr; fout is not used. The message is printed through a literal "%s" format so it is never interpreted as a format string itself. */ void PrintSgmlDefault(Out *fout) +{ + char *str = "SGML cannot contain these elements"; + + fprintf(stderr, "%s", str); +} + +/* Print the fixed string that opens the document body. NOTE(review): the literal holds only a line break in this copy of the patch; the element text looks lost in extraction - confirm against the original. */ void PrintSgmlBodyStart(Out *fout, uint indent) +{ + char *str = "
"; + PPrintString(fout, indent, str); +} + +#define DIGIT(c) (c - 48) +#define TOTAL_H 6 +static Bool seen_h[TOTAL_H] = {no, no, no, no, no, no}; + +/* Yuck ugly. FIXME */ +#define SECT(i) (i - startsect) +static int startsect = 0; /* We are at level 0(H1) initially */ + +/* Print one closing string for every heading level still flagged in seen_h, walking from index TOTAL_H - 1 down to 0 and clearing each flag, then print the string that closes the document body. */ void PrintSgmlBodyEnd(Out *fout, uint indent) +{ int i = TOTAL_H - 1; + char str[32]; /* NOTE(review): the format strings in this function are empty in this copy of the patch; the matching close-section code in PrintSgmlTag formats "</simplesect>" (13 characters), so the buffer is sized for that instead of the former 10 bytes - confirm against the original */ + + while(i >= 0) { + if(seen_h[i] == yes) { + if(i == 5) + sprintf(str, ""); + else + sprintf(str, "", SECT(i) + 1); + PPrintString(fout, indent, str); + seen_h[i] = no; + } + --i; + } + + sprintf(str, "
"); + PPrintString(fout, indent, str); +} + +/* Build a MemAlloc'ed id string for node from its direct children. If a child with tag_a exists, its "name" attribute value is used, else its "href" value, else an empty string. If there is no such child, the text of the first TextNode child is copied from lexer->lexbuf; with no TextNode either, an empty string is returned. A leading '#' is dropped, spaces become '_', and the string is cut after SGML_NAMELEN characters. The caller releases the result (PrintSectTag uses MemFree). */ char *GetContent(Lexer *lexer, Node *node) +{ Node *content, *temp_node; + char *str, *temp; + Bool flag = no; + int i; + + content = node->content; + + /* Find the tag */ + for (temp_node = content; + temp_node && temp_node->tag != tag_a; + temp_node = temp_node->next) + ; + + if(temp_node == NULL) { /* There is no .. tag */ + /* Discard all elements which are not text nodes */ + temp_node = content; + for (temp_node = content; + temp_node && temp_node->type != TextNode; + temp_node = temp_node->next) + ; + if(temp_node == NULL) { /* There's no TextNode either */ + str = MemAlloc(1); + str[0] = '\0'; + return str; + } + } + content = temp_node; + + if(content->type == TextNode) { + int size = content->end - content->start; + + str = MemAlloc(size + 1); + str[size] = '\0'; + wstrncpy(str, lexer->lexbuf + content->start, size); + } + else if(content->tag == tag_a){ + AttVal *name; + int size; + + name = GetAttrByName(content, "name"); + if(name == NULL) + name = GetAttrByName(content, "href"); + + if(name == NULL) { /* No href or name, let's take empty id */ + size = 0; + str = MemAlloc(size + 1); + str[size] = '\0'; + } + else { + size = wstrlen(name->value); + str = MemAlloc(size + 1); + str[size] = '\0'; + wstrncpy(str, name->value, size); + } + } + + temp = str; + if(str[0] == '#') + flag = yes; + +#define SGML_NAMELEN 44 /* Maximum id namelength */ + + i = 0; + + /* when flag is set every character is first overwritten by its successor, which shifts the string left by one and drops the leading '#' */ while(*temp && i < SGML_NAMELEN) { + if(flag) + *temp = *(temp + 1); + if(*temp == ' ') + *temp = '_'; + ++temp; + ++i; + } + *temp = '\0'; + return str; +} + +/* Print the opening section tag and title tag for a heading node, using the id from GetContent(); the startsect parameter shadows the file-level variable inside SECT(). */ void PrintSectTag( Out *fout, uint indent, Lexer *lexer, Node *node, + uint startsect) +{ char sectnum = node->element[1]; + char str[100]; + + char *id = GetContent(lexer, node); + + if(sectnum == '6') /* there's no sect6. We can do variety of + things here. may be
.. */ + sprintf(str, "", id); + else + sprintf(str, "<sect%c id=\"%s\"><title>", SECT(sectnum), id); + PPrintString(fout, indent, str); + MemFree(id); +} + +/* Return yes when the parent of element has a two-character name of the form 'h' followed by a digit, no otherwise. A missing parent or a parent without an element name yields no; DescendantOfAddress applies the same element check before comparing names. */ Bool ImmediateDescendantOfHTags(Node *element) +{ Node *parent = element->parent; + + if (parent != NULL && parent->element != NULL && strlen(parent->element) == 2 && + parent->element[0] == 'h' && + IsDigit(parent->element[1])) + return yes; + return no; +} + +/* Print the opening DocBook markup for an anchor node. With a "name" attribute: <para id=..> outside a paragraph, <anchor id=../> inside one. Otherwise with an "href" attribute: <link linkend=..> for values starting with '#', <ulink url=..> for the rest, preceded by <para> when the anchor is not inside a paragraph and directly follows a text node. Nothing is printed when the anchor sits directly under a heading or carries neither attribute (the latter used to dereference a NULL pointer). Attribute values are cut to 480 characters so the formatted tag always fits in str; longer values are truncated, not rejected. */ void PrintSgmlLink(Out *fout, uint indent, Node *node) +{ AttVal *addr; + char str[500]; /* FIXME allocate dynamically later */ + + addr = GetAttrByName(node, "name"); + if(addr == NULL) { + addr = GetAttrByName(node, "href"); + if(addr != NULL && !ImmediateDescendantOfHTags(node)) { + if(addr->value[0] == '#') + sprintf(str, "<link linkend=\"%.480s\">", addr->value + 1); + else + sprintf(str, "<ulink url=\"%.480s\">", addr->value); + if( !DescendantOf(node, tag_p) && + node->prev && node->prev->type == TextNode) + PPrintString(fout, indent, "<para>"); + PPrintString(fout, indent, str); + } + } + else { + if(!ImmediateDescendantOfHTags(node)) { + if(!DescendantOf(node, tag_p)) + sprintf(str, "<para id=\"%.480s\">", addr->value); + else /* We cannot have a <para> inside another <para> */ + sprintf(str, "<anchor id=\"%.480s\"/>", addr->value); + PPrintString(fout, indent, str); + } + } +} + +/* Print the closing markup that pairs with PrintSgmlLink, re-deriving the same case from the node's attributes and position; prints nothing when the anchor has neither "name" nor "href". */ void PrintSgmlLinkEnd(Out *fout, uint indent, Node *node) +{ AttVal *addr; + + addr = GetAttrByName(node, "name"); + if(addr == NULL) { + addr = GetAttrByName(node, "href"); + if(addr != NULL && !ImmediateDescendantOfHTags(node)) { + if(addr->value[0] == '#') + PPrintString(fout, indent, "</link>"); + else + PPrintString(fout, indent, "</ulink>"); + if( !DescendantOf(node, tag_p) && + node->prev && node->prev->type == TextNode) + PPrintString(fout, indent, "</para>"); + } + } + else { + if(!ImmediateDescendantOfHTags(node)) { + if(!DescendantOf(node, tag_p)) + PPrintString(fout, indent, "</para>"); + /* else + <anchor .. /> has already been placed. 
no need to + do any thing */ + } + } +} + + +/* Print the tag text in str as a start or end tag: the first character of str, then '/' when sgmltag_type is SgmlTagEnd, then the remainder of str. */ void PrintSgmlTagString(Out *fout, uint mode, uint indent, + SgmlTagType sgmltag_type, char *str) +{ PPrintChar(str[0], mode | CDATA); + if(sgmltag_type == SgmlTagEnd) + PPrintChar('/', mode); + PPrintString(fout, indent, str + 1); +} + +/* Print the DocBook list start tag for a ul, ol or dl node; other nodes print nothing. */ void PrintSgmlList(Lexer *lexer, Out *fout, + uint mode, uint indent, + Node *node) +{ if(node->tag == tag_ul) + PPrintString(fout, indent, "<itemizedlist>"); + else if(node->tag == tag_ol) + PPrintString(fout, indent, "<orderedlist>"); + else if(node->tag == tag_dl) + PPrintString(fout, indent, "<variablelist>"); +} + +/* Print the DocBook list end tag matching PrintSgmlList. */ void PrintSgmlListEnd(Lexer *lexer, Out *fout, + uint mode, uint indent, + Node *node) +{ if(node->tag == tag_ul) + PPrintString(fout, indent, "</itemizedlist>"); + else if(node->tag == tag_ol) + PPrintString(fout, indent, "</orderedlist>"); + else if(node->tag == tag_dl) + PPrintString(fout, indent, "</variablelist>"); +} + +/* Print <listitem> for an li or dd node. */ void PrintSgmlListItem(Out *fout, uint indent, Node *node) +{ if(node->tag == tag_li) + PPrintString(fout, indent, "<listitem>"); + else if(node->tag == tag_dd) + PPrintString(fout, indent, "<listitem>"); +} + +/* Print </listitem> for an li or dd node; a dd additionally closes the enclosing varlistentry. */ void PrintSgmlListItemEnd(Out *fout, uint indent, Node *node) +{ if(node->tag == tag_li) + PPrintString(fout, indent, "</listitem>"); + else if(node->tag == tag_dd) + PPrintString(fout, indent, "</listitem></varlistentry>"); +} + +void PrintSgmlImage(Out *fout, uint indent, Node *node) +{ AttVal *addr; + char str[100]; + + addr = GetAttrByName(node, "src"); + /* We can get other attributes like width, height etc.. 
*/ + if(addr != NULL) { + PPrintString(fout, indent, "<inlinemediaobject><imageobject>"); + PCondFlushLine(fout, indent); + /* str is declared as char[100] above; the precision keeps the formatted tag within it, so longer src values are truncated instead of overrunning the buffer */ sprintf(str, "<imagedata fileref=\"%.76s\">", addr->value); + PPrintString(fout, indent, str); + PCondFlushLine(fout, indent); + PPrintString(fout, indent, "</imageobject></inlinemediaobject>"); + PCondFlushLine(fout, indent); + } +} + +/* Count the columns of a table node from its first direct child with tag_tr: each th or td child of that row adds its "colspan" value (parsed with atoi) or 1 when it has none; any other child is reported on stderr. Returns 0 when the table has no direct tr child (the search used to run past the end of the child list). */ int CountColumns(Node *node) +{ Node *temp, *row_content; + int ncols = 0; + + temp = node->content; + + /* FIXME */ + /* Perhaps this is not needed, check with HTML standard later */ + while(temp != NULL && temp->tag != tag_tr) + temp = temp->next; + + /* This can contain th or td's */ + row_content = (temp != NULL) ? temp->content : NULL; + while(row_content) { + if(row_content->tag == tag_th || row_content->tag == tag_td) { + AttVal *colspan; + + colspan = GetAttrByName(row_content, "colspan"); + if(colspan) + ncols += atoi(colspan->value); + else + ++ncols; + } + else + fprintf(stderr, "PrintSgml: error in table processing\n"); + row_content = row_content->next; + } + return ncols; +} + +/* Print the opening informaltable, tgroup and tbody tags, with the cols attribute taken from CountColumns(). */ void PrintSgmlTable(Out *fout, uint indent, Node *node) +{ int ncols; + char str[100]; + + PPrintString(fout, indent, "<informaltable>"); + ncols = CountColumns(node); + sprintf(str, "<tgroup cols=\"%d\"><tbody>", ncols); + PPrintString(fout, indent, str); +} + +/* Print the closing tags matching PrintSgmlTable; node is not used. */ void PrintSgmlTableEnd(Out *fout, uint indent, Node *node) +{ + PPrintString(fout, indent, "</tbody></tgroup></informaltable>"); +} + +/* Return yes when any ancestor of element is named "address" (case-insensitive comparison), no otherwise. */ Bool DescendantOfAddress(Node *element) +{ + Node *parent; + + for (parent = element->parent; + parent != null; parent = parent->parent) + { if (parent->element && wstrcasecmp(parent->element, "address") == 0) + return yes; + } + + return no; +} + +/* Print the DocBook start or end markup (selected by sgmltag_type) that corresponds to the HTML element node. The static level holds the heading index handled most recently, or -1 before the first heading. */ void PrintSgmlTag( Out *fout, uint mode, uint indent, Lexer *lexer, Node *node, + SgmlTagType sgmltag_type) +{ static int level = -1; + + if(node->tag == tag_html) { + if(sgmltag_type == SgmlTagStart) + PrintSgmlBodyStart(fout, indent); + else if(sgmltag_type == SgmlTagEnd) + PrintSgmlBodyEnd(fout, indent); + } + else if(node->tag == 
tag_head) + PrintSgmlTagString(fout, mode, indent, sgmltag_type,"<articleinfo>"); + else if(node->tag == tag_title) + PrintSgmlTagString(fout, mode, indent, sgmltag_type,"<title>"); + /* May be we can replace with node->model & CM_LIST */ + else if(node->tag == tag_ul || node->tag == tag_ol || + node->tag == tag_dl) { + if(sgmltag_type == SgmlTagStart) + PrintSgmlList(lexer, fout, mode, indent, node); + else if(sgmltag_type == SgmlTagEnd) + PrintSgmlListEnd(lexer, fout, mode, indent, node); + } + else if(node->tag == tag_dt) { + if(sgmltag_type == SgmlTagStart) + PPrintString(fout, indent, "<varlistentry><term>"); + else if(sgmltag_type == SgmlTagEnd) + PPrintString(fout, indent, "</term>"); + } + else if(node->tag == tag_li || node->tag == tag_dd) { + if(sgmltag_type == SgmlTagStart) + PrintSgmlListItem(fout, indent, node); + else if(sgmltag_type == SgmlTagEnd) + PrintSgmlListItemEnd(fout, indent, node); + } + /* Later we should clean this before coming to PrintSgml */ + else if(node->tag == tag_p && + /* Table <entry> processing */ + !DescendantOf(node, tag_th) && !DescendantOf(node, tag_td) && + !DescendantOfAddress(node)) + PrintSgmlTagString(fout, mode, indent, sgmltag_type, "<para>"); + else if(node->tag == tag_blockquote) + PrintSgmlTagString(fout, mode, indent, sgmltag_type, "<blockquote>"); + else if(node->tag == tag_pre && + /* Table <entry> processing */ + !DescendantOf(node, tag_th) && !DescendantOf(node, tag_td)) + PrintSgmlTagString(fout, mode, indent, sgmltag_type, + "<programlisting>"); + else if(node->tag == tag_a) { + if(sgmltag_type == SgmlTagStart) + PrintSgmlLink(fout, indent, node); + else if(sgmltag_type == SgmlTagEnd) + PrintSgmlLinkEnd(fout, indent, node); + } + /* Table would require more processing */ + else if(node->tag == tag_table) { + if(sgmltag_type == SgmlTagStart) + PrintSgmlTable(fout, indent, node); + else if(sgmltag_type == SgmlTagEnd) + PrintSgmlTableEnd(fout, indent, node); + } + else if(node->tag == tag_tr) + 
PrintSgmlTagString(fout, mode, indent, sgmltag_type, "<row>"); + else if(node->tag == tag_td || node->tag == tag_th) + PrintSgmlTagString(fout, mode, indent, sgmltag_type, "<entry>"); + else if(node->tag == tag_img) { /* This is a StartEndTag */ + if(sgmltag_type == SgmlTagStart) + PrintSgmlImage(fout, indent, node); + } + + else if(wstrcasecmp(node->element, "cite") == 0) + PrintSgmlTagString(fout, mode, indent, sgmltag_type, + "<citation>"); + /* We should distinguish tag_strong and tag_em later + haven't found proper docbook tag for <strong> */ + else if(node->tag == tag_em || node->tag == tag_strong || + wstrcasecmp(node->element, "address") == 0) { + if(sgmltag_type == SgmlTagStart) { + if(DescendantOf(node, tag_p) || DescendantOf(node, tag_pre)) + PPrintString(fout, indent, "<emphasis>"); + else + PPrintString(fout, indent, "<para><emphasis>"); + } + else if(sgmltag_type == SgmlTagEnd) { + if(DescendantOf(node, tag_p) || DescendantOf(node, tag_pre)) + PPrintString(fout, indent, "</emphasis>"); + else + PPrintString(fout, indent, "</emphasis></para>"); + } + } + else { + if(wstrcasecmp(node->element, "code") == 0 && + !(node->parent->tag == tag_dd || + node->parent->tag == tag_li)) + PrintSgmlTagString(fout, mode, indent, + sgmltag_type, "<literal>"); + /* headings: only h1 to h6 are accepted, because the digit indexes the six-entry seen_h array */ else if(strlen(node->element) == 2 && + node->element[0] == 'h' && + node->element[1] >= '1' && node->element[1] <= '6') { + if(sgmltag_type == SgmlTagStart) { + int sectnum = DIGIT(node->element[1]) - 1; + char str[32]; /* "</simplesect>" needs 14 bytes with its terminator; the former 10-byte buffer was overrun */ + /* first heading at this level: just mark it open; otherwise close every deeper level still open, from the previous heading's level down, and then this level itself */ if(seen_h[sectnum] == no) + seen_h[sectnum] = yes; + else { + int i = level; + while(i > sectnum && seen_h[i] == yes) { + if(i == 5) + sprintf(str, "</simplesect>"); + else + sprintf(str, "</sect%d>", SECT(i) + 1); + PPrintString(fout, indent, str); + seen_h[i] = no; + --i; + } + if(sectnum == 5) + sprintf(str, "</simplesect>"); + else + sprintf(str, "</sect%d>", SECT(sectnum) + 1); + PPrintString(fout, indent, str); + } + /* H1 is not the first level + like the curses man2html pages */ + if(level == -1 && sectnum > 
0) + startsect = sectnum; + + PrintSectTag(fout, indent, lexer, node, startsect); + level = sectnum; + } + else + PPrintString(fout, indent, ""); /* NOTE(review): empty literal in this copy of the patch; presumably the text closing the heading's title was lost in extraction - confirm against the original */ + } + } +} + +/* Recursively print node and its subtree as DocBook. Dispatch is on node->type: text, comments, the doctype, CDATA, sections, processing instructions and the XML declaration go to the existing PPrint routines; ASP, JSTE and PHP nodes only trigger PrintSgmlDefault(); a RootNode just recurses into its children. Element nodes are written through PrintSgmlTag() with SgmlTagStart and SgmlTagEnd around their recursively printed content, using the flush and indent calls visible in each branch (one branch each for ParsePre elements, CM_INLINE elements and everything else). Empty elements and StartEndTag nodes (when xHTML is off) get a start tag only. Text below a dd that is not inside a, p, td or th is bracketed by two extra PPrintString calls, and text below a style element is skipped with a message on stderr. Does nothing for a null node. */ void PrintSgml( Out *fout, uint mode, uint indent, + Lexer *lexer, Node *node) +{ Node *content; + + if (node == null) + return; + + if (node->type == TextNode) { + if(DescendantOf(node, tag_dd) && !DescendantOf(node, tag_a) && + !DescendantOf(node, tag_p) && + /* We have to decide on this table stuff later + * processing is complex */ + !DescendantOf(node, tag_td) && !DescendantOf(node, tag_th)) + /* && wstrcasecmp(node->parent->element, "code") != 0) + above line may be needed later to properly convert stuff */ + { + /* NOTE(review): both literals around the text are empty in this copy of the patch; presumably an element wrapping the text was lost in extraction - confirm against the original */ PPrintString(fout, indent, ""); + PPrintText(fout, mode, indent, lexer, node->start, node->end); + PPrintString(fout, indent, ""); + } + else { + if(DescendantOf(node, tag_style)) + fprintf(stderr, "PrintSgml: skipping style elements\n\n"); + else + PPrintText(fout, mode, indent, lexer, node->start, node->end); + } + } + /* CDATA is printed as ordinary text when EscapeCdata is set; the plain CDATATag branch further down handles the other case */ else if(node->type == CDATATag && EscapeCdata) + PPrintText(fout, mode, indent, lexer, node->start, node->end); + else if (node->type == CommentTag) + PPrintComment(fout, indent, lexer, node); + else if (node->type == RootNode) + { + for (content = node->content; + content != null; + content = content->next) + PrintSgml(fout, mode, indent, lexer, content); + } + else if (node->type == DocTypeTag) + PPrintDocType(fout, indent, lexer, node); + else if (node->type == CDATATag) + PPrintCDATA(fout, indent, lexer, node); + else if (node->type == SectionTag) + PPrintSection(fout, indent, lexer, node); + else if (node->type == AspTag || + node->type == JsteTag || + node->type == PhpTag ) + PrintSgmlDefault(fout); + else if (node->type == ProcInsTag) + PPrintPI(fout, indent, lexer, node); + else if (node->type == XmlDecl)// && DbXml May be this is needed + PPrintXmlDecl(fout, indent, lexer, node); + else if (node->tag->model & CM_EMPTY || + (node->type == StartEndTag && !xHTML)) + { + 
if (!(node->tag->model & CM_INLINE)) + PCondFlushLine(fout, indent); + + if (MakeClean && node->tag == tag_wbr) + PPrintString(fout, indent, " "); + else + PrintSgmlTag(fout, mode, indent, lexer, node, SgmlTagStart); + } + else { + /* a StartEndTag that reaches this branch is rewritten in place to a StartTag, so the node itself is modified */ if (node->type == StartEndTag) + node->type = StartTag; + + if (node->tag && node->tag->parser == ParsePre) + { + PCondFlushLine(fout, indent); + + indent = 0; + PCondFlushLine(fout, indent); + + PrintSgmlTag(fout, mode, indent, lexer, node, SgmlTagStart); + PFlushLine(fout, indent); + + /* children of a ParsePre element are printed with PREFORMATTED and NOWRAP added to mode, at indent 0 */ for (content = node->content; + content != null; + content = content->next) + PrintSgml(fout, (mode | PREFORMATTED | NOWRAP), + indent, lexer, content); + + PCondFlushLine(fout, indent); + PrintSgmlTag(fout, mode, indent, lexer, node, SgmlTagEnd); + PFlushLine(fout, indent); + + if (IndentContent == no && node->next != null) + PFlushLine(fout, indent); + } + else if (node->tag->model & CM_INLINE) + { PrintSgmlTag(fout, mode, indent, lexer, node, SgmlTagStart); + + if (ShouldIndent(node)) + { + PCondFlushLine(fout, indent); + indent += spaces; + + for (content = node->content; + content != null; + content = content->next) + PrintSgml(fout, mode, indent, lexer, content); + + PCondFlushLine(fout, indent); + indent -= spaces; + PCondFlushLine(fout, indent); + } + else + { + + for (content = node->content; + content != null; + content = content->next) + PrintSgml(fout, mode, indent, lexer, content); + } + + PrintSgmlTag(fout, mode, indent, lexer, node, SgmlTagEnd); + } + else + { PCondFlushLine(fout, indent); + if (SmartIndent && node->prev != null) + PFlushLine(fout, indent); + + PrintSgmlTag(fout, mode ,indent, lexer, node, SgmlTagStart); + if (ShouldIndent(node)) + PCondFlushLine(fout, indent); + else if (node->tag->model & CM_HTML || + node->tag == tag_noframes || + (node->tag->model & CM_HEAD && !(node->tag == tag_title))) + PFlushLine(fout, indent); + + if (ShouldIndent(node)) + { PCondFlushLine(fout, indent); + indent += spaces; + + for (content = 
node->content; + content != null; + content = content->next) + PrintSgml(fout, mode, indent, lexer, content); + PCondFlushLine(fout, indent); + indent -= spaces; + PCondFlushLine(fout, indent); + } + else + { Node *last; + last = null; + for (content = node->content; + content != null; + content = content->next) { + /* kludge for naked text before block level tag */ + if (last && !IndentContent && last->type == TextNode && + content->tag && !(content->tag->model & CM_INLINE) ) + { + /* PFlushLine(fout, indent); */ + PFlushLine(fout, indent); + } + + PrintSgml(fout, mode, + (ShouldIndent(node) ? indent+spaces : indent), + lexer, content); + last = content; + } + } + PrintSgmlTag(fout, mode, indent, lexer, node, SgmlTagEnd); + PFlushLine(fout, indent); + if (IndentContent == no && + node->next != null && + HideEndTags == no && + (node->tag->model & (CM_BLOCK|CM_LIST|CM_DEFLIST|CM_TABLE))) + PFlushLine(fout, indent); + } + } +} + void PPrintTree(Out *fout, uint mode, uint indent, Lexer *lexer, Node *node) { @@ -2034,17 +2669,14 @@ PPrintJste(fout, indent, lexer, node); else if (node->type == PhpTag) PPrintPhp(fout, indent, lexer, node); - else if ( node->tag->model & CM_EMPTY - || (node->type == StartEndTag && !xHTML) ) + else if (node->tag->model & CM_EMPTY || (node->type == StartEndTag && !xHTML)) { PCondFlushLine(fout, indent); PPrintTag(lexer, fout, mode, indent, node); PFlushLine(fout, indent); - /* CPR: folks don't want so much vertical spacing in XML if (node->next) PFlushLine(fout, indent); - */ } else /* some kind of container element */ { @@ -2076,7 +2708,7 @@ PPrintTag(lexer, fout, mode, indent, node); - if ( !mixed && node->content ) + if (!mixed) PFlushLine(fout, indent); for (content = node->content; @@ -2084,16 +2716,14 @@ content = content->next) PPrintXMLTree(fout, mode, cindent, lexer, content); - if ( !mixed && node->content ) + if (!mixed) PCondFlushLine(fout, cindent); PPrintEndTag(fout, mode, indent, node); PCondFlushLine(fout, indent); - /* CPR: 
folks don't want so much vertical spacing in XML if (node->next) PFlushLine(fout, indent); - */ } } --- /cise/tmp/ppadala/tidy/src/tab2space.c Wed Feb 6 04:09:37 2002 +++ src/tab2space.c Sat Jul 6 23:50:55 2002 @@ -2,7 +2,7 @@ #include #include -#ifndef __BEOS__ +#if !(defined(__BEOS__) || defined(linux)) typedef unsigned int uint; #endif typedef unsigned char byte; --- /cise/tmp/ppadala/tidy/src/tidy.c Sun Jul 7 23:29:25 2002 +++ src/tidy.c Fri Jul 19 01:22:54 2002 @@ -1853,6 +1853,10 @@ IndentContent = yes; SmartIndent = yes; } + else if (wstrcasecmp(arg, "dbsgml") == 0) + DbSgml = yes; + else if(wstrcasecmp(arg, "dbxml") == 0) + DbXml = yes; else if (wstrcasecmp(arg, "omit") == 0) HideEndTags = yes; else if (wstrcasecmp(arg, "upper") == 0) @@ -2180,6 +2184,28 @@ else { lexer->warnings = 0; + + /* DocBook output: force the clean-up options the converter relies on and install a user doctype carrying the DocBook public identifier */ if (DbSgml || DbXml) { + char *str; + + if(DbSgml) + str = "article PUBLIC \"-//OASIS//DTD DocBook V4.1//EN\""; + else + str = "article PUBLIC \"-//OASIS//DTD DocBook XML V4.1.2//EN\""; /* was "DocBk XML V4.1.2 //EN", which is not the identifier published for DocBook XML 4.1.2 */ + + EncloseBodyText = yes; /* We want those

s */ + EncloseBlockText = yes; + LogicalEmphasis = yes; + DropFontTags = yes; /* .. are not needed */ + + /* May be this should be decided by user */ + QuoteMarks = yes; + + doctype_mode = doctype_user; + /* TidyDeInit does MemFree(doctype_str) if it's != NULL */ + doctype_str = MemAlloc(wstrlen(str) + 1); /* + 1 leaves room for the string terminator, as the other MemAlloc(size + 1) copies in this patch do */ + wstrcpy(doctype_str, str); + } document = ParseDocument(lexer); @@ -2226,6 +2252,10 @@ { if (xHTML) SetXHTMLDocType(lexer, document); + else if(DbSgml) + SetSgmlDocType(lexer, document); + else if(DbXml) + SetSgmlDocType(lexer, document); else FixDocType(lexer, document); @@ -2247,7 +2277,7 @@ } /* ensure presence of initial */ - if (XmlOut && XmlPi) + if ((XmlOut && XmlPi) || DbXml) FixXmlDecl(lexer, document); /* @@ -2381,9 +2411,12 @@ /* Feature request #434940 - fix by Dave Raggett/Ignacio Vazquez-Abrams 21 Jun 01 */ else if (BodyOnly) PrintBody(&out, lexer, document); - else - PPrintTree(&out, null, 0, lexer, document); - + else { + if(DbSgml || DbXml) + PrintSgml(&out, null, 0, lexer, document); + else + PPrintTree(&out, null, 0, lexer, document); + } PFlushLine(&out, 0); }