diff options
author | Matthew Turk <satai@gentoo.org> | 2003-01-03 05:43:19 +0000 |
---|---|---|
committer | Matthew Turk <satai@gentoo.org> | 2003-01-03 05:43:19 +0000 |
commit | 8091f15a2a06f83c3824d6e9709f03e42bcaf510 (patch) | |
tree | 8d50d21e9f20fef0f7ec12a0a4afa74aca89c255 /app-text | |
parent | Great ebuild and patch for GCC 3.x from Richard Garand <richard@garandnet.net>, (diff) | |
download | gentoo-2-8091f15a2a06f83c3824d6e9709f03e42bcaf510.tar.gz gentoo-2-8091f15a2a06f83c3824d6e9709f03e42bcaf510.tar.bz2 gentoo-2-8091f15a2a06f83c3824d6e9709f03e42bcaf510.zip |
Closing 10515 with patch from ferdy@ferdyx.org to convert HTML to
DocBook/XML and DocBook/SGML.
Diffstat (limited to 'app-text')
-rw-r--r-- | app-text/htmltidy/ChangeLog | 8 | ||||
-rw-r--r-- | app-text/htmltidy/files/digest-htmltidy-2.7.18-r1 | 1 | ||||
-rw-r--r-- | app-text/htmltidy/files/htmltidy-dbpatch.diff | 890 | ||||
-rw-r--r-- | app-text/htmltidy/htmltidy-2.7.18-r1.ebuild | 46 |
4 files changed, 944 insertions, 1 deletions
diff --git a/app-text/htmltidy/ChangeLog b/app-text/htmltidy/ChangeLog index 2a569bb2d426..0be2538eca8c 100644 --- a/app-text/htmltidy/ChangeLog +++ b/app-text/htmltidy/ChangeLog @@ -1,6 +1,12 @@ # ChangeLog for app-text/htmltidy # Copyright 2002 Gentoo Technologies, Inc.; Distributed under the GPL -# $Header: /var/cvsroot/gentoo-x86/app-text/htmltidy/ChangeLog,v 1.4 2002/12/13 10:56:19 blizzy Exp $ +# $Header: /var/cvsroot/gentoo-x86/app-text/htmltidy/ChangeLog,v 1.5 2003/01/03 05:43:19 satai Exp $ + +*htmltidy-2.7.18-r1 (03 Jan 2003) + + 03 Jan 2003; Matthew Turk <satai@gentoo.org> : + Added dbpatch to convert HTML to either Docbook/XML or Docbook/SGML. Thanks + to Ferdy <ferdy@ferdyx.org> for the patch. 06 Dec 2002; Rodney Rees <manson@gentoo.org> : changed sparc ~sparc keywords diff --git a/app-text/htmltidy/files/digest-htmltidy-2.7.18-r1 b/app-text/htmltidy/files/digest-htmltidy-2.7.18-r1 new file mode 100644 index 000000000000..40d794f8de51 --- /dev/null +++ b/app-text/htmltidy/files/digest-htmltidy-2.7.18-r1 @@ -0,0 +1 @@ +MD5 7694dcdb7d451b17477292d60186f477 tidy_src_020718.tgz 153771 diff --git a/app-text/htmltidy/files/htmltidy-dbpatch.diff b/app-text/htmltidy/files/htmltidy-dbpatch.diff new file mode 100644 index 000000000000..f3322e273a72 --- /dev/null +++ b/app-text/htmltidy/files/htmltidy-dbpatch.diff @@ -0,0 +1,890 @@ +--- /cise/tmp/ppadala/tidy/include/html.h Fri May 31 17:52:04 2002 ++++ include/html.h Tue Jul 2 15:04:28 2002 +@@ -653,6 +653,8 @@ + Lexer *lexer, Node *node); + void PPrintXMLTree(Out *fout, uint mode, uint indent, + Lexer *lexer, Node *node); ++void PrintSgml(Out *fout, uint mode, uint indent, ++ Lexer *lexer, Node *node); + void PFlushLine(Out *out, uint indent); + void PCondFlushLine(Out *out, uint indent); + void PrintBody(Out *fout, Lexer *lexer, Node *root); /* Feature request #434940 - fix by Dave Raggett/Ignacio Vazquez-Abrams 21 Jun 01 */ +@@ -908,6 +910,8 @@ + extern Bool XmlOut; + extern Bool xHTML; + extern Bool HtmlOut; /* Yes means set explicitly. */ ++extern Bool DbSgml; ++extern Bool DbXml; + extern Bool XmlPi; /* add <?xml?> */ + extern Bool XmlPIs; /* assume PIs end with ?> as per XML */ + extern Bool XmlSpace; +--- /cise/tmp/ppadala/tidy/src/config.c Sun Jul 7 23:24:52 2002 ++++ src/config.c Sat Jul 13 18:14:47 2002 +@@ -81,6 +81,8 @@ + Bool XmlOut = no; /* create output as XML */ + Bool xHTML = no; /* output extensible HTML */ + Bool HtmlOut = no; /* output plain-old HTML, even for XHTML input. Yes means set explicitly. */ ++Bool DbSgml = no; /* output docbook SGML */ ++Bool DbXml = no; /* output docbook XML */ + Bool XmlPi = no; /* add <?xml?> for XML docs */ + Bool RawOut = no; /* avoid mapping values > 127 to entities: not used for anything yet */ + Bool UpperCaseTags = no; /* output tags in upper not lower case */ +--- /cise/tmp/ppadala/tidy/src/lexer.c Sun Jul 7 23:25:47 2002 ++++ src/lexer.c Sat Jul 13 18:14:47 2002 +@@ -1674,6 +1674,35 @@ + return doctype; + } + ++Bool SetSgmlDocType(Lexer *lexer, Node *root) ++{ char *fpi, *sysid; ++ Node *doctype; ++ ++ if (doctype_mode == doctype_user && doctype_str) ++ { ++ fpi = doctype_str; ++ sysid = ""; ++ } ++ doctype = FindDocType(root); ++ if(!doctype) /* The html file doesn't contain doctype */ ++ if ( !(doctype = NewXhtmlDocTypeNode( root )) ) ++ return no; ++ ++ lexer->txtstart = lexer->txtend = lexer->lexsize; ++ ++ /* add public identifier */ ++ AddStringLiteral(lexer, fpi); ++ /* add system identifier */ ++ AddStringLiteral(lexer, sysid); ++ ++ lexer->txtend = lexer->lexsize; ++ ++ doctype->start = lexer->txtstart; ++ doctype->end = lexer->txtend; ++ ++ return no; ++} ++ + Bool SetXHTMLDocType(Lexer *lexer, Node *root) + { + char *fpi = "", *sysid = "", *dtdsub, *name_space = XHTML_NAMESPACE; /* #578005 - fix by Anonymous 05 Jul 02 */ +--- /cise/tmp/ppadala/tidy/src/localize.c Sun Jul 7 23:26:39 2002 ++++ src/localize.c Sat Jul 13 18:14:47 2002 +@@ -1054,6 +1054,8 @@ + tidy_out(out, " -asxml to convert HTML to well formed XHTML\n"); + tidy_out(out, " -asxhtml to convert HTML to well formed XHTML\n"); + tidy_out(out, " -ashtml to force XHTML to well formed HTML\n"); ++ tidy_out(out, " -dbsgml to convert HTML to Docbook SGML\n"); ++ tidy_out(out, " -dbxml to convert HTML to Docbook XML\n"); + tidy_out(out, " -slides to burst into slides on H2 elements\n"); + + /* TRT */ +--- /cise/tmp/ppadala/tidy/src/parser.c Sun Jul 7 23:27:35 2002 ++++ src/parser.c Sat Jul 13 18:14:47 2002 +@@ -501,7 +501,7 @@ + TrimTrailingSpace(lexer, element, text); + } + +-static Bool DescendantOf(Node *element, Dict *tag) ++Bool DescendantOf(Node *element, Dict *tag) + { + Node *parent; + +--- /cise/tmp/ppadala/tidy/src/pprint.c Thu Jul 18 14:21:27 2002 ++++ src/pprint.c Tue Jul 30 13:50:53 2002 +@@ -6,9 +6,9 @@ + + CVS Info : + +- $Author: satai $ +- $Date: 2003/01/03 05:43:19 $ +- $Revision: 1.1 $ ++ $Author: satai $ ++ $Date: 2003/01/03 05:43:19 $ ++ $Revision: 1.1 $ + + */ + +@@ -36,6 +36,13 @@ + static void PPrintPhp(Out *fout, uint indent, + Lexer *lexer, Node *node); + ++/* Tag types to distinguish printing */ ++typedef enum { ++ SgmlTagStart, ++ SgmlTagEnd ++}SgmlTagType; ++ ++extern Bool DescendantOf(Node *element, Dict *tag); + + #define NORMAL 0 + #define PREFORMATTED 1 +@@ -1769,6 +1776,634 @@ + } + } + ++void PrintSgmlDefault(Out *fout) ++{ ++ char *str = "SGML cannot contain these elements"; ++ ++ fprintf(stderr, str); ++} ++ ++void PrintSgmlBodyStart(Out *fout, uint indent) ++{ ++ char *str = "<article>"; ++ PPrintString(fout, indent, str); ++} ++ ++#define DIGIT(c) (c - 48) ++#define TOTAL_H 6 ++static Bool seen_h[TOTAL_H] = {no, no, no, no, no, no}; ++ ++/* Yuck ugly. FIXME */ ++#define SECT(i) (i - startsect) ++static startsect = 0; /* We are at level 0(H1) initially */ ++ ++void PrintSgmlBodyEnd(Out *fout, uint indent) ++{ int i = TOTAL_H - 1; ++ char str[10]; ++ ++ while(i >= 0) { ++ if(seen_h[i] == yes) { ++ if(i == 5) ++ sprintf(str, "</simpleect>"); ++ else ++ sprintf(str, "</sect%d>", SECT(i) + 1); ++ PPrintString(fout, indent, str); ++ seen_h[i] = no; ++ } ++ --i; ++ } ++ ++ sprintf(str, "</article>"); ++ PPrintString(fout, indent, str); ++} ++ ++char *GetContent(Lexer *lexer, Node *node) ++{ Node *content, *temp_node; ++ char *str, *temp, c; ++ Bool flag = no; ++ int i; ++ ++ content = node->content; ++ ++ /* Find the <a> tag */ ++ for (temp_node = content; ++ temp_node && temp_node->tag != tag_a; ++ temp_node = temp_node->next) ++ ; ++ ++ if(temp_node == NULL) { /* There is no <a> .. </a> tag */ ++ /* Discard all elements which are not text nodes */ ++ temp_node = content; ++ for (temp_node = content; ++ temp_node && temp_node->type != TextNode; ++ temp_node = temp_node->next) ++ ; ++ if(temp_node == NULL) { /* There's no TextNode either */ ++ str = MemAlloc(1); ++ str[0] = '\0'; ++ return str; ++ } ++ } ++ content = temp_node; ++ ++ if(content->type == TextNode) { ++ int size = content->end - content->start; ++ ++ str = MemAlloc(size + 1); ++ str[size] = '\0'; ++ wstrncpy(str, lexer->lexbuf + content->start, size); ++ } ++ else if(content->tag == tag_a){ ++ AttVal *name; ++ int size; ++ ++ name = GetAttrByName(content, "name"); ++ if(name == NULL) ++ name = GetAttrByName(content, "href"); ++ ++ if(name == NULL) { /* No href or name, let's take empty id */ ++ size = 0; ++ str = MemAlloc(size + 1); ++ str[size] = '\0'; ++ } ++ else { ++ size = wstrlen(name->value); ++ str = MemAlloc(size + 1); ++ str[size] = '\0'; ++ wstrncpy(str, name->value, size); ++ } ++ } ++ ++ temp = str; ++ if(str[0] == '#') ++ flag = yes; ++ ++#define SGML_NAMELEN 44 /* Maximum id namelength */ ++ ++ i = 0; ++ ++ while(*temp && i < SGML_NAMELEN) { ++ if(flag) ++ *temp = *(temp + 1); ++ if(*temp == ' ') ++ *temp = '_'; ++ ++temp; ++ ++i; ++ } ++ *temp = '\0'; ++ return str; ++} ++ ++void PrintSectTag( Out *fout, uint indent, Lexer *lexer, Node *node, ++ uint startsect) ++{ char sectnum = node->element[1]; ++ char str[100]; ++ ++ char *id = GetContent(lexer, node); ++ ++ if(sectnum == '6') /* there's no sect6. We can do variety of ++ things here. may be <section> .. */ ++ sprintf(str, "<simplesect id=\"%s\"><title>", id); ++ else ++ sprintf(str, "<sect%c id=\"%s\"><title>", SECT(sectnum), id); ++ PPrintString(fout, indent, str); ++ MemFree(id); ++} ++ ++Bool ImmediateDescendantOfHTags(Node *element) ++{ Node *parent = element->parent; ++ ++ if (strlen(parent->element) == 2 && ++ parent->element[0] == 'h' && ++ IsDigit(parent->element[1])) ++ return yes; ++ return no; ++} ++ ++void PrintSgmlLink(Out *fout, uint indent, Node *node) ++{ AttVal *addr; ++ char str[500]; /* FIXME allocate dynamically later */ ++ ++ addr = GetAttrByName(node, "name"); ++ if(addr == NULL) { ++ addr = GetAttrByName(node, "href"); ++ if(!ImmediateDescendantOfHTags(node)) { ++ if(addr->value[0] == '#') ++ sprintf(str, "<link linkend=\"%s\">", addr->value + 1); ++ else ++ sprintf(str, "<ulink url=\"%s\">", addr->value); ++ if( !DescendantOf(node, tag_p) && ++ node->prev && node->prev->type == TextNode) ++ PPrintString(fout, indent, "<para>"); ++ PPrintString(fout, indent, str); ++ } ++ } ++ else { ++ if(!ImmediateDescendantOfHTags(node)) { ++ if(!DescendantOf(node, tag_p)) ++ sprintf(str, "<para id=\"%s\">", addr->value); ++ else /* We cannnot have a <para> inside another <para> */ ++ sprintf(str, "<anchor id=\"%s\"/>", addr->value); ++ PPrintString(fout, indent, str); ++ } ++ } ++} ++ ++void PrintSgmlLinkEnd(Out *fout, uint indent, Node *node) ++{ AttVal *addr; ++ ++ addr = GetAttrByName(node, "name"); ++ if(addr == NULL) { ++ addr = GetAttrByName(node, "href"); ++ if(!ImmediateDescendantOfHTags(node)) { ++ if(addr->value[0] == '#') ++ PPrintString(fout, indent, "</link>"); ++ else ++ PPrintString(fout, indent, "</ulink>"); ++ if( !DescendantOf(node, tag_p) && ++ node->prev && node->prev->type == TextNode) ++ PPrintString(fout, indent, "</para>"); ++ } ++ } ++ else { ++ if(!ImmediateDescendantOfHTags(node)) { ++ if(!DescendantOf(node, tag_p)) ++ PPrintString(fout, indent, "</para>"); ++ /* else ++ <anchor .. /> has already been placed. no need to ++ do any thing */ ++ } ++ } ++} ++ ++ ++void PrintSgmlTagString(Out *fout, uint mode, uint indent, ++ SgmlTagType sgmltag_type, char *str) ++{ PPrintChar(str[0], mode | CDATA); ++ if(sgmltag_type == SgmlTagEnd) ++ PPrintChar('/', mode); ++ PPrintString(fout, indent, str + 1); ++} ++ ++void PrintSgmlList(Lexer *lexer, Out *fout, ++ uint mode, uint indent, ++ Node *node) ++{ if(node->tag == tag_ul) ++ PPrintString(fout, indent, "<itemizedlist>"); ++ else if(node->tag == tag_ol) ++ PPrintString(fout, indent, "<orderedlist>"); ++ else if(node->tag == tag_dl) ++ PPrintString(fout, indent, "<variablelist>"); ++} ++ ++void PrintSgmlListEnd(Lexer *lexer, Out *fout, ++ uint mode, uint indent, ++ Node *node) ++{ if(node->tag == tag_ul) ++ PPrintString(fout, indent, "</itemizedlist>"); ++ else if(node->tag == tag_ol) ++ PPrintString(fout, indent, "</orderedlist>"); ++ else if(node->tag == tag_dl) ++ PPrintString(fout, indent, "</variablelist>"); ++} ++ ++void PrintSgmlListItem(Out *fout, uint indent, Node *node) ++{ if(node->tag == tag_li) ++ PPrintString(fout, indent, "<listitem>"); ++ else if(node->tag == tag_dd) ++ PPrintString(fout, indent, "<listitem>"); ++} ++ ++void PrintSgmlListItemEnd(Out *fout, uint indent, Node *node) ++{ if(node->tag == tag_li) ++ PPrintString(fout, indent, "</listitem>"); ++ else if(node->tag == tag_dd) ++ PPrintString(fout, indent, "</listitem></varlistentry>"); ++} ++ ++void PrintSgmlImage(Out *fout, uint indent, Node *node) ++{ AttVal *addr; ++ char str[100]; ++ ++ addr = GetAttrByName(node, "src"); ++ /* We can get other attributes like width, height etc.. */ ++ if(addr != NULL) { ++ PPrintString(fout, indent, "<inlinemediaobject><imageobject>"); ++ PCondFlushLine(fout, indent); ++ sprintf(str, "<imagedata fileref=\"%s\">", addr->value); ++ PPrintString(fout, indent, str); ++ PCondFlushLine(fout, indent); ++ PPrintString(fout, indent, "</imageobject></inlinemediaobject>"); ++ PCondFlushLine(fout, indent); ++ } ++} ++ ++int CountColumns(Node *node) ++{ Node *temp, *row_content; ++ int ncols = 0; ++ ++ temp = node->content; ++ ++ /* FIXME */ ++ /* Perhaps this is not needed, check with HTML standard later */ ++ while(temp->tag != tag_tr) ++ temp = temp->next; ++ ++ /* This can contain th or td's */ ++ row_content = temp->content; ++ while(row_content) { ++ if(row_content->tag == tag_th || row_content->tag == tag_td) { ++ AttVal *colspan; ++ ++ colspan = GetAttrByName(row_content, "colspan"); ++ if(colspan) ++ ncols += atoi(colspan->value); ++ else ++ ++ncols; ++ } ++ else ++ fprintf(stderr, "PrintSgml: error in table processing\n"); ++ row_content = row_content->next; ++ } ++ return ncols; ++} ++ ++void PrintSgmlTable(Out *fout, uint indent, Node *node) ++{ int ncols; ++ char str[100]; ++ ++ PPrintString(fout, indent, "<informaltable>"); ++ ncols = CountColumns(node); ++ sprintf(str, "<tgroup cols=\"%d\"><tbody>", ncols); ++ PPrintString(fout, indent, str); ++} ++ ++void PrintSgmlTableEnd(Out *fout, uint indent, Node *node) ++{ ++ PPrintString(fout, indent, "</tbody></tgroup></informaltable>"); ++} ++ ++Bool DescendantOfAddress(Node *element) ++{ ++ Node *parent; ++ ++ for (parent = element->parent; ++ parent != null; parent = parent->parent) ++ { if (parent->element && wstrcasecmp(parent->element, "address") == 0) ++ return yes; ++ } ++ ++ return no; ++} ++ ++void PrintSgmlTag( Out *fout, uint mode, uint indent, Lexer *lexer, Node *node, ++ SgmlTagType sgmltag_type) ++{ static level = -1; ++ ++ if(node->tag == tag_html) { ++ if(sgmltag_type == SgmlTagStart) ++ PrintSgmlBodyStart(fout, indent); ++ else if(sgmltag_type == SgmlTagEnd) ++ PrintSgmlBodyEnd(fout, indent); ++ } ++ else if(node->tag == tag_head) ++ PrintSgmlTagString(fout, mode, indent, sgmltag_type,"<articleinfo>"); ++ else if(node->tag == tag_title) ++ PrintSgmlTagString(fout, mode, indent, sgmltag_type,"<title>"); ++ /* May be we can replace with node->model & CM_LIST */ ++ else if(node->tag == tag_ul || node->tag == tag_ol || ++ node->tag == tag_dl) { ++ if(sgmltag_type == SgmlTagStart) ++ PrintSgmlList(lexer, fout, mode, indent, node); ++ else if(sgmltag_type == SgmlTagEnd) ++ PrintSgmlListEnd(lexer, fout, mode, indent, node); ++ } ++ else if(node->tag == tag_dt) { ++ if(sgmltag_type == SgmlTagStart) ++ PPrintString(fout, indent, "<varlistentry><term>"); ++ else if(sgmltag_type == SgmlTagEnd) ++ PPrintString(fout, indent, "</term>"); ++ } ++ else if(node->tag == tag_li || node->tag == tag_dd) { ++ if(sgmltag_type == SgmlTagStart) ++ PrintSgmlListItem(fout, indent, node); ++ else if(sgmltag_type == SgmlTagEnd) ++ PrintSgmlListItemEnd(fout, indent, node); ++ } ++ /* Later we should clean this before coming to PrintSgml */ ++ else if(node->tag == tag_p && ++ /* Table <entry> processing */ ++ !DescendantOf(node, tag_th) && !DescendantOf(node, tag_td) && ++ !DescendantOfAddress(node)) ++ PrintSgmlTagString(fout, mode, indent, sgmltag_type, "<para>"); ++ else if(node->tag == tag_blockquote) ++ PrintSgmlTagString(fout, mode, indent, sgmltag_type, "<blockquote>"); ++ else if(node->tag == tag_pre && ++ /* Table <entry> processing */ ++ !DescendantOf(node, tag_th) && !DescendantOf(node, tag_td)) ++ PrintSgmlTagString(fout, mode, indent, sgmltag_type, ++ "<programlisting>"); ++ else if(node->tag == tag_a) { ++ if(sgmltag_type == SgmlTagStart) ++ PrintSgmlLink(fout, indent, node); ++ else if(sgmltag_type == SgmlTagEnd) ++ PrintSgmlLinkEnd(fout, indent, node); ++ } ++ /* Table would require more processing */ ++ else if(node->tag == tag_table) { ++ if(sgmltag_type == SgmlTagStart) ++ PrintSgmlTable(fout, indent, node); ++ else if(sgmltag_type == SgmlTagEnd) ++ PrintSgmlTableEnd(fout, indent, node); ++ } ++ else if(node->tag == tag_tr) ++ PrintSgmlTagString(fout, mode, indent, sgmltag_type, "<row>"); ++ else if(node->tag == tag_td || node->tag == tag_th) ++ PrintSgmlTagString(fout, mode, indent, sgmltag_type, "<entry>"); ++ else if(node->tag == tag_img) { /* This is a StartEndTag */ ++ if(sgmltag_type == SgmlTagStart) ++ PrintSgmlImage(fout, indent, node); ++ } ++ ++ else if(wstrcasecmp(node->element, "cite") == 0) ++ PrintSgmlTagString(fout, mode, indent, sgmltag_type, ++ "<citation>"); ++ /* We should distinguish tag_strong and tag_em later ++ haven't found proper docbook tag for <strong> */ ++ else if(node->tag == tag_em || node->tag == tag_strong || ++ wstrcasecmp(node->element, "address") == 0) { ++ if(sgmltag_type == SgmlTagStart) { ++ if(DescendantOf(node, tag_p) || DescendantOf(node, tag_pre)) ++ PPrintString(fout, indent, "<emphasis>"); ++ else ++ PPrintString(fout, indent, "<para><emphasis>"); ++ } ++ else if(sgmltag_type == SgmlTagEnd) { ++ if(DescendantOf(node, tag_p) || DescendantOf(node, tag_pre)) ++ PPrintString(fout, indent, "</emphasis>"); ++ else ++ PPrintString(fout, indent, "</emphasis></para>"); ++ } ++ } ++ else { ++ if(wstrcasecmp(node->element, "code") == 0 && ++ !(node->parent->tag == tag_dd || ++ node->parent->tag == tag_li)) ++ PrintSgmlTagString(fout, mode, indent, ++ sgmltag_type, "<literal>"); ++ else if(strlen(node->element) == 2 && ++ node->element[0] == 'h' && ++ IsDigit(node->element[1])) { ++ if(sgmltag_type == SgmlTagStart) { ++ int sectnum = DIGIT(node->element[1]) - 1; ++ char str[10]; ++ if(seen_h[sectnum] == no) ++ seen_h[sectnum] = yes; ++ else { ++ int i = level; ++ while(i > sectnum && seen_h[i] == yes) { ++ if(i == 5) ++ sprintf(str, "</simplesect>"); ++ else ++ sprintf(str, "</sect%d>", SECT(i) + 1); ++ PPrintString(fout, indent, str); ++ seen_h[i] = no; ++ --i; ++ } ++ if(sectnum == 5) ++ sprintf(str, "</simplesect>"); ++ else ++ sprintf(str, "</sect%d>", SECT(sectnum) + 1); ++ PPrintString(fout, indent, str); ++ } ++ /* H1 is not the first level ++ like the curses man2html pages */ ++ if(level == -1 && sectnum > 0) ++ startsect = sectnum; ++ ++ PrintSectTag(fout, indent, lexer, node, startsect); ++ level = sectnum; ++ } ++ else ++ PPrintString(fout, indent, "</title>"); ++ } ++ } ++} ++ ++void PrintSgml( Out *fout, uint mode, uint indent, ++ Lexer *lexer, Node *node) ++{ Node *content; ++ ++ if (node == null) ++ return; ++ ++ if (node->type == TextNode) { ++ if(DescendantOf(node, tag_dd) && !DescendantOf(node, tag_a) && ++ !DescendantOf(node, tag_p) && ++ /* We have to descide on this table stuff later ++ * <entry> processing is complex */ ++ !DescendantOf(node, tag_td) && !DescendantOf(node, tag_th)) ++ /* && wstrcasecmp(node->parent->element, "code") != 0) ++ above line may be needed later to properly convert <code> stuff */ ++ { ++ PPrintString(fout, indent, "<para>"); ++ PPrintText(fout, mode, indent, lexer, node->start, node->end); ++ PPrintString(fout, indent, "</para>"); ++ } ++ else { ++ if(DescendantOf(node, tag_style)) ++ fprintf(stderr, "PrintSgml: skipping style elements\n\n"); ++ else ++ PPrintText(fout, mode, indent, lexer, node->start, node->end); ++ } ++ } ++ else if(node->type == CDATATag && EscapeCdata) ++ PPrintText(fout, mode, indent, lexer, node->start, node->end); ++ else if (node->type == CommentTag) ++ PPrintComment(fout, indent, lexer, node); ++ else if (node->type == RootNode) ++ { ++ for (content = node->content; ++ content != null; ++ content = content->next) ++ PrintSgml(fout, mode, indent, lexer, content); ++ } ++ else if (node->type == DocTypeTag) ++ PPrintDocType(fout, indent, lexer, node); ++ else if (node->type == CDATATag) ++ PPrintCDATA(fout, indent, lexer, node); ++ else if (node->type == SectionTag) ++ PPrintSection(fout, indent, lexer, node); ++ else if (node->type == AspTag || ++ node->type == JsteTag || ++ node->type == PhpTag ) ++ PrintSgmlDefault(fout); ++ else if (node->type == ProcInsTag) ++ PPrintPI(fout, indent, lexer, node); ++ else if (node->type == XmlDecl)// && DbXml May be this is needed ++ PPrintXmlDecl(fout, indent, lexer, node); ++ else if (node->tag->model & CM_EMPTY || ++ (node->type == StartEndTag && !xHTML)) ++ { ++ if (!(node->tag->model & CM_INLINE)) ++ PCondFlushLine(fout, indent); ++ ++ if (MakeClean && node->tag == tag_wbr) ++ PPrintString(fout, indent, " "); ++ else ++ PrintSgmlTag(fout, mode, indent, lexer, node, SgmlTagStart); ++ } ++ else { ++ if (node->type == StartEndTag) ++ node->type = StartTag; ++ ++ if (node->tag && node->tag->parser == ParsePre) ++ { ++ PCondFlushLine(fout, indent); ++ ++ indent = 0; ++ PCondFlushLine(fout, indent); ++ ++ PrintSgmlTag(fout, mode, indent, lexer, node, SgmlTagStart); ++ PFlushLine(fout, indent); ++ ++ for (content = node->content; ++ content != null; ++ content = content->next) ++ PrintSgml(fout, (mode | PREFORMATTED | NOWRAP), ++ indent, lexer, content); ++ ++ PCondFlushLine(fout, indent); ++ PrintSgmlTag(fout, mode, indent, lexer, node, SgmlTagEnd); ++ PFlushLine(fout, indent); ++ ++ if (IndentContent == no && node->next != null) ++ PFlushLine(fout, indent); ++ } ++ else if (node->tag->model & CM_INLINE) ++ { PrintSgmlTag(fout, mode, indent, lexer, node, SgmlTagStart); ++ ++ if (ShouldIndent(node)) ++ { ++ PCondFlushLine(fout, indent); ++ indent += spaces; ++ ++ for (content = node->content; ++ content != null; ++ content = content->next) ++ PrintSgml(fout, mode, indent, lexer, content); ++ ++ PCondFlushLine(fout, indent); ++ indent -= spaces; ++ PCondFlushLine(fout, indent); ++ } ++ else ++ { ++ ++ for (content = node->content; ++ content != null; ++ content = content->next) ++ PrintSgml(fout, mode, indent, lexer, content); ++ } ++ ++ PrintSgmlTag(fout, mode, indent, lexer, node, SgmlTagEnd); ++ } ++ else ++ { PCondFlushLine(fout, indent); ++ if (SmartIndent && node->prev != null) ++ PFlushLine(fout, indent); ++ ++ PrintSgmlTag(fout, mode ,indent, lexer, node, SgmlTagStart); ++ if (ShouldIndent(node)) ++ PCondFlushLine(fout, indent); ++ else if (node->tag->model & CM_HTML || ++ node->tag == tag_noframes || ++ (node->tag->model & CM_HEAD && !(node->tag == tag_title))) ++ PFlushLine(fout, indent); ++ ++ if (ShouldIndent(node)) ++ { PCondFlushLine(fout, indent); ++ indent += spaces; ++ ++ for (content = node->content; ++ content != null; ++ content = content->next) ++ PrintSgml(fout, mode, indent, lexer, content); ++ PCondFlushLine(fout, indent); ++ indent -= spaces; ++ PCondFlushLine(fout, indent); ++ } ++ else ++ { Node *last; ++ last = null; ++ for (content = node->content; ++ content != null; ++ content = content->next) { ++ /* kludge for naked text before block level tag */ ++ if (last && !IndentContent && last->type == TextNode && ++ content->tag && !(content->tag->model & CM_INLINE) ) ++ { ++ /* PFlushLine(fout, indent); */ ++ PFlushLine(fout, indent); ++ } ++ ++ PrintSgml(fout, mode, ++ (ShouldIndent(node) ? indent+spaces : indent), ++ lexer, content); ++ last = content; ++ } ++ } ++ PrintSgmlTag(fout, mode, indent, lexer, node, SgmlTagEnd); ++ PFlushLine(fout, indent); ++ if (IndentContent == no && ++ node->next != null && ++ HideEndTags == no && ++ (node->tag->model & (CM_BLOCK|CM_LIST|CM_DEFLIST|CM_TABLE))) ++ PFlushLine(fout, indent); ++ } ++ } ++} ++ + void PPrintTree(Out *fout, uint mode, uint indent, + Lexer *lexer, Node *node) + { +@@ -2034,17 +2669,14 @@ + PPrintJste(fout, indent, lexer, node); + else if (node->type == PhpTag) + PPrintPhp(fout, indent, lexer, node); +- else if ( node->tag->model & CM_EMPTY +- || (node->type == StartEndTag && !xHTML) ) ++ else if (node->tag->model & CM_EMPTY || (node->type == StartEndTag && !xHTML)) + { + PCondFlushLine(fout, indent); + PPrintTag(lexer, fout, mode, indent, node); + PFlushLine(fout, indent); + +- /* CPR: folks don't want so much vertical spacing in XML + if (node->next) + PFlushLine(fout, indent); +- */ + } + else /* some kind of container element */ + { +@@ -2076,7 +2708,7 @@ + + PPrintTag(lexer, fout, mode, indent, node); + +- if ( !mixed && node->content ) ++ if (!mixed) + PFlushLine(fout, indent); + + for (content = node->content; +@@ -2084,16 +2716,14 @@ + content = content->next) + PPrintXMLTree(fout, mode, cindent, lexer, content); + +- if ( !mixed && node->content ) ++ if (!mixed) + PCondFlushLine(fout, cindent); + + PPrintEndTag(fout, mode, indent, node); + PCondFlushLine(fout, indent); + +- /* CPR: folks don't want so much vertical spacing in XML + if (node->next) + PFlushLine(fout, indent); +- */ + } + } + +--- /cise/tmp/ppadala/tidy/src/tab2space.c Wed Feb 6 04:09:37 2002 ++++ src/tab2space.c Sat Jul 6 23:50:55 2002 +@@ -2,7 +2,7 @@ + #include <stdlib.h> + #include <string.h> + +-#ifndef __BEOS__ ++#if !(defined(__BEOS__) || defined(linux)) + typedef unsigned int uint; + #endif + typedef unsigned char byte; +--- /cise/tmp/ppadala/tidy/src/tidy.c Sun Jul 7 23:29:25 2002 ++++ src/tidy.c Fri Jul 19 01:22:54 2002 +@@ -1853,6 +1853,10 @@ + IndentContent = yes; + SmartIndent = yes; + } ++ else if (wstrcasecmp(arg, "dbsgml") == 0) ++ DbSgml = yes; ++ else if(wstrcasecmp(arg, "dbxml") == 0) ++ DbXml = yes; + else if (wstrcasecmp(arg, "omit") == 0) + HideEndTags = yes; + else if (wstrcasecmp(arg, "upper") == 0) +@@ -2180,6 +2184,28 @@ + else + { + lexer->warnings = 0; ++ ++ if (DbSgml || DbXml) { ++ char *str; ++ ++ if(DbSgml) ++ str = "article PUBLIC \"-//OASIS//DTD DocBook V4.1//EN\""; ++ else ++ str = "article PUBLIC \"-//OASIS//DTD DocBk XML V4.1.2 //EN\""; ++ ++ EncloseBodyText = yes; /* We want those <p>s */ ++ EncloseBlockText = yes; ++ LogicalEmphasis = yes; ++ DropFontTags = yes; /* <font> .. </font> are not needed */ ++ ++ /* May be this should be decided by user */ ++ QuoteMarks = yes; ++ ++ doctype_mode = doctype_user; ++ /* TidyDeInit does MemFree(doctype_str) if it's != NULL */ ++ doctype_str = MemAlloc(wstrlen(str)); ++ wstrcpy(doctype_str, str); ++ } + + document = ParseDocument(lexer); + +@@ -2226,6 +2252,10 @@ + { + if (xHTML) + SetXHTMLDocType(lexer, document); ++ else if(DbSgml) ++ SetSgmlDocType(lexer, document); ++ else if(DbXml) ++ SetSgmlDocType(lexer, document); + else + FixDocType(lexer, document); + +@@ -2247,7 +2277,7 @@ + } + + /* ensure presence of initial <?XML version="1.0"?> */ +- if (XmlOut && XmlPi) ++ if ((XmlOut && XmlPi) || DbXml) + FixXmlDecl(lexer, document); + + /* +@@ -2381,9 +2411,12 @@ + /* Feature request #434940 - fix by Dave Raggett/Ignacio Vazquez-Abrams 21 Jun 01 */ + else if (BodyOnly) + PrintBody(&out, lexer, document); +- else +- PPrintTree(&out, null, 0, lexer, document); +- ++ else { ++ if(DbSgml || DbXml) ++ PrintSgml(&out, null, 0, lexer, document); ++ else ++ PPrintTree(&out, null, 0, lexer, document); ++ } + PFlushLine(&out, 0); + } + diff --git a/app-text/htmltidy/htmltidy-2.7.18-r1.ebuild b/app-text/htmltidy/htmltidy-2.7.18-r1.ebuild new file mode 100644 index 000000000000..e1b76f8ed50a --- /dev/null +++ b/app-text/htmltidy/htmltidy-2.7.18-r1.ebuild @@ -0,0 +1,46 @@ +# Copyright 1999-2002 Gentoo Technologies, Inc. +# Distributed under the terms of the GNU General Public License v2 +# $Header: /var/cvsroot/gentoo-x86/app-text/htmltidy/htmltidy-2.7.18-r1.ebuild,v 1.1 2003/01/03 05:43:19 satai Exp $ /home/cvsroot/gentoo-x86/app-text/htmltidy/htmltidy-2.7.18.ebuild, v 1.5 2002/07/25 11:07:00 cybersystem Exp $ + +# convert from normalized gentoo version number to htmltidy's wacky date thing + +SLOT="0" +LICENSE="GPL-2" +KEYWORDS="x86 ppc sparc" + +parts=(${PV//./ }) +vers=$(printf "%02d%02d%02d" ${parts[0]} ${parts[1]} ${parts[2]}) +MY_P=tidy_src_${vers} +S=${WORKDIR}/tidy + +DESCRIPTION="fix mistakes and tidy up sloppy editing in HTML and XML" +SRC_URI="http://tidy.sourceforge.net/src/${MY_P}.tgz" +HOMEPAGE="http://tidy.sourceforge.net/" +DEPEND="" + +src_unpack() { + unpack ${MY_P}.tgz + cd ${S} + ( use xml || use sgml ) && + patch -p0 < ${FILESDIR}/htmltidy-dbpatch.diff + + # skip "chown" + t=Makefile + cp $t $t.orig + sed 's:chgrp\|chown:#&:' $t.orig > $t +} + +src_compile() { + emake \ + OTHERCFLAGS="${CFLAGS}" \ + || die "emake failed" +} + +src_install() { + dodir /usr/bin + dodir /usr/share/man/man1 + make \ + INSTALLDIR="${D}/usr/" \ + MANPAGESDIR="${D}/usr/share/man" \ + install || die +} |