initial source import
diff --git a/doc/tools/sgmlconv/Makefile b/doc/tools/sgmlconv/Makefile
new file mode 100644
index 0000000..30a846e
--- /dev/null
+++ b/doc/tools/sgmlconv/Makefile
@@ -0,0 +1,67 @@
+# Simple makefile to control XML generation for the entire document tree.
+# This should be used from the top-level directory (Doc/), not the directory
+# that actually contains this file:
+#
+# $ pwd
+# .../Doc
+# $ make -f tools/sgmlconv/Makefile
+
+TOPDIR=.
+TOOLSDIR=tools
+
+SGMLRULES=../$(TOOLSDIR)/sgmlconv/make.rules
+# The 'inst' directory breaks the conversion, so skip it for now.
+SUBDIRS=api dist ext lib mac ref tut
+SUBMAKE=$(MAKE) -f $(SGMLRULES) TOOLSDIR=../$(TOOLSDIR)
+
+all: xml
+
+.PHONY: esis xml
+.PHONY: $(SUBDIRS)
+
+xml:
+ for DIR in $(SUBDIRS) ; do \
+ (cd $$DIR; $(SUBMAKE) xml) || exit $$? ; done
+
+esis:
+ for DIR in $(SUBDIRS) ; do \
+ (cd $$DIR; $(SUBMAKE) esis) || exit $$? ; done
+
+esis1:
+ for DIR in $(SUBDIRS) ; do \
+ (cd $$DIR; $(SUBMAKE) esis1) || exit $$? ; done
+
+tarball: xml
+ tar cf - tools/sgmlconv */*.xml | gzip -9 >xml-1.5.2b2.tgz
+
+api:
+ cd api; $(SUBMAKE)
+
+dist:
+ cd dist; $(SUBMAKE)
+
+ext:
+ cd ext; $(SUBMAKE)
+
+inst:
+ cd inst; $(SUBMAKE)
+
+lib:
+ cd lib; $(SUBMAKE)
+
+mac:
+ cd mac; $(SUBMAKE)
+
+ref:
+ cd ref; $(SUBMAKE)
+
+tut:
+ cd tut; $(SUBMAKE)
+
+clean:
+ for DIR in $(SUBDIRS) ; do \
+ (cd $$DIR; $(SUBMAKE) clean) ; done
+
+clobber:
+ for DIR in $(SUBDIRS) ; do \
+ (cd $$DIR; $(SUBMAKE) clobber) ; done
diff --git a/doc/tools/sgmlconv/README b/doc/tools/sgmlconv/README
new file mode 100644
index 0000000..1546293
--- /dev/null
+++ b/doc/tools/sgmlconv/README
@@ -0,0 +1,58 @@
+These scripts and Makefile fragment are used to convert the Python
+documentation in LaTeX format to XML.
+
+This material is preliminary and incomplete. Python 2.0 is required.
+
+To convert all documents to XML:
+
+ cd Doc/
+ make -f tools/sgmlconv/Makefile
+
+To convert one document to XML:
+
+ cd Doc/<document-dir>
+ make -f ../tools/sgmlconv/make.rules TOOLSDIR=../tools
+
+Please send comments and bug reports to python-docs@python.org.
+
+
+What do the tools do?
+---------------------
+
+latex2esis.py
+ Reads in a conversion specification written in XML
+ (conversion.xml), reads a LaTeX document fragment, and interprets
+ the markup according to the specification. The output is a stream
+ of ESIS events like those created by the nsgmls SGML parser, but
+ is *not* guaranteed to represent a single tree! This is done to
+ allow conversion per entity rather than per document. Since many
+ of the LaTeX files for the Python documentation contain two
+ sections on closely related modules, it is important to allow both
+ of the resulting <section> elements to exist in the same output
+ stream. Additionally, since comments are not supported in ESIS,
+ comments are converted to <COMMENT> elements, which might exist at
+ the same level as the top-level content elements.
+
+ The output of latex2esis.py gets saved as <filename>.esis1.
+
+docfixer.py
+ This is the really painful part of the conversion. Well, it's the
+ second really painful part, but more of the pain is specific to
+ the structure of the Python documentation and desired output
+ rather than to the parsing of LaTeX markup.
+
+ This script loads the ESIS data created by latex2esis.py into a
+ DOM document *fragment* (remember, the latex2esis.py output may
+ not be well-formed). Once loaded, it walks over the tree many
+ times looking for a variety of possible specific
+ micro-conversions. Most of the code is not in any way "general".
+ After processing the fragment, a new ESIS data stream is written
+ out. Like the input, it may not represent a well-formed
+ document, but does represent a parsed entity.
+
+ The output of docfixer.py is what gets saved in <filename>.esis.
+
+esis2sgml.py
+ Reads an ESIS stream and converts it to SGML or XML. This also
+ converts <COMMENT> elements to real comments. This works quickly
+ because there's not much to actually do.
diff --git a/doc/tools/sgmlconv/conversion.xml b/doc/tools/sgmlconv/conversion.xml
new file mode 100644
index 0000000..7759bad
--- /dev/null
+++ b/doc/tools/sgmlconv/conversion.xml
@@ -0,0 +1,757 @@
+<?xml version="1.0" encoding="iso-8859-1"?>
+<conversion>
+ <!-- Miscellaneous. -->
+ <macro name="declaremodule">
+ <attribute name="id" optional="yes"/>
+ <attribute name="type"/>
+ <attribute name="name"/>
+ </macro>
+ <macro name="modulesynopsis">
+ <content/>
+ </macro>
+ <macro name="platform">
+ <content/>
+ </macro>
+ <macro name="deprecated">
+ <attribute name="version"/>
+ <content/>
+ </macro>
+ <macro name="label">
+ <attribute name="id"/>
+ </macro>
+ <macro name="nodename" outputname="label">
+ <attribute name="id"/>
+ </macro>
+ <macro name="localmoduletable"/>
+ <macro name="manpage">
+ <attribute name="name"/>
+ <attribute name="section"/>
+ </macro>
+ <macro name="module">
+ <content/>
+ </macro>
+ <macro name="moduleauthor">
+ <attribute name="name"/>
+ <attribute name="email"/>
+ </macro>
+ <macro name="citetitle">
+ <attribute name="href" optional="yes"/>
+ <content/>
+ </macro>
+ <macro name="rfc">
+ <attribute name="num"/>
+ </macro>
+ <macro name="sectionauthor" outputname="author">
+ <attribute name="name"/>
+ <attribute name="email"/>
+ </macro>
+ <macro name="author">
+ <attribute name="name"/>
+ </macro>
+ <macro name="authoraddress">
+ <content/>
+ </macro>
+ <macro name="shortversion"/>
+ <macro name="versionadded">
+ <attribute name="version"/>
+ </macro>
+ <!-- This is broken: we need to re-order the optional and required
+ parameters, making the optional parameter the content for the
+ element. The processor is not powerful enough to handle this.
+ -->
+ <macro name="versionchanged">
+ <attribute name="how" optional="yes"/>
+ <attribute name="version"/>
+ </macro>
+
+ <!-- Module referencing. -->
+ <macro name="refmodule" outputname="module">
+ <attribute name="" optional="yes"/>
+ <attribute name="link">yes</attribute>
+ <content/>
+ </macro>
+
+ <!-- Information units. -->
+ <!-- C things. -->
+ <environment name="cfuncdesc">
+ <attribute name="type"/>
+ <attribute name="name"/>
+ <child name="args"/>
+ </environment>
+ <environment name="ctypedesc">
+ <attribute name="tag" optional="yes"/>
+ <attribute name="name"/>
+ </environment>
+ <environment name="cvardesc">
+ <attribute name="type"/>
+ <attribute name="name"/>
+ </environment>
+
+ <!-- Python things. -->
+ <macro name="optional">
+ <content/>
+ </macro>
+ <macro name="unspecified"/>
+ <macro name="moreargs"/>
+ <environment name="classdesc">
+ <attribute name="name"/>
+ <child name="args"/>
+ </environment>
+ <environment name="datadesc">
+ <attribute name="name"/>
+ </environment>
+ <macro name="dataline">
+ <attribute name="name"/>
+ </macro>
+ <environment name="excdesc">
+ <attribute name="name"/>
+ </environment>
+
+ <environment name="funcdesc">
+ <attribute name="name"/>
+ <child name="args"/>
+ </environment>
+ <macro name="funcline">
+ <attribute name="name"/>
+ <child name="args"/>
+ </macro>
+ <environment name="funcdescni" outputname="funcdesc">
+ <attribute name="index">no</attribute>
+ <attribute name="name"/>
+ <child name="args"/>
+ </environment>
+ <macro name="funclineni" outputname="funcline">
+ <attribute name="index">no</attribute>
+ <attribute name="name"/>
+ <child name="args"/>
+ </macro>
+
+ <environment name="memberdesc">
+ <attribute name="class" optional="yes"/>
+ <attribute name="name"/>
+ </environment>
+ <environment name="memberdescni" outputname="memberdesc">
+ <attribute name="index">no</attribute>
+ <attribute name="class" optional="yes"/>
+ <attribute name="name"/>
+ </environment>
+
+ <environment name="methoddesc">
+ <attribute name="class" optional="yes"/>
+ <attribute name="name"/>
+ <child name="args"/>
+ </environment>
+ <macro name="methodline">
+ <attribute name="class" optional="yes"/>
+ <attribute name="name"/>
+ <child name="args"/>
+ </macro>
+ <environment name="methoddescni">
+ <attribute name="index">no</attribute>
+ <attribute name="class" optional="yes"/>
+ <attribute name="name"/>
+ <child name="args"/>
+ </environment>
+ <macro name="methodlineni" outputname="methodline">
+ <attribute name="index">no</attribute>
+ <attribute name="class" optional="yes"/>
+ <attribute name="name"/>
+ <child name="args"/>
+ </macro>
+
+ <environment name="opcodedesc">
+ <attribute name="name"/>
+ <attribute name="var"/>
+ </environment>
+
+ <!-- "See also:" sections. -->
+ <macro name="seemodule">
+ <attribute name="ref" optional="yes"/>
+ <attribute name="name"/>
+ <child name="description"/>
+ </macro>
+ <macro name="seepep">
+ <attribute name="number"/>
+ <child name="title"/>
+ <child name="description"/>
+ </macro>
+ <macro name="seerfc">
+ <attribute name="number"/>
+ <child name="title"/>
+ <child name="description"/>
+ </macro>
+ <macro name="seetext">
+ <child name="description"/>
+ </macro>
+ <macro name="seetitle">
+ <attribute name="href" optional="yes"/>
+ <child name="title"/>
+ <child name="description"/>
+ </macro>
+ <macro name="seeurl">
+ <attribute name="href"/>
+ <child name="description"/>
+ </macro>
+
+ <!-- Index-generating markup. -->
+ <macro name="index" outputname="indexterm">
+ <attribute name="term1"/>
+ </macro>
+ <macro name="indexii" outputname="indexterm">
+ <attribute name="term1"/>
+ <attribute name="term2"/>
+ </macro>
+ <macro name="indexiii" outputname="indexterm">
+ <attribute name="term1"/>
+ <attribute name="term2"/>
+ <attribute name="term3"/>
+ </macro>
+ <macro name="indexiv" outputname="indexterm">
+ <attribute name="term1"/>
+ <attribute name="term2"/>
+ <attribute name="term3"/>
+ <attribute name="term4"/>
+ </macro>
+
+ <macro name="ttindex" outputname="indexterm">
+ <attribute name="style">tt</attribute>
+ <attribute name="term1"/>
+ </macro>
+
+ <macro name="refmodindex">
+ <attribute name="module"/>
+ </macro>
+ <macro name="stmodindex">
+ <attribute name="module"/>
+ </macro>
+ <macro name="refbimodindex" outputname="refmodindex">
+ <attribute name="module"/>
+ </macro>
+ <macro name="refexmodindex" outputname="refmodindex">
+ <attribute name="module"/>
+ </macro>
+ <macro name="refstmodindex" outputname="refmodindex">
+ <attribute name="module"/>
+ </macro>
+
+ <macro name="bifuncindex">
+ <attribute name="name"/>
+ </macro>
+ <macro name="exindex">
+ <attribute name="name"/>
+ </macro>
+ <macro name="obindex">
+ <attribute name="name"/>
+ </macro>
+ <macro name="kwindex">
+ <attribute name="name"/>
+ </macro>
+ <macro name="opindex">
+ <attribute name="type"/>
+ </macro>
+ <macro name="stindex">
+ <attribute name="type"/>
+ </macro>
+ <macro name="withsubitem">
+ <attribute name="text"/>
+ <content/>
+ </macro>
+ <macro name="setindexsubitem">
+ <attribute name="text"/>
+ </macro>
+
+ <!-- Entity management. -->
+ <macro name="include">
+ <attribute name="source"/>
+ </macro>
+ <macro name="input">
+ <attribute name="source"/>
+ </macro>
+
+ <!-- Large-scale document structure. -->
+ <macro name="documentclass">
+ <attribute name="classname"/>
+ </macro>
+
+ <macro name="usepackage">
+ <attribute name="options" optional="yes"/>
+ <attribute name="pkg"/>
+ </macro>
+
+ <environment name="document"
+ endcloses="chapter chapter* section section*
+ subsection subsection*
+ subsubsection subsubsection*
+ paragraph paragraph* subparagraph subparagraph*"/>
+
+ <macro name="chapter"
+ closes="chapter chapter* section section* subsection subsection*
+ subsubsection subsubsection*
+ paragraph paragraph* subparagraph subparagraph*">
+ <text>
+</text>
+ <child name="title"/>
+ <content implied="yes"/>
+ </macro>
+ <macro name="chapter*" outputname="chapter"
+ closes="chapter chapter* section section* subsection subsection*
+ subsubsection subsubsection*
+ paragraph paragraph* subparagraph subparagraph*">
+ <attribute name="numbered">no</attribute>
+ <text>
+</text>
+ <child name="title"/>
+ <content implied="yes"/>
+ </macro>
+
+ <macro name="section"
+ closes="section section* subsection subsection*
+ subsubsection subsubsection*
+ paragraph paragraph* subparagraph subparagraph*">
+ <text>
+</text>
+ <child name="title"/>
+ <content implied="yes"/>
+ </macro>
+ <macro name="section*" outputname="section"
+ closes="section section* subsection subsection*
+ subsubsection subsubsection*
+ paragraph paragraph* subparagraph subparagraph*">
+ <attribute name="numbered">no</attribute>
+ <text>
+</text>
+ <child name="title"/>
+ <content implied="yes"/>
+ </macro>
+
+ <macro name="subsection"
+ closes="subsection subsection* subsubsection subsubsection*
+ paragraph paragraph* subparagraph subparagraph*">
+ <text>
+</text>
+ <child name="title"/>
+ <content implied="yes"/>
+ </macro>
+ <macro name="subsection*" outputname="subsection"
+ closes="subsection subsection* subsubsection subsubsection*
+ paragraph paragraph* subparagraph subparagraph*">
+ <attribute name="numbered">no</attribute>
+ <text>
+</text>
+ <child name="title"/>
+ <content implied="yes"/>
+ </macro>
+
+ <macro name="subsubsection"
+ closes="subsubsection subsubsection*
+ paragraph paragraph* subparagraph subparagraph*">
+ <text>
+</text>
+ <child name="title"/>
+ <content implied="yes"/>
+ </macro>
+ <macro name="subsubsection*" outputname="subsubsection"
+ closes="subsubsection subsubsection*
+ paragraph paragraph* subparagraph subparagraph*">
+ <attribute name="numbered">no</attribute>
+ <text>
+</text>
+ <child name="title"/>
+ <content implied="yes"/>
+ </macro>
+
+ <macro name="paragraph"
+ closes="paragraph paragraph* subparagraph subparagraph*">
+ <text>
+</text>
+ <child name="title"/>
+ <content implied="yes"/>
+ </macro>
+ <macro name="paragraph*" outputname="paragraph"
+ closes="paragraph paragraph* subparagraph subparagraph*">
+ <attribute name="numbered">no</attribute>
+ <text>
+</text>
+ <child name="title"/>
+ <content implied="yes"/>
+ </macro>
+
+ <macro name="subparagraph"
+ closes="subparagraph subparagraph*">
+ <text>
+</text>
+ <child name="title"/>
+ <content implied="yes"/>
+ </macro>
+ <macro name="subparagraph*" outputname="subparagraph"
+ closes="subparagraph subparagraph*">
+ <attribute name="numbered">no</attribute>
+ <text>
+</text>
+ <child name="title"/>
+ <content implied="yes"/>
+ </macro>
+ <macro name="title">
+ <content/>
+ </macro>
+
+ <macro name="appendix" outputname="back-matter"
+ closes="chapter chapter* section subsection subsubsection
+ paragraph subparagraph"/>
+
+ <environment name="list"
+ endcloses="item">
+ <attribute name="bullet"/>
+ <attribute name="init"/>
+ </environment>
+ <macro name="item" closes="item">
+ <child name="leader" optional="yes"/>
+ <content implied="yes"/>
+ </macro>
+
+ <macro name="ref">
+ <attribute name="ref"/>
+ </macro>
+
+ <environment name="description" outputname="descriptionlist"
+ endcloses="item"/>
+
+ <environment name="enumerate" outputname="enumeration"
+ endcloses="item"/>
+
+ <environment name="fulllineitems"
+ endcloses="item"/>
+
+ <environment name="itemize"
+ endcloses="item"/>
+
+ <environment name="definitions" outputname="definitionlist"
+ encloses="term"/>
+ <macro name="term" closes="definition">
+ <!-- not really optional, but uses the [] syntax -->
+ <child name="term" optional="yes"/>
+ <child name="definition" implied="yes"/>
+ </macro>
+
+ <environment name="alltt" outputname="verbatim"/>
+ <environment name="comment" verbatim="yes"/>
+ <environment name="verbatim" verbatim="yes"/>
+ <environment name="verbatim*" verbatim="yes">
+ <!-- not used anywhere, but it's a standard LaTeXism -->
+ <attribute name="spaces">visible</attribute>
+ </environment>
+
+ <!-- Table markup. -->
+ <macro name="hline"/>
+ <environment name="tableii" outputname="table">
+ <attribute name="cols">2</attribute>
+ <attribute name="colspec"/>
+ <attribute name="style"/>
+ <child name="entry"/>
+ <text>
+ </text>
+ <child name="entry"/>
+ </environment>
+ <environment name="longtableii" outputname="table">
+ <attribute name="cols">2</attribute>
+ <attribute name="colspec"/>
+ <attribute name="style"/>
+ <child name="entry"/>
+ <text>
+ </text>
+ <child name="entry"/>
+ </environment>
+ <macro name="lineii" outputname="row">
+ <child name="entry"/>
+ <text>
+ </text>
+ <child name="entry"/>
+ </macro>
+
+ <environment name="tableiii" outputname="table">
+ <attribute name="cols">3</attribute>
+ <attribute name="colspec"/>
+ <attribute name="style"/>
+ <child name="entry"/>
+ <text>
+ </text>
+ <child name="entry"/>
+ <text>
+ </text>
+ <child name="entry"/>
+ </environment>
+ <environment name="longtableiii" outputname="table">
+ <attribute name="cols">3</attribute>
+ <attribute name="colspec"/>
+ <attribute name="style"/>
+ <child name="entry"/>
+ <text>
+ </text>
+ <child name="entry"/>
+ <text>
+ </text>
+ <child name="entry"/>
+ </environment>
+ <macro name="lineiii" outputname="row">
+ <child name="entry"/>
+ <text>
+ </text>
+ <child name="entry"/>
+ <text>
+ </text>
+ <child name="entry"/>
+ </macro>
+
+ <environment name="tableiv" outputname="table">
+ <attribute name="cols">4</attribute>
+ <attribute name="colspec"/>
+ <attribute name="style"/>
+ <child name="entry"/>
+ <text>
+ </text>
+ <child name="entry"/>
+ <text>
+ </text>
+ <child name="entry"/>
+ <text>
+ </text>
+ <child name="entry"/>
+ </environment>
+ <environment name="longtableiv" outputname="table">
+ <attribute name="cols">4</attribute>
+ <attribute name="colspec"/>
+ <attribute name="style"/>
+ <child name="entry"/>
+ <text>
+ </text>
+ <child name="entry"/>
+ <text>
+ </text>
+ <child name="entry"/>
+ <text>
+ </text>
+ <child name="entry"/>
+ </environment>
+ <macro name="lineiv" outputname="row">
+ <child name="entry"/>
+ <text>
+ </text>
+ <child name="entry"/>
+ <text>
+ </text>
+ <child name="entry"/>
+ <text>
+ </text>
+ <child name="entry"/>
+ </macro>
+
+ <!-- These are handled at a later translation stage, at least for now. -->
+ <macro name="Cpp" outputname="">
+ <text>C++</text>
+ </macro>
+ <macro name="geq" outputname="">
+ <entityref name="geq"/>
+ </macro>
+ <macro name="LaTeX" outputname="">
+ <text>LaTeX</text>
+ </macro>
+ <macro name="ldots" outputname="">
+ <text>...</text>
+ </macro>
+ <macro name="leq" outputname="">
+ <entityref name="leq"/>
+ </macro>
+ <macro name="TeX" outputname="">
+ <text>TeX</text>
+ </macro>
+ <macro name="version"/>
+
+ <!-- Distutils things. -->
+ <macro name="command">
+ <content/>
+ </macro>
+ <macro name="option">
+ <content/>
+ </macro>
+ <macro name="filevar" outputname="var">
+ <content/>
+ </macro>
+ <macro name="XXX" outputname="editorial-comment">
+ <content/>
+ </macro>
+
+ <!-- Misc. -->
+ <macro name="emph">
+ <content/>
+ </macro>
+ <macro name="strong">
+ <content/>
+ </macro>
+ <macro name="textrm">
+ <content/>
+ </macro>
+ <macro name="texttt">
+ <content/>
+ </macro>
+ <macro name="code">
+ <content/>
+ </macro>
+ <macro name="exception">
+ <content/>
+ </macro>
+ <macro name="keyword">
+ <content/>
+ </macro>
+ <macro name="samp">
+ <content/>
+ </macro>
+ <macro name="class">
+ <content/>
+ </macro>
+ <macro name="cdata">
+ <content/>
+ </macro>
+ <macro name="cfunction">
+ <content/>
+ </macro>
+ <macro name="ctype">
+ <content/>
+ </macro>
+ <macro name="pytype">
+ <content/>
+ </macro>
+ <macro name="character">
+ <content/>
+ </macro>
+ <macro name="constant">
+ <content/>
+ </macro>
+ <macro name="envvar" outputname="envar">
+ <content/>
+ </macro>
+ <macro name="file" outputname="filename">
+ <content/>
+ </macro>
+ <macro name="filenq" outputname="filename">
+ <attribute name="quote">no</attribute>
+ <content/>
+ </macro>
+ <macro name="function">
+ <content/>
+ </macro>
+ <macro name="kbd">
+ <content/>
+ </macro>
+ <macro name="makevar">
+ <content/>
+ </macro>
+ <macro name="method">
+ <content/>
+ </macro>
+ <macro name="member">
+ <content/>
+ </macro>
+ <macro name="mimetype">
+ <content/>
+ </macro>
+ <macro name="newsgroup">
+ <content/>
+ </macro>
+ <macro name="program" outputname="command">
+ <content/>
+ </macro>
+ <macro name="programopt" outputname="option">
+ <content/>
+ </macro>
+ <macro name="longprogramopt" outputname="longoption">
+ <content/>
+ </macro>
+ <macro name="regexp">
+ <content/>
+ </macro>
+ <macro name="var">
+ <content/>
+ </macro>
+ <macro name="email">
+ <content/>
+ </macro>
+ <macro name="url">
+ <content/>
+ </macro>
+ <macro name="footnote">
+ <content/>
+ </macro>
+ <macro name="dfn" outputname="definedterm">
+ <content/>
+ </macro>
+
+ <macro name="mbox">
+ <content/>
+ </macro>
+
+ <!-- minimal math stuff to get by -->
+ <macro name="pi"/>
+ <macro name="sqrt">
+ <content/>
+ </macro>
+ <macro name="frac" outputname="fraction">
+ <child name="numerator"/>
+ <child name="denominator"/>
+ </macro>
+ <macro name="sum">
+ <content/>
+ </macro>
+
+ <!-- Conversions to text; perhaps could be different? There's -->
+ <!-- no way for a style sheet to work with these this way. -->
+ <macro name="ABC" outputname="">
+ <text>ABC</text>
+ </macro>
+ <macro name="ASCII" outputname="">
+ <text>ASCII</text>
+ </macro>
+ <macro name="C" outputname="">
+ <text>C</text>
+ </macro>
+ <macro name="EOF" outputname="">
+ <text>EOF</text>
+ </macro>
+ <macro name="e" outputname="">
+ <text>\</text>
+ </macro>
+ <macro name="NULL" outputname="constant">
+ <text>NULL</text>
+ </macro>
+ <macro name="POSIX" outputname="">
+ <text>POSIX</text>
+ </macro>
+ <macro name="UNIX" outputname="">
+ <text>Unix</text>
+ </macro>
+ <macro name="textasciitilde" outputname="">
+ <text>~</text>
+ </macro>
+
+ <!-- These will end up disappearing as well! -->
+ <macro name="catcode" outputname=""/>
+ <macro name="fi" outputname=""/>
+ <macro name="ifhtml" outputname=""/>
+ <macro name="indexname" outputname=""/>
+ <macro name="labelwidth" outputname=""/>
+ <macro name="large" outputname=""/>
+ <macro name="leftmargin" outputname=""/>
+ <macro name="makeindex" outputname=""/>
+ <macro name="makemodindex" outputname=""/>
+ <macro name="maketitle" outputname=""/>
+ <macro name="noindent" outputname=""/>
+ <macro name="protect" outputname=""/>
+ <macro name="renewcommand">
+ <attribute name="macro"/>
+ <attribute name="nargs" optional="yes"/>
+ <content/>
+ </macro>
+ <macro name="tableofcontents" outputname=""/>
+ <macro name="vspace">
+ <attribute name="size"/>
+ </macro>
+</conversion>
diff --git a/doc/tools/sgmlconv/docfixer.py b/doc/tools/sgmlconv/docfixer.py
new file mode 100755
index 0000000..463276b
--- /dev/null
+++ b/doc/tools/sgmlconv/docfixer.py
@@ -0,0 +1,1033 @@
+#! /usr/bin/env python
+
+"""Perform massive transformations on a document tree created from the LaTeX
+of the Python documentation, and dump the ESIS data for the transformed tree.
+"""
+
+
+import errno
+import esistools
+import re
+import string
+import sys
+import xml.dom
+import xml.dom.minidom
+
+ELEMENT = xml.dom.Node.ELEMENT_NODE
+ENTITY_REFERENCE = xml.dom.Node.ENTITY_REFERENCE_NODE
+TEXT = xml.dom.Node.TEXT_NODE
+
+
+class ConversionError(Exception):
+ pass
+
+
+ewrite = sys.stderr.write
+try:
+ # We can only do this trick on Unix (if tput is on $PATH)!
+ if sys.platform != "posix" or not sys.stderr.isatty():
+ raise ImportError
+ import commands
+except ImportError:
+ bwrite = ewrite
+else:
+ def bwrite(s, BOLDON=commands.getoutput("tput bold"),
+ BOLDOFF=commands.getoutput("tput sgr0")):
+ ewrite("%s%s%s" % (BOLDON, s, BOLDOFF))
+
+
+PARA_ELEMENT = "para"
+
+DEBUG_PARA_FIXER = 0
+
+if DEBUG_PARA_FIXER:
+ def para_msg(s):
+ ewrite("*** %s\n" % s)
+else:
+ def para_msg(s):
+ pass
+
+
+def get_first_element(doc, gi):
+ for n in doc.childNodes:
+ if n.nodeName == gi:
+ return n
+
+def extract_first_element(doc, gi):
+ node = get_first_element(doc, gi)
+ if node is not None:
+ doc.removeChild(node)
+ return node
+
+
+def get_documentElement(node):
+ result = None
+ for child in node.childNodes:
+ if child.nodeType == ELEMENT:
+ result = child
+ return result
+
+
+def set_tagName(elem, gi):
+ elem.nodeName = elem.tagName = gi
+
+
+def find_all_elements(doc, gi):
+ nodes = []
+ if doc.nodeName == gi:
+ nodes.append(doc)
+ for child in doc.childNodes:
+ if child.nodeType == ELEMENT:
+ if child.tagName == gi:
+ nodes.append(child)
+ for node in child.getElementsByTagName(gi):
+ nodes.append(node)
+ return nodes
+
+def find_all_child_elements(doc, gi):
+ nodes = []
+ for child in doc.childNodes:
+ if child.nodeName == gi:
+ nodes.append(child)
+ return nodes
+
+
+def find_all_elements_from_set(doc, gi_set):
+ return __find_all_elements_from_set(doc, gi_set, [])
+
+def __find_all_elements_from_set(doc, gi_set, nodes):
+ if doc.nodeName in gi_set:
+ nodes.append(doc)
+ for child in doc.childNodes:
+ if child.nodeType == ELEMENT:
+ __find_all_elements_from_set(child, gi_set, nodes)
+ return nodes
+
+
+def simplify(doc, fragment):
+ # Try to rationalize the document a bit, since these things are simply
+ # not valid SGML/XML documents as they stand, and need a little work.
+ documentclass = "document"
+ inputs = []
+ node = extract_first_element(fragment, "documentclass")
+ if node is not None:
+ documentclass = node.getAttribute("classname")
+ node = extract_first_element(fragment, "title")
+ if node is not None:
+ inputs.append(node)
+ # update the name of the root element
+ node = get_first_element(fragment, "document")
+ if node is not None:
+ set_tagName(node, documentclass)
+ while 1:
+ node = extract_first_element(fragment, "input")
+ if node is None:
+ break
+ inputs.append(node)
+ if inputs:
+ docelem = get_documentElement(fragment)
+ inputs.reverse()
+ for node in inputs:
+ text = doc.createTextNode("\n")
+ docelem.insertBefore(text, docelem.firstChild)
+ docelem.insertBefore(node, text)
+ docelem.insertBefore(doc.createTextNode("\n"), docelem.firstChild)
+ while fragment.firstChild and fragment.firstChild.nodeType == TEXT:
+ fragment.removeChild(fragment.firstChild)
+
+
+def cleanup_root_text(doc):
+ discards = []
+ skip = 0
+ for n in doc.childNodes:
+ prevskip = skip
+ skip = 0
+ if n.nodeType == TEXT and not prevskip:
+ discards.append(n)
+ elif n.nodeName == "COMMENT":
+ skip = 1
+ for node in discards:
+ doc.removeChild(node)
+
+
+DESCRIPTOR_ELEMENTS = (
+ "cfuncdesc", "cvardesc", "ctypedesc",
+ "classdesc", "memberdesc", "memberdescni", "methoddesc", "methoddescni",
+ "excdesc", "funcdesc", "funcdescni", "opcodedesc",
+ "datadesc", "datadescni",
+ )
+
+def fixup_descriptors(doc, fragment):
+ sections = find_all_elements(fragment, "section")
+ for section in sections:
+ find_and_fix_descriptors(doc, section)
+
+
+def find_and_fix_descriptors(doc, container):
+ children = container.childNodes
+ for child in children:
+ if child.nodeType == ELEMENT:
+ tagName = child.tagName
+ if tagName in DESCRIPTOR_ELEMENTS:
+ rewrite_descriptor(doc, child)
+ elif tagName == "subsection":
+ find_and_fix_descriptors(doc, child)
+
+
+def rewrite_descriptor(doc, descriptor):
+ #
+ # Do these things:
+ # 1. Add an "index='no'" attribute to the element if the tagName
+ # ends in 'ni', removing the 'ni' from the name.
+ # 2. Create a <signature> from the name attribute
+ # 2a. Create an <args> if it appears to be available.
+ # 3. Create additional <signature>s from <*line{,ni}> elements,
+ # if found.
+ # 4. If a <versionadded> is found, move it to an attribute on the
+ # descriptor.
+ # 5. Move remaining child nodes to a <description> element.
+ # 6. Put it back together.
+ #
+ # 1.
+ descname = descriptor.tagName
+ index = 1
+ if descname[-2:] == "ni":
+ descname = descname[:-2]
+ descriptor.setAttribute("index", "no")
+ set_tagName(descriptor, descname)
+ index = 0
+ desctype = descname[:-4] # remove 'desc'
+ linename = desctype + "line"
+ if not index:
+ linename = linename + "ni"
+ # 2.
+ signature = doc.createElement("signature")
+ name = doc.createElement("name")
+ signature.appendChild(doc.createTextNode("\n "))
+ signature.appendChild(name)
+ name.appendChild(doc.createTextNode(descriptor.getAttribute("name")))
+ descriptor.removeAttribute("name")
+ # 2a.
+ if descriptor.hasAttribute("var"):
+ if descname != "opcodedesc":
+ raise RuntimeError, \
+ "got 'var' attribute on descriptor other than opcodedesc"
+ variable = descriptor.getAttribute("var")
+ if variable:
+ args = doc.createElement("args")
+ args.appendChild(doc.createTextNode(variable))
+ signature.appendChild(doc.createTextNode("\n "))
+ signature.appendChild(args)
+ descriptor.removeAttribute("var")
+ newchildren = [signature]
+ children = descriptor.childNodes
+ pos = skip_leading_nodes(children)
+ if pos < len(children):
+ child = children[pos]
+ if child.nodeName == "args":
+ # move <args> to <signature>, or remove if empty:
+ child.parentNode.removeChild(child)
+ if len(child.childNodes):
+ signature.appendChild(doc.createTextNode("\n "))
+ signature.appendChild(child)
+ signature.appendChild(doc.createTextNode("\n "))
+ # 3, 4.
+ pos = skip_leading_nodes(children, pos)
+ while pos < len(children) \
+ and children[pos].nodeName in (linename, "versionadded"):
+ if children[pos].tagName == linename:
+ # this is really a supplemental signature, create <signature>
+ oldchild = children[pos].cloneNode(1)
+ try:
+ sig = methodline_to_signature(doc, children[pos])
+ except KeyError:
+ print oldchild.toxml()
+ raise
+ newchildren.append(sig)
+ else:
+ # <versionadded added=...>
+ descriptor.setAttribute(
+ "added", children[pos].getAttribute("version"))
+ pos = skip_leading_nodes(children, pos + 1)
+ # 5.
+ description = doc.createElement("description")
+ description.appendChild(doc.createTextNode("\n"))
+ newchildren.append(description)
+ move_children(descriptor, description, pos)
+ last = description.childNodes[-1]
+ if last.nodeType == TEXT:
+ last.data = string.rstrip(last.data) + "\n "
+ # 6.
+ # should have nothing but whitespace and signature lines in <descriptor>;
+ # discard them
+ while descriptor.childNodes:
+ descriptor.removeChild(descriptor.childNodes[0])
+ for node in newchildren:
+ descriptor.appendChild(doc.createTextNode("\n "))
+ descriptor.appendChild(node)
+ descriptor.appendChild(doc.createTextNode("\n"))
+
+
+def methodline_to_signature(doc, methodline):
+ signature = doc.createElement("signature")
+ signature.appendChild(doc.createTextNode("\n "))
+ name = doc.createElement("name")
+ name.appendChild(doc.createTextNode(methodline.getAttribute("name")))
+ methodline.removeAttribute("name")
+ signature.appendChild(name)
+ if len(methodline.childNodes):
+ args = doc.createElement("args")
+ signature.appendChild(doc.createTextNode("\n "))
+ signature.appendChild(args)
+ move_children(methodline, args)
+ signature.appendChild(doc.createTextNode("\n "))
+ return signature
+
+
+def move_children(origin, dest, start=0):
+ children = origin.childNodes
+ while start < len(children):
+ node = children[start]
+ origin.removeChild(node)
+ dest.appendChild(node)
+
+
+def handle_appendix(doc, fragment):
+ # must be called after simplify() if document is multi-rooted to begin with
+ docelem = get_documentElement(fragment)
+ toplevel = docelem.tagName == "manual" and "chapter" or "section"
+ appendices = 0
+ nodes = []
+ for node in docelem.childNodes:
+ if appendices:
+ nodes.append(node)
+ elif node.nodeType == ELEMENT:
+ appnodes = node.getElementsByTagName("appendix")
+ if appnodes:
+ appendices = 1
+ parent = appnodes[0].parentNode
+ parent.removeChild(appnodes[0])
+ parent.normalize()
+ if nodes:
+ map(docelem.removeChild, nodes)
+ docelem.appendChild(doc.createTextNode("\n\n\n"))
+ back = doc.createElement("back-matter")
+ docelem.appendChild(back)
+ back.appendChild(doc.createTextNode("\n"))
+ while nodes and nodes[0].nodeType == TEXT \
+ and not string.strip(nodes[0].data):
+ del nodes[0]
+ map(back.appendChild, nodes)
+ docelem.appendChild(doc.createTextNode("\n"))
+
+
+def handle_labels(doc, fragment):
+ for label in find_all_elements(fragment, "label"):
+ id = label.getAttribute("id")
+ if not id:
+ continue
+ parent = label.parentNode
+ parentTagName = parent.tagName
+ if parentTagName == "title":
+ parent.parentNode.setAttribute("id", id)
+ else:
+ parent.setAttribute("id", id)
+ # now, remove <label id="..."/> from parent:
+ parent.removeChild(label)
+ if parentTagName == "title":
+ parent.normalize()
+ children = parent.childNodes
+ if children[-1].nodeType == TEXT:
+ children[-1].data = string.rstrip(children[-1].data)
+
+
+def fixup_trailing_whitespace(doc, wsmap):
+ queue = [doc]
+ while queue:
+ node = queue[0]
+ del queue[0]
+ if wsmap.has_key(node.nodeName):
+ ws = wsmap[node.tagName]
+ children = node.childNodes
+ children.reverse()
+ if children[0].nodeType == TEXT:
+ data = string.rstrip(children[0].data) + ws
+ children[0].data = data
+ children.reverse()
+ # hack to get the title in place:
+ if node.tagName == "title" \
+ and node.parentNode.firstChild.nodeType == ELEMENT:
+ node.parentNode.insertBefore(doc.createText("\n "),
+ node.parentNode.firstChild)
+ for child in node.childNodes:
+ if child.nodeType == ELEMENT:
+ queue.append(child)
+
+
+def normalize(doc):
+ for node in doc.childNodes:
+ if node.nodeType == ELEMENT:
+ node.normalize()
+
+
+def cleanup_trailing_parens(doc, element_names):
+ d = {}
+ for gi in element_names:
+ d[gi] = gi
+ rewrite_element = d.has_key
+ queue = []
+ for node in doc.childNodes:
+ if node.nodeType == ELEMENT:
+ queue.append(node)
+ while queue:
+ node = queue[0]
+ del queue[0]
+ if rewrite_element(node.tagName):
+ children = node.childNodes
+ if len(children) == 1 \
+ and children[0].nodeType == TEXT:
+ data = children[0].data
+ if data[-2:] == "()":
+ children[0].data = data[:-2]
+ else:
+ for child in node.childNodes:
+ if child.nodeType == ELEMENT:
+ queue.append(child)
+
+
+def contents_match(left, right):
+ left_children = left.childNodes
+ right_children = right.childNodes
+ if len(left_children) != len(right_children):
+ return 0
+ for l, r in map(None, left_children, right_children):
+ nodeType = l.nodeType
+ if nodeType != r.nodeType:
+ return 0
+ if nodeType == ELEMENT:
+ if l.tagName != r.tagName:
+ return 0
+ # should check attributes, but that's not a problem here
+ if not contents_match(l, r):
+ return 0
+ elif nodeType == TEXT:
+ if l.data != r.data:
+ return 0
+ else:
+ # not quite right, but good enough
+ return 0
+ return 1
+
+
+def create_module_info(doc, section):
+ # Heavy.
+ node = extract_first_element(section, "modulesynopsis")
+ if node is None:
+ return
+ set_tagName(node, "synopsis")
+ lastchild = node.childNodes[-1]
+ if lastchild.nodeType == TEXT \
+ and lastchild.data[-1:] == ".":
+ lastchild.data = lastchild.data[:-1]
+ modauthor = extract_first_element(section, "moduleauthor")
+ if modauthor:
+ set_tagName(modauthor, "author")
+ modauthor.appendChild(doc.createTextNode(
+ modauthor.getAttribute("name")))
+ modauthor.removeAttribute("name")
+ platform = extract_first_element(section, "platform")
+ if section.tagName == "section":
+ modinfo_pos = 2
+ modinfo = doc.createElement("moduleinfo")
+ moddecl = extract_first_element(section, "declaremodule")
+ name = None
+ if moddecl:
+ modinfo.appendChild(doc.createTextNode("\n "))
+ name = moddecl.attributes["name"].value
+ namenode = doc.createElement("name")
+ namenode.appendChild(doc.createTextNode(name))
+ modinfo.appendChild(namenode)
+ type = moddecl.attributes.get("type")
+ if type:
+ type = type.value
+ modinfo.appendChild(doc.createTextNode("\n "))
+ typenode = doc.createElement("type")
+ typenode.appendChild(doc.createTextNode(type))
+ modinfo.appendChild(typenode)
+ versionadded = extract_first_element(section, "versionadded")
+ if versionadded:
+ modinfo.setAttribute("added", versionadded.getAttribute("version"))
+ title = get_first_element(section, "title")
+ if title:
+ children = title.childNodes
+ if len(children) >= 2 \
+ and children[0].nodeName == "module" \
+ and children[0].childNodes[0].data == name:
+ # this is it; morph the <title> into <short-synopsis>
+ first_data = children[1]
+ if first_data.data[:4] == " ---":
+ first_data.data = string.lstrip(first_data.data[4:])
+ set_tagName(title, "short-synopsis")
+ if children[-1].nodeType == TEXT \
+ and children[-1].data[-1:] == ".":
+ children[-1].data = children[-1].data[:-1]
+ section.removeChild(title)
+ section.removeChild(section.childNodes[0])
+ title.removeChild(children[0])
+ modinfo_pos = 0
+ else:
+ ewrite("module name in title doesn't match"
+ " <declaremodule/>; no <short-synopsis/>\n")
+ else:
+ ewrite("Unexpected condition: <section/> without <title/>\n")
+ modinfo.appendChild(doc.createTextNode("\n "))
+ modinfo.appendChild(node)
+ if title and not contents_match(title, node):
+ # The short synopsis is actually different,
+ # and needs to be stored:
+ modinfo.appendChild(doc.createTextNode("\n "))
+ modinfo.appendChild(title)
+ if modauthor:
+ modinfo.appendChild(doc.createTextNode("\n "))
+ modinfo.appendChild(modauthor)
+ if platform:
+ modinfo.appendChild(doc.createTextNode("\n "))
+ modinfo.appendChild(platform)
+ modinfo.appendChild(doc.createTextNode("\n "))
+ section.insertBefore(modinfo, section.childNodes[modinfo_pos])
+ section.insertBefore(doc.createTextNode("\n "), modinfo)
+ #
+ # The rest of this removes extra newlines from where we cut out
+    # a lot of elements.  A lot of code for minimal value, but it keeps
+    # the generated *ML from being too funny looking.
+ #
+ section.normalize()
+ children = section.childNodes
+ for i in range(len(children)):
+ node = children[i]
+ if node.nodeName == "moduleinfo":
+ nextnode = children[i+1]
+ if nextnode.nodeType == TEXT:
+ data = nextnode.data
+ if len(string.lstrip(data)) < (len(data) - 4):
+ nextnode.data = "\n\n\n" + string.lstrip(data)
+
+
+def cleanup_synopses(doc, fragment):
+ for node in find_all_elements(fragment, "section"):
+ create_module_info(doc, node)
+
+
+def fixup_table_structures(doc, fragment):
+ for table in find_all_elements(fragment, "table"):
+ fixup_table(doc, table)
+
+
+def fixup_table(doc, table):
+ # create the table head
+ thead = doc.createElement("thead")
+ row = doc.createElement("row")
+ move_elements_by_name(doc, table, row, "entry")
+ thead.appendChild(doc.createTextNode("\n "))
+ thead.appendChild(row)
+ thead.appendChild(doc.createTextNode("\n "))
+ # create the table body
+ tbody = doc.createElement("tbody")
+ prev_row = None
+ last_was_hline = 0
+ children = table.childNodes
+ for child in children:
+ if child.nodeType == ELEMENT:
+ tagName = child.tagName
+ if tagName == "hline" and prev_row is not None:
+ prev_row.setAttribute("rowsep", "1")
+ elif tagName == "row":
+ prev_row = child
+ # save the rows:
+ tbody.appendChild(doc.createTextNode("\n "))
+ move_elements_by_name(doc, table, tbody, "row", sep="\n ")
+ # and toss the rest:
+ while children:
+ child = children[0]
+ nodeType = child.nodeType
+ if nodeType == TEXT:
+ if string.strip(child.data):
+ raise ConversionError("unexpected free data in <%s>: %r"
+ % (table.tagName, child.data))
+ table.removeChild(child)
+ continue
+ if nodeType == ELEMENT:
+ if child.tagName != "hline":
+ raise ConversionError(
+ "unexpected <%s> in table" % child.tagName)
+ table.removeChild(child)
+ continue
+ raise ConversionError(
+ "unexpected %s node in table" % child.__class__.__name__)
+ # nothing left in the <table>; add the <thead> and <tbody>
+ tgroup = doc.createElement("tgroup")
+ tgroup.appendChild(doc.createTextNode("\n "))
+ tgroup.appendChild(thead)
+ tgroup.appendChild(doc.createTextNode("\n "))
+ tgroup.appendChild(tbody)
+ tgroup.appendChild(doc.createTextNode("\n "))
+ table.appendChild(tgroup)
+ # now make the <entry>s look nice:
+ for row in table.getElementsByTagName("row"):
+ fixup_row(doc, row)
+
+
+def fixup_row(doc, row):
+ entries = []
+ map(entries.append, row.childNodes[1:])
+ for entry in entries:
+ row.insertBefore(doc.createTextNode("\n "), entry)
+# row.appendChild(doc.createTextNode("\n "))
+
+
+def move_elements_by_name(doc, source, dest, name, sep=None):
+ nodes = []
+ for child in source.childNodes:
+ if child.nodeName == name:
+ nodes.append(child)
+ for node in nodes:
+ source.removeChild(node)
+ dest.appendChild(node)
+ if sep:
+ dest.appendChild(doc.createTextNode(sep))
+
+
+RECURSE_INTO_PARA_CONTAINERS = (
+ "chapter", "abstract", "enumerate",
+ "section", "subsection", "subsubsection",
+ "paragraph", "subparagraph", "back-matter",
+ "howto", "manual",
+ "item", "itemize", "fulllineitems", "enumeration", "descriptionlist",
+ "definitionlist", "definition",
+ )
+
+PARA_LEVEL_ELEMENTS = (
+ "moduleinfo", "title", "verbatim", "enumerate", "item",
+ "interpreter-session", "back-matter", "interactive-session",
+ "opcodedesc", "classdesc", "datadesc",
+ "funcdesc", "methoddesc", "excdesc", "memberdesc", "membderdescni",
+ "funcdescni", "methoddescni", "excdescni",
+ "tableii", "tableiii", "tableiv", "localmoduletable",
+ "sectionauthor", "seealso", "itemize",
+ # include <para>, so we can just do it again to get subsequent paras:
+ PARA_ELEMENT,
+ )
+
+PARA_LEVEL_PRECEEDERS = (
+ "setindexsubitem", "author",
+ "stindex", "obindex", "COMMENT", "label", "input", "title",
+ "versionadded", "versionchanged", "declaremodule", "modulesynopsis",
+ "moduleauthor", "indexterm", "leader",
+ )
+
+
+def fixup_paras(doc, fragment):
+ for child in fragment.childNodes:
+ if child.nodeName in RECURSE_INTO_PARA_CONTAINERS:
+ fixup_paras_helper(doc, child)
+ descriptions = find_all_elements(fragment, "description")
+ for description in descriptions:
+ fixup_paras_helper(doc, description)
+
+
+def fixup_paras_helper(doc, container, depth=0):
+ # document is already normalized
+ children = container.childNodes
+ start = skip_leading_nodes(children)
+ while len(children) > start:
+ if children[start].nodeName in RECURSE_INTO_PARA_CONTAINERS:
+ # Something to recurse into:
+ fixup_paras_helper(doc, children[start])
+ else:
+ # Paragraph material:
+ build_para(doc, container, start, len(children))
+ if DEBUG_PARA_FIXER and depth == 10:
+ sys.exit(1)
+ start = skip_leading_nodes(children, start + 1)
+
+
+def build_para(doc, parent, start, i):
+ children = parent.childNodes
+ after = start + 1
+ have_last = 0
+ BREAK_ELEMENTS = PARA_LEVEL_ELEMENTS + RECURSE_INTO_PARA_CONTAINERS
+ # Collect all children until \n\n+ is found in a text node or a
+ # member of BREAK_ELEMENTS is found.
+ for j in range(start, i):
+ after = j + 1
+ child = children[j]
+ nodeType = child.nodeType
+ if nodeType == ELEMENT:
+ if child.tagName in BREAK_ELEMENTS:
+ after = j
+ break
+ elif nodeType == TEXT:
+ pos = string.find(child.data, "\n\n")
+ if pos == 0:
+ after = j
+ break
+ if pos >= 1:
+ child.splitText(pos)
+ break
+ else:
+ have_last = 1
+ if (start + 1) > after:
+ raise ConversionError(
+ "build_para() could not identify content to turn into a paragraph")
+ if children[after - 1].nodeType == TEXT:
+ # we may need to split off trailing white space:
+ child = children[after - 1]
+ data = child.data
+ if string.rstrip(data) != data:
+ have_last = 0
+ child.splitText(len(string.rstrip(data)))
+ para = doc.createElement(PARA_ELEMENT)
+ prev = None
+ indexes = range(start, after)
+ indexes.reverse()
+ for j in indexes:
+ node = parent.childNodes[j]
+ parent.removeChild(node)
+ para.insertBefore(node, prev)
+ prev = node
+ if have_last:
+ parent.appendChild(para)
+ parent.appendChild(doc.createTextNode("\n\n"))
+ return len(parent.childNodes)
+ else:
+ nextnode = parent.childNodes[start]
+ if nextnode.nodeType == TEXT:
+ if nextnode.data and nextnode.data[0] != "\n":
+ nextnode.data = "\n" + nextnode.data
+ else:
+ newnode = doc.createTextNode("\n")
+ parent.insertBefore(newnode, nextnode)
+ nextnode = newnode
+ start = start + 1
+ parent.insertBefore(para, nextnode)
+ return start + 1
+
+
+def skip_leading_nodes(children, start=0):
+ """Return index into children of a node at which paragraph building should
+ begin or a recursive call to fixup_paras_helper() should be made (for
+ subsections, etc.).
+
+ When the return value >= len(children), we've built all the paras we can
+ from this list of children.
+ """
+ i = len(children)
+ while i > start:
+ # skip over leading comments and whitespace:
+ child = children[start]
+ nodeType = child.nodeType
+ if nodeType == TEXT:
+ data = child.data
+ shortened = string.lstrip(data)
+ if shortened:
+ if data != shortened:
+ # break into two nodes: whitespace and non-whitespace
+ child.splitText(len(data) - len(shortened))
+ return start + 1
+ return start
+ # all whitespace, just skip
+ elif nodeType == ELEMENT:
+ tagName = child.tagName
+ if tagName in RECURSE_INTO_PARA_CONTAINERS:
+ return start
+ if tagName not in PARA_LEVEL_ELEMENTS + PARA_LEVEL_PRECEEDERS:
+ return start
+ start = start + 1
+ return start
+
+
+def fixup_rfc_references(doc, fragment):
+ for rfcnode in find_all_elements(fragment, "rfc"):
+ rfcnode.appendChild(doc.createTextNode(
+ "RFC " + rfcnode.getAttribute("num")))
+
+
+def fixup_signatures(doc, fragment):
+ for child in fragment.childNodes:
+ if child.nodeType == ELEMENT:
+ args = child.getElementsByTagName("args")
+ for arg in args:
+ fixup_args(doc, arg)
+ arg.normalize()
+ args = child.getElementsByTagName("constructor-args")
+ for arg in args:
+ fixup_args(doc, arg)
+ arg.normalize()
+
+
+def fixup_args(doc, arglist):
+ for child in arglist.childNodes:
+ if child.nodeName == "optional":
+ # found it; fix and return
+ arglist.insertBefore(doc.createTextNode("["), child)
+ optkids = child.childNodes
+ while optkids:
+ k = optkids[0]
+ child.removeChild(k)
+ arglist.insertBefore(k, child)
+ arglist.insertBefore(doc.createTextNode("]"), child)
+ arglist.removeChild(child)
+ return fixup_args(doc, arglist)
+
+
+def fixup_sectionauthors(doc, fragment):
+ for sectauth in find_all_elements(fragment, "sectionauthor"):
+ section = sectauth.parentNode
+ section.removeChild(sectauth)
+ set_tagName(sectauth, "author")
+ sectauth.appendChild(doc.createTextNode(
+ sectauth.getAttribute("name")))
+ sectauth.removeAttribute("name")
+ after = section.childNodes[2]
+ title = section.childNodes[1]
+ if title.nodeName != "title":
+ after = section.childNodes[0]
+ section.insertBefore(doc.createTextNode("\n "), after)
+ section.insertBefore(sectauth, after)
+
+
+def fixup_verbatims(doc):
+ for verbatim in find_all_elements(doc, "verbatim"):
+ child = verbatim.childNodes[0]
+ if child.nodeType == TEXT \
+ and string.lstrip(child.data)[:3] == ">>>":
+ set_tagName(verbatim, "interactive-session")
+
+
+def add_node_ids(fragment, counter=0):
+ fragment.node_id = counter
+ for node in fragment.childNodes:
+ counter = counter + 1
+ if node.nodeType == ELEMENT:
+ counter = add_node_ids(node, counter)
+ else:
+ node.node_id = counter
+ return counter + 1
+
+
+REFMODINDEX_ELEMENTS = ('refmodindex', 'refbimodindex',
+ 'refexmodindex', 'refstmodindex')
+
+def fixup_refmodindexes(fragment):
+ # Locate <ref*modindex>...</> co-located with <module>...</>, and
+ # remove the <ref*modindex>, replacing it with index=index on the
+ # <module> element.
+ nodes = find_all_elements_from_set(fragment, REFMODINDEX_ELEMENTS)
+ d = {}
+ for node in nodes:
+ parent = node.parentNode
+ d[parent.node_id] = parent
+ del nodes
+ map(fixup_refmodindexes_chunk, d.values())
+
+
+def fixup_refmodindexes_chunk(container):
+ # node is probably a <para>; let's see how often it isn't:
+ if container.tagName != PARA_ELEMENT:
+ bwrite("--- fixup_refmodindexes_chunk(%s)\n" % container)
+ module_entries = find_all_elements(container, "module")
+ if not module_entries:
+ return
+ index_entries = find_all_elements_from_set(container, REFMODINDEX_ELEMENTS)
+ removes = []
+ for entry in index_entries:
+ children = entry.childNodes
+ if len(children) != 0:
+ bwrite("--- unexpected number of children for %s node:\n"
+ % entry.tagName)
+ ewrite(entry.toxml() + "\n")
+ continue
+ found = 0
+ module_name = entry.getAttribute("module")
+ for node in module_entries:
+ if len(node.childNodes) != 1:
+ continue
+ this_name = node.childNodes[0].data
+ if this_name == module_name:
+ found = 1
+ node.setAttribute("index", "yes")
+ if found:
+ removes.append(entry)
+ for node in removes:
+ container.removeChild(node)
+
+
+def fixup_bifuncindexes(fragment):
+ nodes = find_all_elements(fragment, 'bifuncindex')
+ d = {}
+ # make sure that each parent is only processed once:
+ for node in nodes:
+ parent = node.parentNode
+ d[parent.node_id] = parent
+ del nodes
+ map(fixup_bifuncindexes_chunk, d.values())
+
+
+def fixup_bifuncindexes_chunk(container):
+ removes = []
+ entries = find_all_child_elements(container, "bifuncindex")
+ function_entries = find_all_child_elements(container, "function")
+ for entry in entries:
+ function_name = entry.getAttribute("name")
+ found = 0
+ for func_entry in function_entries:
+ t2 = func_entry.childNodes[0].data
+ if t2[-2:] != "()":
+ continue
+ t2 = t2[:-2]
+ if t2 == function_name:
+ func_entry.setAttribute("index", "yes")
+ func_entry.setAttribute("module", "__builtin__")
+ if not found:
+ found = 1
+ removes.append(entry)
+ for entry in removes:
+ container.removeChild(entry)
+
+
+def join_adjacent_elements(container, gi):
+ queue = [container]
+ while queue:
+ parent = queue.pop()
+ i = 0
+ children = parent.childNodes
+ nchildren = len(children)
+ while i < (nchildren - 1):
+ child = children[i]
+ if child.nodeName == gi:
+ if children[i+1].nodeName == gi:
+ ewrite("--- merging two <%s/> elements\n" % gi)
+ child = children[i]
+ nextchild = children[i+1]
+ nextchildren = nextchild.childNodes
+ while len(nextchildren):
+ node = nextchildren[0]
+ nextchild.removeChild(node)
+ child.appendChild(node)
+ parent.removeChild(nextchild)
+ continue
+ if child.nodeType == ELEMENT:
+ queue.append(child)
+ i = i + 1
+
+
+_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
+
+def write_esis(doc, ofp, knownempty):
+ for node in doc.childNodes:
+ nodeType = node.nodeType
+ if nodeType == ELEMENT:
+ gi = node.tagName
+ if knownempty(gi):
+ if node.hasChildNodes():
+ raise ValueError, \
+ "declared-empty node <%s> has children" % gi
+ ofp.write("e\n")
+ for k, value in node.attributes.items():
+ if _token_rx.match(value):
+ dtype = "TOKEN"
+ else:
+ dtype = "CDATA"
+ ofp.write("A%s %s %s\n" % (k, dtype, esistools.encode(value)))
+ ofp.write("(%s\n" % gi)
+ write_esis(node, ofp, knownempty)
+ ofp.write(")%s\n" % gi)
+ elif nodeType == TEXT:
+ ofp.write("-%s\n" % esistools.encode(node.data))
+ elif nodeType == ENTITY_REFERENCE:
+ ofp.write("&%s\n" % node.nodeName)
+ else:
+ raise RuntimeError, "unsupported node type: %s" % nodeType
+
+
+def convert(ifp, ofp):
+ events = esistools.parse(ifp)
+ toktype, doc = events.getEvent()
+ fragment = doc.createDocumentFragment()
+ events.expandNode(fragment)
+
+ normalize(fragment)
+ simplify(doc, fragment)
+ handle_labels(doc, fragment)
+ handle_appendix(doc, fragment)
+ fixup_trailing_whitespace(doc, {
+ "abstract": "\n",
+ "title": "",
+ "chapter": "\n\n",
+ "section": "\n\n",
+ "subsection": "\n\n",
+ "subsubsection": "\n\n",
+ "paragraph": "\n\n",
+ "subparagraph": "\n\n",
+ })
+ cleanup_root_text(doc)
+ cleanup_trailing_parens(fragment, ["function", "method", "cfunction"])
+ cleanup_synopses(doc, fragment)
+ fixup_descriptors(doc, fragment)
+ fixup_verbatims(fragment)
+ normalize(fragment)
+ fixup_paras(doc, fragment)
+ fixup_sectionauthors(doc, fragment)
+ fixup_table_structures(doc, fragment)
+ fixup_rfc_references(doc, fragment)
+ fixup_signatures(doc, fragment)
+ add_node_ids(fragment)
+ fixup_refmodindexes(fragment)
+ fixup_bifuncindexes(fragment)
+ # Take care of ugly hacks in the LaTeX markup to avoid LaTeX and
+ # LaTeX2HTML screwing with GNU-style long options (the '--' problem).
+ join_adjacent_elements(fragment, "option")
+ #
+ d = {}
+ for gi in events.parser.get_empties():
+ d[gi] = gi
+ if d.has_key("author"):
+ del d["author"]
+ if d.has_key("rfc"):
+ del d["rfc"]
+ knownempty = d.has_key
+ #
+ try:
+ write_esis(fragment, ofp, knownempty)
+ except IOError, (err, msg):
+ # Ignore EPIPE; it just means that whoever we're writing to stopped
+ # reading. The rest of the output would be ignored. All other errors
+        # should still be reported.
+ if err != errno.EPIPE:
+ raise
+
+
+def main():
+ if len(sys.argv) == 1:
+ ifp = sys.stdin
+ ofp = sys.stdout
+ elif len(sys.argv) == 2:
+ ifp = open(sys.argv[1])
+ ofp = sys.stdout
+ elif len(sys.argv) == 3:
+ ifp = open(sys.argv[1])
+ import StringIO
+ ofp = StringIO.StringIO()
+ else:
+ usage()
+ sys.exit(2)
+ convert(ifp, ofp)
+ if len(sys.argv) == 3:
+ fp = open(sys.argv[2], "w")
+ fp.write(ofp.getvalue())
+ fp.close()
+ ofp.close()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/doc/tools/sgmlconv/esis2sgml.py b/doc/tools/sgmlconv/esis2sgml.py
new file mode 100755
index 0000000..7bda929
--- /dev/null
+++ b/doc/tools/sgmlconv/esis2sgml.py
@@ -0,0 +1,263 @@
+#! /usr/bin/env python
+
+"""Convert ESIS events to SGML or XML markup.
+
+This is limited, but seems sufficient for the ESIS generated by the
+latex2esis.py script when run over the Python documentation.
+"""
+
+# This should have an explicit option to indicate whether the *INPUT* was
+# generated from an SGML or an XML application.
+
+import errno
+import esistools
+import os
+import re
+import string
+
+from xml.sax.saxutils import escape
+
+
+AUTOCLOSE = ()
+
+EMPTIES_FILENAME = "../sgml/empties.dat"
+LIST_EMPTIES = 0
+
+
+_elem_map = {}
+_attr_map = {}
+_token_map = {}
+
+_normalize_case = str
+
+def map_gi(sgmlgi, map):
+ uncased = _normalize_case(sgmlgi)
+ try:
+ return map[uncased]
+ except IndexError:
+ map[uncased] = sgmlgi
+ return sgmlgi
+
+def null_map_gi(sgmlgi, map):
+ return sgmlgi
+
+
+def format_attrs(attrs, xml=0):
+ attrs = attrs.items()
+ attrs.sort()
+ parts = []
+ append = parts.append
+ for name, value in attrs:
+ if xml:
+ append('%s="%s"' % (name, escape(value)))
+ else:
+ # this is a little bogus, but should do for now
+ if name == value and isnmtoken(value):
+ append(value)
+ elif istoken(value):
+ if value == "no" + name:
+ append(value)
+ else:
+ append("%s=%s" % (name, value))
+ else:
+ append('%s="%s"' % (name, escape(value)))
+ if parts:
+ parts.insert(0, '')
+ return string.join(parts)
+
+
+_nmtoken_rx = re.compile("[a-z][-._a-z0-9]*$", re.IGNORECASE)
+def isnmtoken(s):
+ return _nmtoken_rx.match(s) is not None
+
+_token_rx = re.compile("[a-z0-9][-._a-z0-9]*$", re.IGNORECASE)
+def istoken(s):
+ return _token_rx.match(s) is not None
+
+
+def convert(ifp, ofp, xml=0, autoclose=(), verbatims=()):
+ if xml:
+ autoclose = ()
+ attrs = {}
+ lastopened = None
+ knownempties = []
+ knownempty = 0
+ lastempty = 0
+ inverbatim = 0
+ while 1:
+ line = ifp.readline()
+ if not line:
+ break
+
+ type = line[0]
+ data = line[1:]
+ if data and data[-1] == "\n":
+ data = data[:-1]
+ if type == "-":
+ data = esistools.decode(data)
+ data = escape(data)
+ if not inverbatim:
+ data = string.replace(data, "---", "—")
+ ofp.write(data)
+ if "\n" in data:
+ lastopened = None
+ knownempty = 0
+ lastempty = 0
+ elif type == "(":
+ if data == "COMMENT":
+ ofp.write("<!--")
+ continue
+ data = map_gi(data, _elem_map)
+ if knownempty and xml:
+ ofp.write("<%s%s/>" % (data, format_attrs(attrs, xml)))
+ else:
+ ofp.write("<%s%s>" % (data, format_attrs(attrs, xml)))
+ if knownempty and data not in knownempties:
+ # accumulate knowledge!
+ knownempties.append(data)
+ attrs = {}
+ lastopened = data
+ lastempty = knownempty
+ knownempty = 0
+ inverbatim = data in verbatims
+ elif type == ")":
+ if data == "COMMENT":
+ ofp.write("-->")
+ continue
+ data = map_gi(data, _elem_map)
+ if xml:
+ if not lastempty:
+ ofp.write("</%s>" % data)
+ elif data not in knownempties:
+ if data in autoclose:
+ pass
+ elif lastopened == data:
+ ofp.write("</>")
+ else:
+ ofp.write("</%s>" % data)
+ lastopened = None
+ lastempty = 0
+ inverbatim = 0
+ elif type == "A":
+ name, type, value = string.split(data, " ", 2)
+ name = map_gi(name, _attr_map)
+ attrs[name] = esistools.decode(value)
+ elif type == "e":
+ knownempty = 1
+ elif type == "&":
+ ofp.write("&%s;" % data)
+ knownempty = 0
+ else:
+ raise RuntimeError, "unrecognized ESIS event type: '%s'" % type
+
+ if LIST_EMPTIES:
+ dump_empty_element_names(knownempties)
+
+
+def dump_empty_element_names(knownempties):
+ d = {}
+ for gi in knownempties:
+ d[gi] = gi
+ knownempties.append("")
+ if os.path.isfile(EMPTIES_FILENAME):
+ fp = open(EMPTIES_FILENAME)
+ while 1:
+ line = fp.readline()
+ if not line:
+ break
+ gi = string.strip(line)
+ if gi:
+ d[gi] = gi
+ fp = open(EMPTIES_FILENAME, "w")
+ gilist = d.keys()
+ gilist.sort()
+ fp.write(string.join(gilist, "\n"))
+ fp.write("\n")
+ fp.close()
+
+
+def update_gi_map(map, names, fromsgml=1):
+ for name in string.split(names, ","):
+ if fromsgml:
+ uncased = string.lower(name)
+ else:
+ uncased = name
+ map[uncased] = name
+
+
+def main():
+ import getopt
+ import sys
+ #
+ autoclose = AUTOCLOSE
+ xml = 1
+ xmldecl = 0
+ elem_names = ''
+ attr_names = ''
+ value_names = ''
+ verbatims = ('verbatim', 'interactive-session')
+ opts, args = getopt.getopt(sys.argv[1:], "adesx",
+ ["autoclose=", "declare", "sgml", "xml",
+ "elements-map=", "attributes-map",
+ "values-map="])
+ for opt, arg in opts:
+ if opt in ("-d", "--declare"):
+ xmldecl = 1
+ elif opt == "-e":
+ global LIST_EMPTIES
+ LIST_EMPTIES = 1
+ elif opt in ("-s", "--sgml"):
+ xml = 0
+ elif opt in ("-x", "--xml"):
+ xml = 1
+ elif opt in ("-a", "--autoclose"):
+ autoclose = string.split(arg, ",")
+ elif opt == "--elements-map":
+ elem_names = ("%s,%s" % (elem_names, arg))[1:]
+ elif opt == "--attributes-map":
+ attr_names = ("%s,%s" % (attr_names, arg))[1:]
+ elif opt == "--values-map":
+ value_names = ("%s,%s" % (value_names, arg))[1:]
+ #
+ # open input streams:
+ #
+ if len(args) == 0:
+ ifp = sys.stdin
+ ofp = sys.stdout
+ elif len(args) == 1:
+ ifp = open(args[0])
+ ofp = sys.stdout
+ elif len(args) == 2:
+ ifp = open(args[0])
+ ofp = open(args[1], "w")
+ else:
+ usage()
+ sys.exit(2)
+ #
+ # setup the name maps:
+ #
+ if elem_names or attr_names or value_names:
+ # assume the origin was SGML; ignore case of the names from the ESIS
+ # stream but set up conversion tables to get the case right on output
+ global _normalize_case
+ _normalize_case = string.lower
+ update_gi_map(_elem_map, string.split(elem_names, ","))
+ update_gi_map(_attr_map, string.split(attr_names, ","))
+ update_gi_map(_values_map, string.split(value_names, ","))
+ else:
+ global map_gi
+ map_gi = null_map_gi
+ #
+ # run the conversion:
+ #
+ try:
+ if xml and xmldecl:
+ opf.write('<?xml version="1.0" encoding="iso8859-1"?>\n')
+ convert(ifp, ofp, xml=xml, autoclose=autoclose, verbatims=verbatims)
+ except IOError, (err, msg):
+ if err != errno.EPIPE:
+ raise
+
+
+if __name__ == "__main__":
+ main()
diff --git a/doc/tools/sgmlconv/esistools.py b/doc/tools/sgmlconv/esistools.py
new file mode 100644
index 0000000..893af76
--- /dev/null
+++ b/doc/tools/sgmlconv/esistools.py
@@ -0,0 +1,309 @@
+"""Miscellaneous utility functions useful for dealing with ESIS streams."""
+
+import re
+import string
+
+import xml.dom.pulldom
+
+import xml.sax
+import xml.sax.handler
+import xml.sax.xmlreader
+
+
+_data_match = re.compile(r"[^\\][^\\]*").match
+
+def decode(s):
+ r = ''
+ while s:
+ m = _data_match(s)
+ if m:
+ r = r + m.group()
+ s = s[m.end():]
+ elif s[1] == "\\":
+ r = r + "\\"
+ s = s[2:]
+ elif s[1] == "n":
+ r = r + "\n"
+ s = s[2:]
+ elif s[1] == "%":
+ s = s[2:]
+ n, s = s.split(";", 1)
+ r = r + unichr(int(n))
+ else:
+ raise ValueError, "can't handle " + `s`
+ return r
+
+
+_charmap = {}
+for c in map(chr, range(256)):
+ _charmap[c] = c
+_charmap["\n"] = r"\n"
+_charmap["\\"] = r"\\"
+del c
+
+_null_join = ''.join
+def encode(s):
+ return _null_join(map(_charmap.get, s))
+
+
+class ESISReader(xml.sax.xmlreader.XMLReader):
+ """SAX Reader which reads from an ESIS stream.
+
+ No verification of the document structure is performed by the
+ reader; a general verifier could be used as the target
+ ContentHandler instance.
+
+ """
+ _decl_handler = None
+ _lexical_handler = None
+
+ _public_id = None
+ _system_id = None
+
+ _buffer = ""
+ _is_empty = 0
+ _lineno = 0
+ _started = 0
+
+ def __init__(self, contentHandler=None, errorHandler=None):
+ xml.sax.xmlreader.XMLReader.__init__(self)
+ self._attrs = {}
+ self._attributes = Attributes(self._attrs)
+ self._locator = Locator()
+ self._empties = {}
+ if contentHandler:
+ self.setContentHandler(contentHandler)
+ if errorHandler:
+ self.setErrorHandler(errorHandler)
+
+ def get_empties(self):
+ return self._empties.keys()
+
+ #
+ # XMLReader interface
+ #
+
+ def parse(self, source):
+ raise RuntimeError
+ self._locator._public_id = source.getPublicId()
+ self._locator._system_id = source.getSystemId()
+ fp = source.getByteStream()
+ handler = self.getContentHandler()
+ if handler:
+ handler.startDocument()
+ lineno = 0
+ while 1:
+ token, data = self._get_token(fp)
+ if token is None:
+ break
+ lineno = lineno + 1
+ self._locator._lineno = lineno
+ self._handle_token(token, data)
+ handler = self.getContentHandler()
+ if handler:
+ handler.startDocument()
+
+ def feed(self, data):
+ if not self._started:
+ handler = self.getContentHandler()
+ if handler:
+ handler.startDocument()
+ self._started = 1
+ data = self._buffer + data
+ self._buffer = None
+ lines = data.split("\n")
+ if lines:
+ for line in lines[:-1]:
+ self._lineno = self._lineno + 1
+ self._locator._lineno = self._lineno
+ if not line:
+ e = xml.sax.SAXParseException(
+ "ESIS input line contains no token type mark",
+ None, self._locator)
+ self.getErrorHandler().error(e)
+ else:
+ self._handle_token(line[0], line[1:])
+ self._buffer = lines[-1]
+ else:
+ self._buffer = ""
+
+ def close(self):
+ handler = self.getContentHandler()
+ if handler:
+ handler.endDocument()
+ self._buffer = ""
+
+ def _get_token(self, fp):
+ try:
+ line = fp.readline()
+ except IOError, e:
+ e = SAXException("I/O error reading input stream", e)
+ self.getErrorHandler().fatalError(e)
+ return
+ if not line:
+ return None, None
+ if line[-1] == "\n":
+ line = line[:-1]
+ if not line:
+ e = xml.sax.SAXParseException(
+ "ESIS input line contains no token type mark",
+ None, self._locator)
+ self.getErrorHandler().error(e)
+ return
+ return line[0], line[1:]
+
+ def _handle_token(self, token, data):
+ handler = self.getContentHandler()
+ if token == '-':
+ if data and handler:
+ handler.characters(decode(data))
+ elif token == ')':
+ if handler:
+ handler.endElement(decode(data))
+ elif token == '(':
+ if self._is_empty:
+ self._empties[data] = 1
+ if handler:
+ handler.startElement(data, self._attributes)
+ self._attrs.clear()
+ self._is_empty = 0
+ elif token == 'A':
+ name, value = data.split(' ', 1)
+ if value != "IMPLIED":
+ type, value = value.split(' ', 1)
+ self._attrs[name] = (decode(value), type)
+ elif token == '&':
+ # entity reference in SAX?
+ pass
+ elif token == '?':
+ if handler:
+ if ' ' in data:
+ target, data = string.split(data, None, 1)
+ else:
+ target, data = data, ""
+ handler.processingInstruction(target, decode(data))
+ elif token == 'N':
+ handler = self.getDTDHandler()
+ if handler:
+ handler.notationDecl(data, self._public_id, self._system_id)
+ self._public_id = None
+ self._system_id = None
+ elif token == 'p':
+ self._public_id = decode(data)
+ elif token == 's':
+ self._system_id = decode(data)
+ elif token == 'e':
+ self._is_empty = 1
+ elif token == 'C':
+ pass
+ else:
+ e = SAXParseException("unknown ESIS token in event stream",
+ None, self._locator)
+ self.getErrorHandler().error(e)
+
+ def setContentHandler(self, handler):
+ old = self.getContentHandler()
+ if old:
+ old.setDocumentLocator(None)
+ if handler:
+ handler.setDocumentLocator(self._locator)
+ xml.sax.xmlreader.XMLReader.setContentHandler(self, handler)
+
+ def getProperty(self, property):
+ if property == xml.sax.handler.property_lexical_handler:
+ return self._lexical_handler
+
+ elif property == xml.sax.handler.property_declaration_handler:
+ return self._decl_handler
+
+ else:
+ raise xml.sax.SAXNotRecognizedException("unknown property %s"
+ % `property`)
+
+ def setProperty(self, property, value):
+ if property == xml.sax.handler.property_lexical_handler:
+ if self._lexical_handler:
+ self._lexical_handler.setDocumentLocator(None)
+ if value:
+ value.setDocumentLocator(self._locator)
+ self._lexical_handler = value
+
+ elif property == xml.sax.handler.property_declaration_handler:
+ if self._decl_handler:
+ self._decl_handler.setDocumentLocator(None)
+ if value:
+ value.setDocumentLocator(self._locator)
+ self._decl_handler = value
+
+ else:
+ raise xml.sax.SAXNotRecognizedException()
+
+ def getFeature(self, feature):
+ if feature == xml.sax.handler.feature_namespaces:
+ return 1
+ else:
+ return xml.sax.xmlreader.XMLReader.getFeature(self, feature)
+
+ def setFeature(self, feature, enabled):
+ if feature == xml.sax.handler.feature_namespaces:
+ pass
+ else:
+ xml.sax.xmlreader.XMLReader.setFeature(self, feature, enabled)
+
+
+class Attributes(xml.sax.xmlreader.AttributesImpl):
+ # self._attrs has the form {name: (value, type)}
+
+ def getType(self, name):
+ return self._attrs[name][1]
+
+ def getValue(self, name):
+ return self._attrs[name][0]
+
+ def getValueByQName(self, name):
+ return self._attrs[name][0]
+
+ def __getitem__(self, name):
+ return self._attrs[name][0]
+
+ def get(self, name, default=None):
+ if self._attrs.has_key(name):
+ return self._attrs[name][0]
+ return default
+
+ def items(self):
+ L = []
+ for name, (value, type) in self._attrs.items():
+ L.append((name, value))
+ return L
+
+ def values(self):
+ L = []
+ for value, type in self._attrs.values():
+ L.append(value)
+ return L
+
+
+class Locator(xml.sax.xmlreader.Locator):
+ _lineno = -1
+ _public_id = None
+ _system_id = None
+
+ def getLineNumber(self):
+ return self._lineno
+
+ def getPublicId(self):
+ return self._public_id
+
+ def getSystemId(self):
+ return self._system_id
+
+
+def parse(stream_or_string, parser=None):
+ if type(stream_or_string) in [type(""), type(u"")]:
+ stream = open(stream_or_string)
+ else:
+ stream = stream_or_string
+ if not parser:
+ parser = ESISReader()
+ return xml.dom.pulldom.DOMEventStream(stream, parser, (2 ** 14) - 20)
diff --git a/doc/tools/sgmlconv/latex2esis.py b/doc/tools/sgmlconv/latex2esis.py
new file mode 100755
index 0000000..74e1dc7
--- /dev/null
+++ b/doc/tools/sgmlconv/latex2esis.py
@@ -0,0 +1,555 @@
+#! /usr/bin/env python
+
+"""Generate ESIS events based on a LaTeX source document and
+configuration data.
+
+The conversion is not strong enough to work with arbitrary LaTeX
+documents; it has only been designed to work with the highly stylized
+markup used in the standard Python documentation. A lot of
+information about specific markup is encoded in the control table
+passed to the convert() function; changing this table can allow this
+tool to support additional LaTeX markups.
+
+The format of the table is largely undocumented; see the commented
+headers where the table is specified in main(). There is no provision
+to load an alternate table from an external file.
+"""
+
+import errno
+import getopt
+import os
+import re
+import string
+import sys
+import UserList
+import xml.sax.saxutils
+
+from types import ListType, StringType, TupleType
+
+try:
+ from xml.parsers.xmllib import XMLParser
+except ImportError:
+ from xmllib import XMLParser
+
+
+from esistools import encode
+
+
+DEBUG = 0
+
+
+class LaTeXFormatError(Exception):
+ pass
+
+
+class LaTeXStackError(LaTeXFormatError):
+ def __init__(self, found, stack):
+ msg = "environment close for %s doesn't match;\n stack = %s" \
+ % (found, stack)
+ self.found = found
+ self.stack = stack[:]
+ LaTeXFormatError.__init__(self, msg)
+
+
+_begin_env_rx = re.compile(r"[\\]begin{([^}]*)}")
+_end_env_rx = re.compile(r"[\\]end{([^}]*)}")
+_begin_macro_rx = re.compile(r"[\\]([a-zA-Z]+[*]?) ?({|\s*\n?)")
+_comment_rx = re.compile("%+ ?(.*)\n[ \t]*")
+_text_rx = re.compile(r"[^]~%\\{}]+")
+_optional_rx = re.compile(r"\s*[[]([^]]*)[]]")
+# _parameter_rx is this complicated to allow {...} inside a parameter;
+# this is useful to match tabular layout specifications like {c|p{24pt}}
+_parameter_rx = re.compile("[ \n]*{(([^{}}]|{[^}]*})*)}")
+_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
+_start_group_rx = re.compile("[ \n]*{")
+_start_optional_rx = re.compile("[ \n]*[[]")
+
+
+ESCAPED_CHARS = "$%#^ {}&~"
+
+
+def dbgmsg(msg):
+ if DEBUG:
+ sys.stderr.write(msg + "\n")
+
+def pushing(name, point, depth):
+ dbgmsg("pushing <%s> at %s" % (name, point))
+
+def popping(name, point, depth):
+ dbgmsg("popping </%s> at %s" % (name, point))
+
+
+class _Stack(UserList.UserList):
+ def append(self, entry):
+ if type(entry) is not StringType:
+ raise LaTeXFormatError("cannot push non-string on stack: "
+ + `entry`)
+ #dbgmsg("%s<%s>" % (" "*len(self.data), entry))
+ self.data.append(entry)
+
+ def pop(self, index=-1):
+ entry = self.data[index]
+ del self.data[index]
+ #dbgmsg("%s</%s>" % (" "*len(self.data), entry))
+
+ def __delitem__(self, index):
+ entry = self.data[index]
+ del self.data[index]
+ #dbgmsg("%s</%s>" % (" "*len(self.data), entry))
+
+
+def new_stack():
+ if DEBUG:
+ return _Stack()
+ return []
+
+
+class Conversion:
+ def __init__(self, ifp, ofp, table):
+ self.write = ofp.write
+ self.ofp = ofp
+ self.table = table
+ self.line = string.join(map(string.rstrip, ifp.readlines()), "\n")
+ self.preamble = 1
+
+ def convert(self):
+ self.subconvert()
+
+ def subconvert(self, endchar=None, depth=0):
+ #
+ # Parses content, including sub-structures, until the character
+ # 'endchar' is found (with no open structures), or until the end
+    # of the input data if endchar is None.
+ #
+ stack = new_stack()
+ line = self.line
+ while line:
+ if line[0] == endchar and not stack:
+ self.line = line
+ return line
+ m = _comment_rx.match(line)
+ if m:
+ text = m.group(1)
+ if text:
+ self.write("(COMMENT\n- %s \n)COMMENT\n-\\n\n"
+ % encode(text))
+ line = line[m.end():]
+ continue
+ m = _begin_env_rx.match(line)
+ if m:
+ name = m.group(1)
+ entry = self.get_env_entry(name)
+ # re-write to use the macro handler
+ line = r"\%s %s" % (name, line[m.end():])
+ continue
+ m = _end_env_rx.match(line)
+ if m:
+ # end of environment
+ envname = m.group(1)
+ entry = self.get_entry(envname)
+ while stack and envname != stack[-1] \
+ and stack[-1] in entry.endcloses:
+ self.write(")%s\n" % stack.pop())
+ if stack and envname == stack[-1]:
+ self.write(")%s\n" % entry.outputname)
+ del stack[-1]
+ else:
+ raise LaTeXStackError(envname, stack)
+ line = line[m.end():]
+ continue
+ m = _begin_macro_rx.match(line)
+ if m:
+ # start of macro
+ macroname = m.group(1)
+ if macroname == "c":
+ # Ugh! This is a combining character...
+ endpos = m.end()
+ self.combining_char("c", line[endpos])
+ line = line[endpos + 1:]
+ continue
+ entry = self.get_entry(macroname)
+ if entry.verbatim:
+ # magic case!
+ pos = string.find(line, "\\end{%s}" % macroname)
+ text = line[m.end(1):pos]
+ stack.append(entry.name)
+ self.write("(%s\n" % entry.outputname)
+ self.write("-%s\n" % encode(text))
+ self.write(")%s\n" % entry.outputname)
+ stack.pop()
+ line = line[pos + len("\\end{%s}" % macroname):]
+ continue
+ while stack and stack[-1] in entry.closes:
+ top = stack.pop()
+ topentry = self.get_entry(top)
+ if topentry.outputname:
+ self.write(")%s\n-\\n\n" % topentry.outputname)
+ #
+ if entry.outputname:
+ if entry.empty:
+ self.write("e\n")
+ #
+ params, optional, empty, environ = self.start_macro(macroname)
+ # rip off the macroname
+ if params:
+ line = line[m.end(1):]
+ elif empty:
+ line = line[m.end(1):]
+ else:
+ line = line[m.end():]
+ opened = 0
+ implied_content = 0
+
+ # handle attribute mappings here:
+ for pentry in params:
+ if pentry.type == "attribute":
+ if pentry.optional:
+ m = _optional_rx.match(line)
+ if m and entry.outputname:
+ line = line[m.end():]
+ self.dump_attr(pentry, m.group(1))
+ elif pentry.text and entry.outputname:
+ # value supplied by conversion spec:
+ self.dump_attr(pentry, pentry.text)
+ else:
+ m = _parameter_rx.match(line)
+ if not m:
+ raise LaTeXFormatError(
+ "could not extract parameter %s for %s: %s"
+ % (pentry.name, macroname, `line[:100]`))
+ if entry.outputname:
+ self.dump_attr(pentry, m.group(1))
+ line = line[m.end():]
+ elif pentry.type == "child":
+ if pentry.optional:
+ m = _optional_rx.match(line)
+ if m:
+ line = line[m.end():]
+ if entry.outputname and not opened:
+ opened = 1
+ self.write("(%s\n" % entry.outputname)
+ stack.append(macroname)
+ stack.append(pentry.name)
+ self.write("(%s\n" % pentry.name)
+ self.write("-%s\n" % encode(m.group(1)))
+ self.write(")%s\n" % pentry.name)
+ stack.pop()
+ else:
+ if entry.outputname and not opened:
+ opened = 1
+ self.write("(%s\n" % entry.outputname)
+ stack.append(entry.name)
+ self.write("(%s\n" % pentry.name)
+ stack.append(pentry.name)
+ self.line = skip_white(line)[1:]
+ line = self.subconvert(
+ "}", len(stack) + depth + 1)[1:]
+ self.write(")%s\n" % stack.pop())
+ elif pentry.type == "content":
+ if pentry.implied:
+ implied_content = 1
+ else:
+ if entry.outputname and not opened:
+ opened = 1
+ self.write("(%s\n" % entry.outputname)
+ stack.append(entry.name)
+ line = skip_white(line)
+ if line[0] != "{":
+ raise LaTeXFormatError(
+ "missing content for " + macroname)
+ self.line = line[1:]
+ line = self.subconvert("}", len(stack) + depth + 1)
+ if line and line[0] == "}":
+ line = line[1:]
+ elif pentry.type == "text" and pentry.text:
+ if entry.outputname and not opened:
+ opened = 1
+ stack.append(entry.name)
+ self.write("(%s\n" % entry.outputname)
+ #dbgmsg("--- text: %s" % `pentry.text`)
+ self.write("-%s\n" % encode(pentry.text))
+ elif pentry.type == "entityref":
+ self.write("&%s\n" % pentry.name)
+ if entry.outputname:
+ if not opened:
+ self.write("(%s\n" % entry.outputname)
+ stack.append(entry.name)
+ if not implied_content:
+ self.write(")%s\n" % entry.outputname)
+ stack.pop()
+ continue
+ if line[0] == endchar and not stack:
+ self.line = line[1:]
+ return self.line
+ if line[0] == "}":
+ # end of macro or group
+ macroname = stack[-1]
+ if macroname:
+ conversion = self.table[macroname]
+ if conversion.outputname:
+ # otherwise, it was just a bare group
+ self.write(")%s\n" % conversion.outputname)
+ del stack[-1]
+ line = line[1:]
+ continue
+ if line[0] == "~":
+ # don't worry about the "tie" aspect of this command
+ line = line[1:]
+ self.write("- \n")
+ continue
+ if line[0] == "{":
+ stack.append("")
+ line = line[1:]
+ continue
+ if line[0] == "\\" and line[1] in ESCAPED_CHARS:
+ self.write("-%s\n" % encode(line[1]))
+ line = line[2:]
+ continue
+ if line[:2] == r"\\":
+ self.write("(BREAK\n)BREAK\n")
+ line = line[2:]
+ continue
+ if line[:2] == r"\_":
+ line = "_" + line[2:]
+ continue
+ if line[:2] in (r"\'", r'\"'):
+ # combining characters...
+ self.combining_char(line[1], line[2])
+ line = line[3:]
+ continue
+ m = _text_rx.match(line)
+ if m:
+ text = encode(m.group())
+ self.write("-%s\n" % text)
+ line = line[m.end():]
+ continue
+ # special case because of \item[]
+ # XXX can we axe this???
+ if line[0] == "]":
+ self.write("-]\n")
+ line = line[1:]
+ continue
+ # avoid infinite loops
+ extra = ""
+ if len(line) > 100:
+ extra = "..."
+ raise LaTeXFormatError("could not identify markup: %s%s"
+ % (`line[:100]`, extra))
+ while stack:
+ entry = self.get_entry(stack[-1])
+ if entry.closes:
+ self.write(")%s\n-%s\n" % (entry.outputname, encode("\n")))
+ del stack[-1]
+ else:
+ break
+ if stack:
+ raise LaTeXFormatError("elements remain on stack: "
+ + string.join(stack, ", "))
+ # otherwise we just ran out of input here...
+
+ # This is a really limited table of combinations, but it will have
+ # to do for now.
+ _combinations = {
+ ("c", "c"): 0x00E7,
+ ("'", "e"): 0x00E9,
+ ('"', "o"): 0x00F6,
+ }
+
+ def combining_char(self, prefix, char):
+ ordinal = self._combinations[(prefix, char)]
+ self.write("-\\%%%d;\n" % ordinal)
+
+ def start_macro(self, name):
+ conversion = self.get_entry(name)
+ parameters = conversion.parameters
+ optional = parameters and parameters[0].optional
+ return parameters, optional, conversion.empty, conversion.environment
+
+ def get_entry(self, name):
+ entry = self.table.get(name)
+ if entry is None:
+ dbgmsg("get_entry(%s) failing; building default entry!" % `name`)
+ # not defined; build a default entry:
+ entry = TableEntry(name)
+ entry.has_content = 1
+ entry.parameters.append(Parameter("content"))
+ self.table[name] = entry
+ return entry
+
+ def get_env_entry(self, name):
+ entry = self.table.get(name)
+ if entry is None:
+ # not defined; build a default entry:
+ entry = TableEntry(name, 1)
+ entry.has_content = 1
+ entry.parameters.append(Parameter("content"))
+ entry.parameters[-1].implied = 1
+ self.table[name] = entry
+ elif not entry.environment:
+ raise LaTeXFormatError(
+ name + " is defined as a macro; expected environment")
+ return entry
+
+ def dump_attr(self, pentry, value):
+ if not (pentry.name and value):
+ return
+ if _token_rx.match(value):
+ dtype = "TOKEN"
+ else:
+ dtype = "CDATA"
+ self.write("A%s %s %s\n" % (pentry.name, dtype, encode(value)))
+
+
+def convert(ifp, ofp, table):
+ c = Conversion(ifp, ofp, table)
+ try:
+ c.convert()
+ except IOError, (err, msg):
+ if err != errno.EPIPE:
+ raise
+
+
+def skip_white(line):
+ while line and line[0] in " %\n\t\r":
+ line = string.lstrip(line[1:])
+ return line
+
+
+
+class TableEntry:
+ def __init__(self, name, environment=0):
+ self.name = name
+ self.outputname = name
+ self.environment = environment
+ self.empty = not environment
+ self.has_content = 0
+ self.verbatim = 0
+ self.auto_close = 0
+ self.parameters = []
+ self.closes = []
+ self.endcloses = []
+
+class Parameter:
+ def __init__(self, type, name=None, optional=0):
+ self.type = type
+ self.name = name
+ self.optional = optional
+ self.text = ''
+ self.implied = 0
+
+
+class TableParser(XMLParser):
+ def __init__(self, table=None):
+ if table is None:
+ table = {}
+ self.__table = table
+ self.__current = None
+ self.__buffer = ''
+ XMLParser.__init__(self)
+
+ def get_table(self):
+ for entry in self.__table.values():
+ if entry.environment and not entry.has_content:
+ p = Parameter("content")
+ p.implied = 1
+ entry.parameters.append(p)
+ entry.has_content = 1
+ return self.__table
+
+ def start_environment(self, attrs):
+ name = attrs["name"]
+ self.__current = TableEntry(name, environment=1)
+ self.__current.verbatim = attrs.get("verbatim") == "yes"
+ if attrs.has_key("outputname"):
+ self.__current.outputname = attrs.get("outputname")
+ self.__current.endcloses = string.split(attrs.get("endcloses", ""))
+ def end_environment(self):
+ self.end_macro()
+
+ def start_macro(self, attrs):
+ name = attrs["name"]
+ self.__current = TableEntry(name)
+ self.__current.closes = string.split(attrs.get("closes", ""))
+ if attrs.has_key("outputname"):
+ self.__current.outputname = attrs.get("outputname")
+ def end_macro(self):
+ self.__table[self.__current.name] = self.__current
+ self.__current = None
+
+ def start_attribute(self, attrs):
+ name = attrs.get("name")
+ optional = attrs.get("optional") == "yes"
+ if name:
+ p = Parameter("attribute", name, optional=optional)
+ else:
+ p = Parameter("attribute", optional=optional)
+ self.__current.parameters.append(p)
+ self.__buffer = ''
+ def end_attribute(self):
+ self.__current.parameters[-1].text = self.__buffer
+
+ def start_entityref(self, attrs):
+ name = attrs["name"]
+ p = Parameter("entityref", name)
+ self.__current.parameters.append(p)
+
+ def start_child(self, attrs):
+ name = attrs["name"]
+ p = Parameter("child", name, attrs.get("optional") == "yes")
+ self.__current.parameters.append(p)
+ self.__current.empty = 0
+
+ def start_content(self, attrs):
+ p = Parameter("content")
+ p.implied = attrs.get("implied") == "yes"
+ if self.__current.environment:
+ p.implied = 1
+ self.__current.parameters.append(p)
+ self.__current.has_content = 1
+ self.__current.empty = 0
+
+ def start_text(self, attrs):
+ self.__current.empty = 0
+ self.__buffer = ''
+ def end_text(self):
+ p = Parameter("text")
+ p.text = self.__buffer
+ self.__current.parameters.append(p)
+
+ def handle_data(self, data):
+ self.__buffer = self.__buffer + data
+
+
+def load_table(fp, table=None):
+ parser = TableParser(table=table)
+ parser.feed(fp.read())
+ parser.close()
+ return parser.get_table()
+
+
+def main():
+ global DEBUG
+ #
+ opts, args = getopt.getopt(sys.argv[1:], "D", ["debug"])
+ for opt, arg in opts:
+ if opt in ("-D", "--debug"):
+ DEBUG = DEBUG + 1
+ if len(args) == 0:
+ ifp = sys.stdin
+ ofp = sys.stdout
+ elif len(args) == 1:
+ ifp = open(args)
+ ofp = sys.stdout
+ elif len(args) == 2:
+ ifp = open(args[0])
+ ofp = open(args[1], "w")
+ else:
+ usage()
+ sys.exit(2)
+
+ table = load_table(open(os.path.join(sys.path[0], 'conversion.xml')))
+ convert(ifp, ofp, table)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/doc/tools/sgmlconv/make.rules b/doc/tools/sgmlconv/make.rules
new file mode 100644
index 0000000..93579c5
--- /dev/null
+++ b/doc/tools/sgmlconv/make.rules
@@ -0,0 +1,48 @@
+# -*- makefile -*-
+#
+# Extra magic needed by the LaTeX->XML conversion process. This requires
+# $(TOOLSDIR) to be properly defined.
+
+DOCFIXER= $(TOOLSDIR)/sgmlconv/docfixer.py
+ESIS2ML= $(TOOLSDIR)/sgmlconv/esis2sgml.py
+LATEX2ESIS= $(TOOLSDIR)/sgmlconv/latex2esis.py
+CONVERSION= $(TOOLSDIR)/sgmlconv/conversion.xml
+
+ESISTARGETS= $(patsubst %.tex,%.esis,$(wildcard *.tex))
+ESIS1TARGETS= $(patsubst %.tex,%.esis1,$(wildcard *.tex))
+XMLTARGETS= $(patsubst %.tex,%.xml,$(wildcard *.tex))
+
+L2EFLAGS=
+
+all: xml
+
+esis: $(ESISTARGETS)
+esis1: $(ESIS1TARGETS)
+xml: $(XMLTARGETS)
+
+ESISTOOLS= $(TOOLSDIR)/sgmlconv/esistools.py
+
+$(ESISTARGETS): $(LATEX2ESIS) $(DOCFIXER) $(ESISTOOLS) $(CONVERSION)
+$(ESIS1TARGETS): $(LATEX2ESIS) $(CONVERSION)
+# This variant is easier to work with while debugging the conversion spec:
+#$(ESISTARGETS): $(LATEX2ESIS) $(DOCFIXER) $(ESISTOOLS)
+$(XMLTARGETS): $(ESIS2ML)
+
+
+.SUFFIXES: .esis .esis1 .tex .xml
+
+.tex.esis1:
+ $(LATEX2ESIS) $(L2EFLAGS) $< $@
+
+.esis1.esis:
+ $(DOCFIXER) $< $@
+
+.esis.xml:
+ $(ESIS2ML) --xml $< $@
+
+
+clean:
+ rm -f *.esis *.esis1
+
+clobber: clean
+ rm -f *.xml