initial source import
diff --git a/doc/tools/sgmlconv/Makefile b/doc/tools/sgmlconv/Makefile
new file mode 100644
index 0000000..30a846e
--- /dev/null
+++ b/doc/tools/sgmlconv/Makefile
@@ -0,0 +1,67 @@
+# Simple makefile to control XML generation for the entire document tree.
+# This should be used from the top-level directory (Doc/), not the directory
+# that actually contains this file:
+#
+# $ pwd
+# .../Doc
+# $ make -f tools/sgmlconv/Makefile
+
+TOPDIR=.
+TOOLSDIR=tools
+
+SGMLRULES=../$(TOOLSDIR)/sgmlconv/make.rules
+# The 'inst' directory breaks the conversion, so skip it for now.
+SUBDIRS=api dist ext lib mac ref tut
+SUBMAKE=$(MAKE) -f $(SGMLRULES) TOOLSDIR=../$(TOOLSDIR)
+
+all: xml
+
+.PHONY: esis xml
+.PHONY: $(SUBDIRS)
+
+xml:
+ for DIR in $(SUBDIRS) ; do \
+ (cd $$DIR; $(SUBMAKE) xml) || exit $$? ; done
+
+esis:
+ for DIR in $(SUBDIRS) ; do \
+ (cd $$DIR; $(SUBMAKE) esis) || exit $$? ; done
+
+esis1:
+ for DIR in $(SUBDIRS) ; do \
+ (cd $$DIR; $(SUBMAKE) esis1) || exit $$? ; done
+
+tarball: xml
+ tar cf - tools/sgmlconv */*.xml | gzip -9 >xml-1.5.2b2.tgz
+
+api:
+ cd api; $(SUBMAKE)
+
+dist:
+ cd dist; $(SUBMAKE)
+
+ext:
+ cd ext; $(SUBMAKE)
+
+inst:
+ cd inst; $(SUBMAKE)
+
+lib:
+ cd lib; $(SUBMAKE)
+
+mac:
+ cd mac; $(SUBMAKE)
+
+ref:
+ cd ref; $(SUBMAKE)
+
+tut:
+ cd tut; $(SUBMAKE)
+
+clean:
+ for DIR in $(SUBDIRS) ; do \
+ (cd $$DIR; $(SUBMAKE) clean) ; done
+
+clobber:
+ for DIR in $(SUBDIRS) ; do \
+ (cd $$DIR; $(SUBMAKE) clobber) ; done
diff --git a/doc/tools/sgmlconv/README b/doc/tools/sgmlconv/README
new file mode 100644
index 0000000..1546293
--- /dev/null
+++ b/doc/tools/sgmlconv/README
@@ -0,0 +1,58 @@
+These scripts and Makefile fragment are used to convert the Python
+documentation in LaTeX format to XML.
+
+This material is preliminary and incomplete. Python 2.0 is required.
+
+To convert all documents to XML:
+
+ cd Doc/
+ make -f tools/sgmlconv/Makefile
+
+To convert one document to XML:
+
+ cd Doc/<document-dir>
+ make -f ../tools/sgmlconv/make.rules TOOLSDIR=../tools
+
+Please send comments and bug reports to python-docs@python.org.
+
+
+What do the tools do?
+---------------------
+
+latex2esis.py
+ Reads in a conversion specification written in XML
+ (conversion.xml), reads a LaTeX document fragment, and interprets
+ the markup according to the specification. The output is a stream
+ of ESIS events like those created by the nsgmls SGML parser, but
+ is *not* guaranteed to represent a single tree! This is done to
+ allow conversion per entity rather than per document. Since many
+ of the LaTeX files for the Python documentation contain two
+ sections on closely related modules, it is important to allow both
+ of the resulting <section> elements to exist in the same output
+ stream. Additionally, since comments are not supported in ESIS,
+ comments are converted to <COMMENT> elements, which might exist at
+ the same level as the top-level content elements.
+
+ The output of latex2esis.py gets saved as <filename>.esis1.
+
+docfixer.py
+ This is the really painful part of the conversion. Well, it's the
+ second really painful part, but more of the pain is specific to
+ the structure of the Python documentation and desired output
+ rather than to the parsing of LaTeX markup.
+
+ This script loads the ESIS data created by latex2esis.py into a
+ DOM document *fragment* (remember, the latex2esis.py output may
+ not be well-formed). Once loaded, it walks over the tree many
+ times looking for a variety of possible specific
+ micro-conversions. Most of the code is not in any way "general".
+ After processing the fragment, a new ESIS data stream is written
+ out. Like the input, it may not represent a well-formed
+ document, but does represent a parsed entity.
+
+ The output of docfixer.py is what gets saved in <filename>.esis.
+
+esis2sgml.py
+ Reads an ESIS stream and converts it to SGML or XML. This also
+ converts <COMMENT> elements to real comments. This works quickly
+ because there's not much to actually do.
diff --git a/doc/tools/sgmlconv/conversion.xml b/doc/tools/sgmlconv/conversion.xml
new file mode 100644
index 0000000..7759bad
--- /dev/null
+++ b/doc/tools/sgmlconv/conversion.xml
@@ -0,0 +1,757 @@
+<?xml version="1.0" encoding="iso-8859-1"?>
+<conversion>
+ <!-- Miscellaneous. -->
+ <macro name="declaremodule">
+ <attribute name="id" optional="yes"/>
+ <attribute name="type"/>
+ <attribute name="name"/>
+ </macro>
+ <macro name="modulesynopsis">
+ <content/>
+ </macro>
+ <macro name="platform">
+ <content/>
+ </macro>
+ <macro name="deprecated">
+ <attribute name="version"/>
+ <content/>
+ </macro>
+ <macro name="label">
+ <attribute name="id"/>
+ </macro>
+ <macro name="nodename" outputname="label">
+ <attribute name="id"/>
+ </macro>
+ <macro name="localmoduletable"/>
+ <macro name="manpage">
+ <attribute name="name"/>
+ <attribute name="section"/>
+ </macro>
+ <macro name="module">
+ <content/>
+ </macro>
+ <macro name="moduleauthor">
+ <attribute name="name"/>
+ <attribute name="email"/>
+ </macro>
+ <macro name="citetitle">
+ <attribute name="href" optional="yes"/>
+ <content/>
+ </macro>
+ <macro name="rfc">
+ <attribute name="num"/>
+ </macro>
+ <macro name="sectionauthor" outputname="author">
+ <attribute name="name"/>
+ <attribute name="email"/>
+ </macro>
+ <macro name="author">
+ <attribute name="name"/>
+ </macro>
+ <macro name="authoraddress">
+ <content/>
+ </macro>
+ <macro name="shortversion"/>
+ <macro name="versionadded">
+ <attribute name="version"/>
+ </macro>
+ <!-- This is broken: we need to re-order the optional and required
+ parameters, making the optional parameter the content for the
+ element. The processor is not powerful enough to handle this.
+ -->
+ <macro name="versionchanged">
+ <attribute name="how" optional="yes"/>
+ <attribute name="version"/>
+ </macro>
+
+ <!-- Module referencing. -->
+ <macro name="refmodule" outputname="module">
+ <attribute name="" optional="yes"/>
+ <attribute name="link">yes</attribute>
+ <content/>
+ </macro>
+
+ <!-- Information units. -->
+ <!-- C things. -->
+ <environment name="cfuncdesc">
+ <attribute name="type"/>
+ <attribute name="name"/>
+ <child name="args"/>
+ </environment>
+ <environment name="ctypedesc">
+ <attribute name="tag" optional="yes"/>
+ <attribute name="name"/>
+ </environment>
+ <environment name="cvardesc">
+ <attribute name="type"/>
+ <attribute name="name"/>
+ </environment>
+
+ <!-- Python things. -->
+ <macro name="optional">
+ <content/>
+ </macro>
+ <macro name="unspecified"/>
+ <macro name="moreargs"/>
+ <environment name="classdesc">
+ <attribute name="name"/>
+ <child name="args"/>
+ </environment>
+ <environment name="datadesc">
+ <attribute name="name"/>
+ </environment>
+ <macro name="dataline">
+ <attribute name="name"/>
+ </macro>
+ <environment name="excdesc">
+ <attribute name="name"/>
+ </environment>
+
+ <environment name="funcdesc">
+ <attribute name="name"/>
+ <child name="args"/>
+ </environment>
+ <macro name="funcline">
+ <attribute name="name"/>
+ <child name="args"/>
+ </macro>
+ <environment name="funcdescni" outputname="funcdesc">
+ <attribute name="index">no</attribute>
+ <attribute name="name"/>
+ <child name="args"/>
+ </environment>
+ <macro name="funclineni" outputname="funcline">
+ <attribute name="index">no</attribute>
+ <attribute name="name"/>
+ <child name="args"/>
+ </macro>
+
+ <environment name="memberdesc">
+ <attribute name="class" optional="yes"/>
+ <attribute name="name"/>
+ </environment>
+ <environment name="memberdescni" outputname="memberdesc">
+ <attribute name="index">no</attribute>
+ <attribute name="class" optional="yes"/>
+ <attribute name="name"/>
+ </environment>
+
+ <environment name="methoddesc">
+ <attribute name="class" optional="yes"/>
+ <attribute name="name"/>
+ <child name="args"/>
+ </environment>
+ <macro name="methodline">
+ <attribute name="class" optional="yes"/>
+ <attribute name="name"/>
+ <child name="args"/>
+ </macro>
+ <environment name="methoddescni">
+ <attribute name="index">no</attribute>
+ <attribute name="class" optional="yes"/>
+ <attribute name="name"/>
+ <child name="args"/>
+ </environment>
+ <macro name="methodlineni" outputname="methodline">
+ <attribute name="index">no</attribute>
+ <attribute name="class" optional="yes"/>
+ <attribute name="name"/>
+ <child name="args"/>
+ </macro>
+
+ <environment name="opcodedesc">
+ <attribute name="name"/>
+ <attribute name="var"/>
+ </environment>
+
+ <!-- "See also:" sections. -->
+ <macro name="seemodule">
+ <attribute name="ref" optional="yes"/>
+ <attribute name="name"/>
+ <child name="description"/>
+ </macro>
+ <macro name="seepep">
+ <attribute name="number"/>
+ <child name="title"/>
+ <child name="description"/>
+ </macro>
+ <macro name="seerfc">
+ <attribute name="number"/>
+ <child name="title"/>
+ <child name="description"/>
+ </macro>
+ <macro name="seetext">
+ <child name="description"/>
+ </macro>
+ <macro name="seetitle">
+ <attribute name="href" optional="yes"/>
+ <child name="title"/>
+ <child name="description"/>
+ </macro>
+ <macro name="seeurl">
+ <attribute name="href"/>
+ <child name="description"/>
+ </macro>
+
+ <!-- Index-generating markup. -->
+ <macro name="index" outputname="indexterm">
+ <attribute name="term1"/>
+ </macro>
+ <macro name="indexii" outputname="indexterm">
+ <attribute name="term1"/>
+ <attribute name="term2"/>
+ </macro>
+ <macro name="indexiii" outputname="indexterm">
+ <attribute name="term1"/>
+ <attribute name="term2"/>
+ <attribute name="term3"/>
+ </macro>
+ <macro name="indexiv" outputname="indexterm">
+ <attribute name="term1"/>
+ <attribute name="term2"/>
+ <attribute name="term3"/>
+ <attribute name="term4"/>
+ </macro>
+
+ <macro name="ttindex" outputname="indexterm">
+ <attribute name="style">tt</attribute>
+ <attribute name="term1"/>
+ </macro>
+
+ <macro name="refmodindex">
+ <attribute name="module"/>
+ </macro>
+ <macro name="stmodindex">
+ <attribute name="module"/>
+ </macro>
+ <macro name="refbimodindex" outputname="refmodindex">
+ <attribute name="module"/>
+ </macro>
+ <macro name="refexmodindex" outputname="refmodindex">
+ <attribute name="module"/>
+ </macro>
+ <macro name="refstmodindex" outputname="refmodindex">
+ <attribute name="module"/>
+ </macro>
+
+ <macro name="bifuncindex">
+ <attribute name="name"/>
+ </macro>
+ <macro name="exindex">
+ <attribute name="name"/>
+ </macro>
+ <macro name="obindex">
+ <attribute name="name"/>
+ </macro>
+ <macro name="kwindex">
+ <attribute name="name"/>
+ </macro>
+ <macro name="opindex">
+ <attribute name="type"/>
+ </macro>
+ <macro name="stindex">
+ <attribute name="type"/>
+ </macro>
+ <macro name="withsubitem">
+ <attribute name="text"/>
+ <content/>
+ </macro>
+ <macro name="setindexsubitem">
+ <attribute name="text"/>
+ </macro>
+
+ <!-- Entity management. -->
+ <macro name="include">
+ <attribute name="source"/>
+ </macro>
+ <macro name="input">
+ <attribute name="source"/>
+ </macro>
+
+ <!-- Large-scale document structure. -->
+ <macro name="documentclass">
+ <attribute name="classname"/>
+ </macro>
+
+ <macro name="usepackage">
+ <attribute name="options" optional="yes"/>
+ <attribute name="pkg"/>
+ </macro>
+
+ <environment name="document"
+ endcloses="chapter chapter* section section*
+ subsection subsection*
+ subsubsection subsubsection*
+ paragraph paragraph* subparagraph subparagraph*"/>
+
+ <macro name="chapter"
+ closes="chapter chapter* section section* subsection subsection*
+ subsubsection subsubsection*
+ paragraph paragraph* subparagraph subparagraph*">
+ <text>
+</text>
+ <child name="title"/>
+ <content implied="yes"/>
+ </macro>
+ <macro name="chapter*" outputname="chapter"
+ closes="chapter chapter* section section* subsection subsection*
+ subsubsection subsubsection*
+ paragraph paragraph* subparagraph subparagraph*">
+ <attribute name="numbered">no</attribute>
+ <text>
+</text>
+ <child name="title"/>
+ <content implied="yes"/>
+ </macro>
+
+ <macro name="section"
+ closes="section section* subsection subsection*
+ subsubsection subsubsection*
+ paragraph paragraph* subparagraph subparagraph*">
+ <text>
+</text>
+ <child name="title"/>
+ <content implied="yes"/>
+ </macro>
+ <macro name="section*" outputname="section"
+ closes="section section* subsection subsection*
+ subsubsection subsubsection*
+ paragraph paragraph* subparagraph subparagraph*">
+ <attribute name="numbered">no</attribute>
+ <text>
+</text>
+ <child name="title"/>
+ <content implied="yes"/>
+ </macro>
+
+ <macro name="subsection"
+ closes="subsection subsection* subsubsection subsubsection*
+ paragraph paragraph* subparagraph subparagraph*">
+ <text>
+</text>
+ <child name="title"/>
+ <content implied="yes"/>
+ </macro>
+ <macro name="subsection*" outputname="subsection"
+ closes="subsection subsection* subsubsection subsubsection*
+ paragraph paragraph* subparagraph subparagraph*">
+ <attribute name="numbered">no</attribute>
+ <text>
+</text>
+ <child name="title"/>
+ <content implied="yes"/>
+ </macro>
+
+ <macro name="subsubsection"
+ closes="subsubsection subsubsection*
+ paragraph paragraph* subparagraph subparagraph*">
+ <text>
+</text>
+ <child name="title"/>
+ <content implied="yes"/>
+ </macro>
+ <macro name="subsubsection*" outputname="subsubsection"
+ closes="subsubsection subsubsection*
+ paragraph paragraph* subparagraph subparagraph*">
+ <attribute name="numbered">no</attribute>
+ <text>
+</text>
+ <child name="title"/>
+ <content implied="yes"/>
+ </macro>
+
+ <macro name="paragraph"
+ closes="paragraph paragraph* subparagraph subparagraph*">
+ <text>
+</text>
+ <child name="title"/>
+ <content implied="yes"/>
+ </macro>
+ <macro name="paragraph*" outputname="paragraph"
+ closes="paragraph paragraph* subparagraph subparagraph*">
+ <attribute name="numbered">no</attribute>
+ <text>
+</text>
+ <child name="title"/>
+ <content implied="yes"/>
+ </macro>
+
+ <macro name="subparagraph"
+ closes="subparagraph subparagraph*">
+ <text>
+</text>
+ <child name="title"/>
+ <content implied="yes"/>
+ </macro>
+ <macro name="subparagraph*" outputname="subparagraph"
+ closes="subparagraph subparagraph*">
+ <attribute name="numbered">no</attribute>
+ <text>
+</text>
+ <child name="title"/>
+ <content implied="yes"/>
+ </macro>
+ <macro name="title">
+ <content/>
+ </macro>
+
+ <macro name="appendix" outputname="back-matter"
+ closes="chapter chapter* section subsection subsubsection
+ paragraph subparagraph"/>
+
+ <environment name="list"
+ endcloses="item">
+ <attribute name="bullet"/>
+ <attribute name="init"/>
+ </environment>
+ <macro name="item" closes="item">
+ <child name="leader" optional="yes"/>
+ <content implied="yes"/>
+ </macro>
+
+ <macro name="ref">
+ <attribute name="ref"/>
+ </macro>
+
+ <environment name="description" outputname="descriptionlist"
+ endcloses="item"/>
+
+ <environment name="enumerate" outputname="enumeration"
+ endcloses="item"/>
+
+ <environment name="fulllineitems"
+ endcloses="item"/>
+
+ <environment name="itemize"
+ endcloses="item"/>
+
+ <environment name="definitions" outputname="definitionlist"
+ encloses="term"/>
+ <macro name="term" closes="definition">
+ <!-- not really optional, but uses the [] syntax -->
+ <child name="term" optional="yes"/>
+ <child name="definition" implied="yes"/>
+ </macro>
+
+ <environment name="alltt" outputname="verbatim"/>
+ <environment name="comment" verbatim="yes"/>
+ <environment name="verbatim" verbatim="yes"/>
+ <environment name="verbatim*" verbatim="yes">
+ <!-- not used anywhere, but it's a standard LaTeXism -->
+ <attribute name="spaces">visible</attribute>
+ </environment>
+
+ <!-- Table markup. -->
+ <macro name="hline"/>
+ <environment name="tableii" outputname="table">
+ <attribute name="cols">2</attribute>
+ <attribute name="colspec"/>
+ <attribute name="style"/>
+ <child name="entry"/>
+ <text>
+ </text>
+ <child name="entry"/>
+ </environment>
+ <environment name="longtableii" outputname="table">
+ <attribute name="cols">2</attribute>
+ <attribute name="colspec"/>
+ <attribute name="style"/>
+ <child name="entry"/>
+ <text>
+ </text>
+ <child name="entry"/>
+ </environment>
+ <macro name="lineii" outputname="row">
+ <child name="entry"/>
+ <text>
+ </text>
+ <child name="entry"/>
+ </macro>
+
+ <environment name="tableiii" outputname="table">
+ <attribute name="cols">3</attribute>
+ <attribute name="colspec"/>
+ <attribute name="style"/>
+ <child name="entry"/>
+ <text>
+ </text>
+ <child name="entry"/>
+ <text>
+ </text>
+ <child name="entry"/>
+ </environment>
+ <environment name="longtableiii" outputname="table">
+ <attribute name="cols">3</attribute>
+ <attribute name="colspec"/>
+ <attribute name="style"/>
+ <child name="entry"/>
+ <text>
+ </text>
+ <child name="entry"/>
+ <text>
+ </text>
+ <child name="entry"/>
+ </environment>
+ <macro name="lineiii" outputname="row">
+ <child name="entry"/>
+ <text>
+ </text>
+ <child name="entry"/>
+ <text>
+ </text>
+ <child name="entry"/>
+ </macro>
+
+ <environment name="tableiv" outputname="table">
+ <attribute name="cols">4</attribute>
+ <attribute name="colspec"/>
+ <attribute name="style"/>
+ <child name="entry"/>
+ <text>
+ </text>
+ <child name="entry"/>
+ <text>
+ </text>
+ <child name="entry"/>
+ <text>
+ </text>
+ <child name="entry"/>
+ </environment>
+ <environment name="longtableiv" outputname="table">
+ <attribute name="cols">4</attribute>
+ <attribute name="colspec"/>
+ <attribute name="style"/>
+ <child name="entry"/>
+ <text>
+ </text>
+ <child name="entry"/>
+ <text>
+ </text>
+ <child name="entry"/>
+ <text>
+ </text>
+ <child name="entry"/>
+ </environment>
+ <macro name="lineiv" outputname="row">
+ <child name="entry"/>
+ <text>
+ </text>
+ <child name="entry"/>
+ <text>
+ </text>
+ <child name="entry"/>
+ <text>
+ </text>
+ <child name="entry"/>
+ </macro>
+
+ <!-- These are handled at a later translation stage, at least for now. -->
+ <macro name="Cpp" outputname="">
+ <text>C++</text>
+ </macro>
+ <macro name="geq" outputname="">
+ <entityref name="geq"/>
+ </macro>
+ <macro name="LaTeX" outputname="">
+ <text>LaTeX</text>
+ </macro>
+ <macro name="ldots" outputname="">
+ <text>...</text>
+ </macro>
+ <macro name="leq" outputname="">
+ <entityref name="leq"/>
+ </macro>
+ <macro name="TeX" outputname="">
+ <text>TeX</text>
+ </macro>
+ <macro name="version"/>
+
+ <!-- Distutils things. -->
+ <macro name="command">
+ <content/>
+ </macro>
+ <macro name="option">
+ <content/>
+ </macro>
+ <macro name="filevar" outputname="var">
+ <content/>
+ </macro>
+ <macro name="XXX" outputname="editorial-comment">
+ <content/>
+ </macro>
+
+ <!-- Misc. -->
+ <macro name="emph">
+ <content/>
+ </macro>
+ <macro name="strong">
+ <content/>
+ </macro>
+ <macro name="textrm">
+ <content/>
+ </macro>
+ <macro name="texttt">
+ <content/>
+ </macro>
+ <macro name="code">
+ <content/>
+ </macro>
+ <macro name="exception">
+ <content/>
+ </macro>
+ <macro name="keyword">
+ <content/>
+ </macro>
+ <macro name="samp">
+ <content/>
+ </macro>
+ <macro name="class">
+ <content/>
+ </macro>
+ <macro name="cdata">
+ <content/>
+ </macro>
+ <macro name="cfunction">
+ <content/>
+ </macro>
+ <macro name="ctype">
+ <content/>
+ </macro>
+ <macro name="pytype">
+ <content/>
+ </macro>
+ <macro name="character">
+ <content/>
+ </macro>
+ <macro name="constant">
+ <content/>
+ </macro>
+ <macro name="envvar" outputname="envar">
+ <content/>
+ </macro>
+ <macro name="file" outputname="filename">
+ <content/>
+ </macro>
+ <macro name="filenq" outputname="filename">
+ <attribute name="quote">no</attribute>
+ <content/>
+ </macro>
+ <macro name="function">
+ <content/>
+ </macro>
+ <macro name="kbd">
+ <content/>
+ </macro>
+ <macro name="makevar">
+ <content/>
+ </macro>
+ <macro name="method">
+ <content/>
+ </macro>
+ <macro name="member">
+ <content/>
+ </macro>
+ <macro name="mimetype">
+ <content/>
+ </macro>
+ <macro name="newsgroup">
+ <content/>
+ </macro>
+ <macro name="program" outputname="command">
+ <content/>
+ </macro>
+ <macro name="programopt" outputname="option">
+ <content/>
+ </macro>
+ <macro name="longprogramopt" outputname="longoption">
+ <content/>
+ </macro>
+ <macro name="regexp">
+ <content/>
+ </macro>
+ <macro name="var">
+ <content/>
+ </macro>
+ <macro name="email">
+ <content/>
+ </macro>
+ <macro name="url">
+ <content/>
+ </macro>
+ <macro name="footnote">
+ <content/>
+ </macro>
+ <macro name="dfn" outputname="definedterm">
+ <content/>
+ </macro>
+
+ <macro name="mbox">
+ <content/>
+ </macro>
+
+ <!-- minimal math stuff to get by -->
+ <macro name="pi"/>
+ <macro name="sqrt">
+ <content/>
+ </macro>
+ <macro name="frac" outputname="fraction">
+ <child name="numerator"/>
+ <child name="denominator"/>
+ </macro>
+ <macro name="sum">
+ <content/>
+ </macro>
+
+ <!-- Conversions to text; perhaps could be different? There's -->
+ <!-- no way for a style sheet to work with these this way. -->
+ <macro name="ABC" outputname="">
+ <text>ABC</text>
+ </macro>
+ <macro name="ASCII" outputname="">
+ <text>ASCII</text>
+ </macro>
+ <macro name="C" outputname="">
+ <text>C</text>
+ </macro>
+ <macro name="EOF" outputname="">
+ <text>EOF</text>
+ </macro>
+ <macro name="e" outputname="">
+ <text>\</text>
+ </macro>
+ <macro name="NULL" outputname="constant">
+ <text>NULL</text>
+ </macro>
+ <macro name="POSIX" outputname="">
+ <text>POSIX</text>
+ </macro>
+ <macro name="UNIX" outputname="">
+ <text>Unix</text>
+ </macro>
+ <macro name="textasciitilde" outputname="">
+ <text>~</text>
+ </macro>
+
+ <!-- These will end up disappearing as well! -->
+ <macro name="catcode" outputname=""/>
+ <macro name="fi" outputname=""/>
+ <macro name="ifhtml" outputname=""/>
+ <macro name="indexname" outputname=""/>
+ <macro name="labelwidth" outputname=""/>
+ <macro name="large" outputname=""/>
+ <macro name="leftmargin" outputname=""/>
+ <macro name="makeindex" outputname=""/>
+ <macro name="makemodindex" outputname=""/>
+ <macro name="maketitle" outputname=""/>
+ <macro name="noindent" outputname=""/>
+ <macro name="protect" outputname=""/>
+ <macro name="renewcommand">
+ <attribute name="macro"/>
+ <attribute name="nargs" optional="yes"/>
+ <content/>
+ </macro>
+ <macro name="tableofcontents" outputname=""/>
+ <macro name="vspace">
+ <attribute name="size"/>
+ </macro>
+</conversion>
diff --git a/doc/tools/sgmlconv/docfixer.py b/doc/tools/sgmlconv/docfixer.py
new file mode 100755
index 0000000..463276b
--- /dev/null
+++ b/doc/tools/sgmlconv/docfixer.py
@@ -0,0 +1,1033 @@
+#! /usr/bin/env python
+
+"""Perform massive transformations on a document tree created from the LaTeX
+of the Python documentation, and dump the ESIS data for the transformed tree.
+"""
+
+
+import errno
+import esistools
+import re
+import string
+import sys
+import xml.dom
+import xml.dom.minidom
+
+ELEMENT = xml.dom.Node.ELEMENT_NODE
+ENTITY_REFERENCE = xml.dom.Node.ENTITY_REFERENCE_NODE
+TEXT = xml.dom.Node.TEXT_NODE
+
+
+class ConversionError(Exception):
+ pass
+
+
+ewrite = sys.stderr.write
+try:
+ # We can only do this trick on Unix (if tput is on $PATH)!
+ if sys.platform != "posix" or not sys.stderr.isatty():
+ raise ImportError
+ import commands
+except ImportError:
+ bwrite = ewrite
+else:
+ def bwrite(s, BOLDON=commands.getoutput("tput bold"),
+ BOLDOFF=commands.getoutput("tput sgr0")):
+ ewrite("%s%s%s" % (BOLDON, s, BOLDOFF))
+
+
+PARA_ELEMENT = "para"
+
+DEBUG_PARA_FIXER = 0
+
+if DEBUG_PARA_FIXER:
+ def para_msg(s):
+ ewrite("*** %s\n" % s)
+else:
+ def para_msg(s):
+ pass
+
+
+def get_first_element(doc, gi):
+ for n in doc.childNodes:
+ if n.nodeName == gi:
+ return n
+
+def extract_first_element(doc, gi):
+ node = get_first_element(doc, gi)
+ if node is not None:
+ doc.removeChild(node)
+ return node
+
+
+def get_documentElement(node):
+ result = None
+ for child in node.childNodes:
+ if child.nodeType == ELEMENT:
+ result = child
+ return result
+
+
+def set_tagName(elem, gi):
+ elem.nodeName = elem.tagName = gi
+
+
+def find_all_elements(doc, gi):
+ nodes = []
+ if doc.nodeName == gi:
+ nodes.append(doc)
+ for child in doc.childNodes:
+ if child.nodeType == ELEMENT:
+ if child.tagName == gi:
+ nodes.append(child)
+ for node in child.getElementsByTagName(gi):
+ nodes.append(node)
+ return nodes
+
+def find_all_child_elements(doc, gi):
+ nodes = []
+ for child in doc.childNodes:
+ if child.nodeName == gi:
+ nodes.append(child)
+ return nodes
+
+
+def find_all_elements_from_set(doc, gi_set):
+ return __find_all_elements_from_set(doc, gi_set, [])
+
+def __find_all_elements_from_set(doc, gi_set, nodes):
+ if doc.nodeName in gi_set:
+ nodes.append(doc)
+ for child in doc.childNodes:
+ if child.nodeType == ELEMENT:
+ __find_all_elements_from_set(child, gi_set, nodes)
+ return nodes
+
+
+def simplify(doc, fragment):
+ # Try to rationalize the document a bit, since these things are simply
+ # not valid SGML/XML documents as they stand, and need a little work.
+ documentclass = "document"
+ inputs = []
+ node = extract_first_element(fragment, "documentclass")
+ if node is not None:
+ documentclass = node.getAttribute("classname")
+ node = extract_first_element(fragment, "title")
+ if node is not None:
+ inputs.append(node)
+ # update the name of the root element
+ node = get_first_element(fragment, "document")
+ if node is not None:
+ set_tagName(node, documentclass)
+ while 1:
+ node = extract_first_element(fragment, "input")
+ if node is None:
+ break
+ inputs.append(node)
+ if inputs:
+ docelem = get_documentElement(fragment)
+ inputs.reverse()
+ for node in inputs:
+ text = doc.createTextNode("\n")
+ docelem.insertBefore(text, docelem.firstChild)
+ docelem.insertBefore(node, text)
+ docelem.insertBefore(doc.createTextNode("\n"), docelem.firstChild)
+ while fragment.firstChild and fragment.firstChild.nodeType == TEXT:
+ fragment.removeChild(fragment.firstChild)
+
+
+def cleanup_root_text(doc):
+ discards = []
+ skip = 0
+ for n in doc.childNodes:
+ prevskip = skip
+ skip = 0
+ if n.nodeType == TEXT and not prevskip:
+ discards.append(n)
+ elif n.nodeName == "COMMENT":
+ skip = 1
+ for node in discards:
+ doc.removeChild(node)
+
+
+DESCRIPTOR_ELEMENTS = (
+ "cfuncdesc", "cvardesc", "ctypedesc",
+ "classdesc", "memberdesc", "memberdescni", "methoddesc", "methoddescni",
+ "excdesc", "funcdesc", "funcdescni", "opcodedesc",
+ "datadesc", "datadescni",
+ )
+
+def fixup_descriptors(doc, fragment):
+ sections = find_all_elements(fragment, "section")
+ for section in sections:
+ find_and_fix_descriptors(doc, section)
+
+
+def find_and_fix_descriptors(doc, container):
+ children = container.childNodes
+ for child in children:
+ if child.nodeType == ELEMENT:
+ tagName = child.tagName
+ if tagName in DESCRIPTOR_ELEMENTS:
+ rewrite_descriptor(doc, child)
+ elif tagName == "subsection":
+ find_and_fix_descriptors(doc, child)
+
+
+def rewrite_descriptor(doc, descriptor):
+ #
+ # Do these things:
+ # 1. Add an "index='no'" attribute to the element if the tagName
+ # ends in 'ni', removing the 'ni' from the name.
+ # 2. Create a <signature> from the name attribute
+ # 2a. Create an <args> if it appears to be available.
+ # 3. Create additional <signature>s from <*line{,ni}> elements,
+ # if found.
+ # 4. If a <versionadded> is found, move it to an attribute on the
+ # descriptor.
+ # 5. Move remaining child nodes to a <description> element.
+ # 6. Put it back together.
+ #
+ # 1.
+ descname = descriptor.tagName
+ index = 1
+ if descname[-2:] == "ni":
+ descname = descname[:-2]
+ descriptor.setAttribute("index", "no")
+ set_tagName(descriptor, descname)
+ index = 0
+ desctype = descname[:-4] # remove 'desc'
+ linename = desctype + "line"
+ if not index:
+ linename = linename + "ni"
+ # 2.
+ signature = doc.createElement("signature")
+ name = doc.createElement("name")
+ signature.appendChild(doc.createTextNode("\n "))
+ signature.appendChild(name)
+ name.appendChild(doc.createTextNode(descriptor.getAttribute("name")))
+ descriptor.removeAttribute("name")
+ # 2a.
+ if descriptor.hasAttribute("var"):
+ if descname != "opcodedesc":
+ raise RuntimeError, \
+ "got 'var' attribute on descriptor other than opcodedesc"
+ variable = descriptor.getAttribute("var")
+ if variable:
+ args = doc.createElement("args")
+ args.appendChild(doc.createTextNode(variable))
+ signature.appendChild(doc.createTextNode("\n "))
+ signature.appendChild(args)
+ descriptor.removeAttribute("var")
+ newchildren = [signature]
+ children = descriptor.childNodes
+ pos = skip_leading_nodes(children)
+ if pos < len(children):
+ child = children[pos]
+ if child.nodeName == "args":
+ # move <args> to <signature>, or remove if empty:
+ child.parentNode.removeChild(child)
+ if len(child.childNodes):
+ signature.appendChild(doc.createTextNode("\n "))
+ signature.appendChild(child)
+ signature.appendChild(doc.createTextNode("\n "))
+ # 3, 4.
+ pos = skip_leading_nodes(children, pos)
+ while pos < len(children) \
+ and children[pos].nodeName in (linename, "versionadded"):
+ if children[pos].tagName == linename:
+ # this is really a supplemental signature, create <signature>
+ oldchild = children[pos].cloneNode(1)
+ try:
+ sig = methodline_to_signature(doc, children[pos])
+ except KeyError:
+ print oldchild.toxml()
+ raise
+ newchildren.append(sig)
+ else:
+ # <versionadded added=...>
+ descriptor.setAttribute(
+ "added", children[pos].getAttribute("version"))
+ pos = skip_leading_nodes(children, pos + 1)
+ # 5.
+ description = doc.createElement("description")
+ description.appendChild(doc.createTextNode("\n"))
+ newchildren.append(description)
+ move_children(descriptor, description, pos)
+ last = description.childNodes[-1]
+ if last.nodeType == TEXT:
+ last.data = string.rstrip(last.data) + "\n "
+ # 6.
+ # should have nothing but whitespace and signature lines in <descriptor>;
+ # discard them
+ while descriptor.childNodes:
+ descriptor.removeChild(descriptor.childNodes[0])
+ for node in newchildren:
+ descriptor.appendChild(doc.createTextNode("\n "))
+ descriptor.appendChild(node)
+ descriptor.appendChild(doc.createTextNode("\n"))
+
+
+def methodline_to_signature(doc, methodline):
+ signature = doc.createElement("signature")
+ signature.appendChild(doc.createTextNode("\n "))
+ name = doc.createElement("name")
+ name.appendChild(doc.createTextNode(methodline.getAttribute("name")))
+ methodline.removeAttribute("name")
+ signature.appendChild(name)
+ if len(methodline.childNodes):
+ args = doc.createElement("args")
+ signature.appendChild(doc.createTextNode("\n "))
+ signature.appendChild(args)
+ move_children(methodline, args)
+ signature.appendChild(doc.createTextNode("\n "))
+ return signature
+
+
+def move_children(origin, dest, start=0):
+ children = origin.childNodes
+ while start < len(children):
+ node = children[start]
+ origin.removeChild(node)
+ dest.appendChild(node)
+
+
+def handle_appendix(doc, fragment):
+ # must be called after simplify() if document is multi-rooted to begin with
+ docelem = get_documentElement(fragment)
+ toplevel = docelem.tagName == "manual" and "chapter" or "section"
+ appendices = 0
+ nodes = []
+ for node in docelem.childNodes:
+ if appendices:
+ nodes.append(node)
+ elif node.nodeType == ELEMENT:
+ appnodes = node.getElementsByTagName("appendix")
+ if appnodes:
+ appendices = 1
+ parent = appnodes[0].parentNode
+ parent.removeChild(appnodes[0])
+ parent.normalize()
+ if nodes:
+ map(docelem.removeChild, nodes)
+ docelem.appendChild(doc.createTextNode("\n\n\n"))
+ back = doc.createElement("back-matter")
+ docelem.appendChild(back)
+ back.appendChild(doc.createTextNode("\n"))
+ while nodes and nodes[0].nodeType == TEXT \
+ and not string.strip(nodes[0].data):
+ del nodes[0]
+ map(back.appendChild, nodes)
+ docelem.appendChild(doc.createTextNode("\n"))
+
+
+def handle_labels(doc, fragment):
+ for label in find_all_elements(fragment, "label"):
+ id = label.getAttribute("id")
+ if not id:
+ continue
+ parent = label.parentNode
+ parentTagName = parent.tagName
+ if parentTagName == "title":
+ parent.parentNode.setAttribute("id", id)
+ else:
+ parent.setAttribute("id", id)
+ # now, remove <label id="..."/> from parent:
+ parent.removeChild(label)
+ if parentTagName == "title":
+ parent.normalize()
+ children = parent.childNodes
+ if children[-1].nodeType == TEXT:
+ children[-1].data = string.rstrip(children[-1].data)
+
+
+def fixup_trailing_whitespace(doc, wsmap):
+ queue = [doc]
+ while queue:
+ node = queue[0]
+ del queue[0]
+ if wsmap.has_key(node.nodeName):
+ ws = wsmap[node.tagName]
+ children = node.childNodes
+ children.reverse()
+ if children[0].nodeType == TEXT:
+ data = string.rstrip(children[0].data) + ws
+ children[0].data = data
+ children.reverse()
+ # hack to get the title in place:
+ if node.tagName == "title" \
+ and node.parentNode.firstChild.nodeType == ELEMENT:
+ node.parentNode.insertBefore(doc.createText("\n "),
+ node.parentNode.firstChild)
+ for child in node.childNodes:
+ if child.nodeType == ELEMENT:
+ queue.append(child)
+
+
+def normalize(doc):
+ for node in doc.childNodes:
+ if node.nodeType == ELEMENT:
+ node.normalize()
+
+
+def cleanup_trailing_parens(doc, element_names):
+ d = {}
+ for gi in element_names:
+ d[gi] = gi
+ rewrite_element = d.has_key
+ queue = []
+ for node in doc.childNodes:
+ if node.nodeType == ELEMENT:
+ queue.append(node)
+ while queue:
+ node = queue[0]
+ del queue[0]
+ if rewrite_element(node.tagName):
+ children = node.childNodes
+ if len(children) == 1 \
+ and children[0].nodeType == TEXT:
+ data = children[0].data
+ if data[-2:] == "()":
+ children[0].data = data[:-2]
+ else:
+ for child in node.childNodes:
+ if child.nodeType == ELEMENT:
+ queue.append(child)
+
+
+def contents_match(left, right):
+ left_children = left.childNodes
+ right_children = right.childNodes
+ if len(left_children) != len(right_children):
+ return 0
+ for l, r in map(None, left_children, right_children):
+ nodeType = l.nodeType
+ if nodeType != r.nodeType:
+ return 0
+ if nodeType == ELEMENT:
+ if l.tagName != r.tagName:
+ return 0
+ # should check attributes, but that's not a problem here
+ if not contents_match(l, r):
+ return 0
+ elif nodeType == TEXT:
+ if l.data != r.data:
+ return 0
+ else:
+ # not quite right, but good enough
+ return 0
+ return 1
+
+
+def create_module_info(doc, section):
+ # Heavy.
+ node = extract_first_element(section, "modulesynopsis")
+ if node is None:
+ return
+ set_tagName(node, "synopsis")
+ lastchild = node.childNodes[-1]
+ if lastchild.nodeType == TEXT \
+ and lastchild.data[-1:] == ".":
+ lastchild.data = lastchild.data[:-1]
+ modauthor = extract_first_element(section, "moduleauthor")
+ if modauthor:
+ set_tagName(modauthor, "author")
+ modauthor.appendChild(doc.createTextNode(
+ modauthor.getAttribute("name")))
+ modauthor.removeAttribute("name")
+ platform = extract_first_element(section, "platform")
+ if section.tagName == "section":
+ modinfo_pos = 2
+ modinfo = doc.createElement("moduleinfo")
+ moddecl = extract_first_element(section, "declaremodule")
+ name = None
+ if moddecl:
+ modinfo.appendChild(doc.createTextNode("\n "))
+ name = moddecl.attributes["name"].value
+ namenode = doc.createElement("name")
+ namenode.appendChild(doc.createTextNode(name))
+ modinfo.appendChild(namenode)
+ type = moddecl.attributes.get("type")
+ if type:
+ type = type.value
+ modinfo.appendChild(doc.createTextNode("\n "))
+ typenode = doc.createElement("type")
+ typenode.appendChild(doc.createTextNode(type))
+ modinfo.appendChild(typenode)
+ versionadded = extract_first_element(section, "versionadded")
+ if versionadded:
+ modinfo.setAttribute("added", versionadded.getAttribute("version"))
+ title = get_first_element(section, "title")
+ if title:
+ children = title.childNodes
+ if len(children) >= 2 \
+ and children[0].nodeName == "module" \
+ and children[0].childNodes[0].data == name:
+ # this is it; morph the <title> into <short-synopsis>
+ first_data = children[1]
+ if first_data.data[:4] == " ---":
+ first_data.data = string.lstrip(first_data.data[4:])
+ set_tagName(title, "short-synopsis")
+ if children[-1].nodeType == TEXT \
+ and children[-1].data[-1:] == ".":
+ children[-1].data = children[-1].data[:-1]
+ section.removeChild(title)
+ section.removeChild(section.childNodes[0])
+ title.removeChild(children[0])
+ modinfo_pos = 0
+ else:
+ ewrite("module name in title doesn't match"
+ " <declaremodule/>; no <short-synopsis/>\n")
+ else:
+ ewrite("Unexpected condition: <section/> without <title/>\n")
+ modinfo.appendChild(doc.createTextNode("\n "))
+ modinfo.appendChild(node)
+ if title and not contents_match(title, node):
+ # The short synopsis is actually different,
+ # and needs to be stored:
+ modinfo.appendChild(doc.createTextNode("\n "))
+ modinfo.appendChild(title)
+ if modauthor:
+ modinfo.appendChild(doc.createTextNode("\n "))
+ modinfo.appendChild(modauthor)
+ if platform:
+ modinfo.appendChild(doc.createTextNode("\n "))
+ modinfo.appendChild(platform)
+ modinfo.appendChild(doc.createTextNode("\n "))
+ section.insertBefore(modinfo, section.childNodes[modinfo_pos])
+ section.insertBefore(doc.createTextNode("\n "), modinfo)
+ #
+ # The rest of this removes extra newlines from where we cut out
+    # a lot of elements.  A lot of code for minimal value, but it keeps
+    # the generated *ML from being too funny looking.
+ #
+ section.normalize()
+ children = section.childNodes
+ for i in range(len(children)):
+ node = children[i]
+ if node.nodeName == "moduleinfo":
+ nextnode = children[i+1]
+ if nextnode.nodeType == TEXT:
+ data = nextnode.data
+ if len(string.lstrip(data)) < (len(data) - 4):
+ nextnode.data = "\n\n\n" + string.lstrip(data)
+
+
+def cleanup_synopses(doc, fragment):
+ for node in find_all_elements(fragment, "section"):
+ create_module_info(doc, node)
+
+
+def fixup_table_structures(doc, fragment):
+ for table in find_all_elements(fragment, "table"):
+ fixup_table(doc, table)
+
+
+def fixup_table(doc, table):
+ # create the table head
+ thead = doc.createElement("thead")
+ row = doc.createElement("row")
+ move_elements_by_name(doc, table, row, "entry")
+ thead.appendChild(doc.createTextNode("\n "))
+ thead.appendChild(row)
+ thead.appendChild(doc.createTextNode("\n "))
+ # create the table body
+ tbody = doc.createElement("tbody")
+ prev_row = None
+ last_was_hline = 0
+ children = table.childNodes
+ for child in children:
+ if child.nodeType == ELEMENT:
+ tagName = child.tagName
+ if tagName == "hline" and prev_row is not None:
+ prev_row.setAttribute("rowsep", "1")
+ elif tagName == "row":
+ prev_row = child
+ # save the rows:
+ tbody.appendChild(doc.createTextNode("\n "))
+ move_elements_by_name(doc, table, tbody, "row", sep="\n ")
+ # and toss the rest:
+ while children:
+ child = children[0]
+ nodeType = child.nodeType
+ if nodeType == TEXT:
+ if string.strip(child.data):
+ raise ConversionError("unexpected free data in <%s>: %r"
+ % (table.tagName, child.data))
+ table.removeChild(child)
+ continue
+ if nodeType == ELEMENT:
+ if child.tagName != "hline":
+ raise ConversionError(
+ "unexpected <%s> in table" % child.tagName)
+ table.removeChild(child)
+ continue
+ raise ConversionError(
+ "unexpected %s node in table" % child.__class__.__name__)
+ # nothing left in the <table>; add the <thead> and <tbody>
+ tgroup = doc.createElement("tgroup")
+ tgroup.appendChild(doc.createTextNode("\n "))
+ tgroup.appendChild(thead)
+ tgroup.appendChild(doc.createTextNode("\n "))
+ tgroup.appendChild(tbody)
+ tgroup.appendChild(doc.createTextNode("\n "))
+ table.appendChild(tgroup)
+ # now make the <entry>s look nice:
+ for row in table.getElementsByTagName("row"):
+ fixup_row(doc, row)
+
+
+def fixup_row(doc, row):
+ entries = []
+ map(entries.append, row.childNodes[1:])
+ for entry in entries:
+ row.insertBefore(doc.createTextNode("\n "), entry)
+# row.appendChild(doc.createTextNode("\n "))
+
+
+def move_elements_by_name(doc, source, dest, name, sep=None):
+ nodes = []
+ for child in source.childNodes:
+ if child.nodeName == name:
+ nodes.append(child)
+ for node in nodes:
+ source.removeChild(node)
+ dest.appendChild(node)
+ if sep:
+ dest.appendChild(doc.createTextNode(sep))
+
+
+RECURSE_INTO_PARA_CONTAINERS = (
+ "chapter", "abstract", "enumerate",
+ "section", "subsection", "subsubsection",
+ "paragraph", "subparagraph", "back-matter",
+ "howto", "manual",
+ "item", "itemize", "fulllineitems", "enumeration", "descriptionlist",
+ "definitionlist", "definition",
+ )
+
+PARA_LEVEL_ELEMENTS = (
+ "moduleinfo", "title", "verbatim", "enumerate", "item",
+ "interpreter-session", "back-matter", "interactive-session",
+ "opcodedesc", "classdesc", "datadesc",
+ "funcdesc", "methoddesc", "excdesc", "memberdesc", "membderdescni",
+ "funcdescni", "methoddescni", "excdescni",
+ "tableii", "tableiii", "tableiv", "localmoduletable",
+ "sectionauthor", "seealso", "itemize",
+ # include <para>, so we can just do it again to get subsequent paras:
+ PARA_ELEMENT,
+ )
+
+PARA_LEVEL_PRECEEDERS = (
+ "setindexsubitem", "author",
+ "stindex", "obindex", "COMMENT", "label", "input", "title",
+ "versionadded", "versionchanged", "declaremodule", "modulesynopsis",
+ "moduleauthor", "indexterm", "leader",
+ )
+
+
+def fixup_paras(doc, fragment):
+ for child in fragment.childNodes:
+ if child.nodeName in RECURSE_INTO_PARA_CONTAINERS:
+ fixup_paras_helper(doc, child)
+ descriptions = find_all_elements(fragment, "description")
+ for description in descriptions:
+ fixup_paras_helper(doc, description)
+
+
+def fixup_paras_helper(doc, container, depth=0):
+ # document is already normalized
+ children = container.childNodes
+ start = skip_leading_nodes(children)
+ while len(children) > start:
+ if children[start].nodeName in RECURSE_INTO_PARA_CONTAINERS:
+ # Something to recurse into:
+ fixup_paras_helper(doc, children[start])
+ else:
+ # Paragraph material:
+ build_para(doc, container, start, len(children))
+ if DEBUG_PARA_FIXER and depth == 10:
+ sys.exit(1)
+ start = skip_leading_nodes(children, start + 1)
+
+
+def build_para(doc, parent, start, i):
+ children = parent.childNodes
+ after = start + 1
+ have_last = 0
+ BREAK_ELEMENTS = PARA_LEVEL_ELEMENTS + RECURSE_INTO_PARA_CONTAINERS
+ # Collect all children until \n\n+ is found in a text node or a
+ # member of BREAK_ELEMENTS is found.
+ for j in range(start, i):
+ after = j + 1
+ child = children[j]
+ nodeType = child.nodeType
+ if nodeType == ELEMENT:
+ if child.tagName in BREAK_ELEMENTS:
+ after = j
+ break
+ elif nodeType == TEXT:
+ pos = string.find(child.data, "\n\n")
+ if pos == 0:
+ after = j
+ break
+ if pos >= 1:
+ child.splitText(pos)
+ break
+ else:
+ have_last = 1
+ if (start + 1) > after:
+ raise ConversionError(
+ "build_para() could not identify content to turn into a paragraph")
+ if children[after - 1].nodeType == TEXT:
+ # we may need to split off trailing white space:
+ child = children[after - 1]
+ data = child.data
+ if string.rstrip(data) != data:
+ have_last = 0
+ child.splitText(len(string.rstrip(data)))
+ para = doc.createElement(PARA_ELEMENT)
+ prev = None
+ indexes = range(start, after)
+ indexes.reverse()
+ for j in indexes:
+ node = parent.childNodes[j]
+ parent.removeChild(node)
+ para.insertBefore(node, prev)
+ prev = node
+ if have_last:
+ parent.appendChild(para)
+ parent.appendChild(doc.createTextNode("\n\n"))
+ return len(parent.childNodes)
+ else:
+ nextnode = parent.childNodes[start]
+ if nextnode.nodeType == TEXT:
+ if nextnode.data and nextnode.data[0] != "\n":
+ nextnode.data = "\n" + nextnode.data
+ else:
+ newnode = doc.createTextNode("\n")
+ parent.insertBefore(newnode, nextnode)
+ nextnode = newnode
+ start = start + 1
+ parent.insertBefore(para, nextnode)
+ return start + 1
+
+
+def skip_leading_nodes(children, start=0):
+ """Return index into children of a node at which paragraph building should
+ begin or a recursive call to fixup_paras_helper() should be made (for
+ subsections, etc.).
+
+ When the return value >= len(children), we've built all the paras we can
+ from this list of children.
+ """
+ i = len(children)
+ while i > start:
+ # skip over leading comments and whitespace:
+ child = children[start]
+ nodeType = child.nodeType
+ if nodeType == TEXT:
+ data = child.data
+ shortened = string.lstrip(data)
+ if shortened:
+ if data != shortened:
+ # break into two nodes: whitespace and non-whitespace
+ child.splitText(len(data) - len(shortened))
+ return start + 1
+ return start
+ # all whitespace, just skip
+ elif nodeType == ELEMENT:
+ tagName = child.tagName
+ if tagName in RECURSE_INTO_PARA_CONTAINERS:
+ return start
+ if tagName not in PARA_LEVEL_ELEMENTS + PARA_LEVEL_PRECEEDERS:
+ return start
+ start = start + 1
+ return start
+
+
+def fixup_rfc_references(doc, fragment):
+ for rfcnode in find_all_elements(fragment, "rfc"):
+ rfcnode.appendChild(doc.createTextNode(
+ "RFC " + rfcnode.getAttribute("num")))
+
+
+def fixup_signatures(doc, fragment):
+ for child in fragment.childNodes:
+ if child.nodeType == ELEMENT:
+ args = child.getElementsByTagName("args")
+ for arg in args:
+ fixup_args(doc, arg)
+ arg.normalize()
+ args = child.getElementsByTagName("constructor-args")
+ for arg in args:
+ fixup_args(doc, arg)
+ arg.normalize()
+
+
+def fixup_args(doc, arglist):
+ for child in arglist.childNodes:
+ if child.nodeName == "optional":
+ # found it; fix and return
+ arglist.insertBefore(doc.createTextNode("["), child)
+ optkids = child.childNodes
+ while optkids:
+ k = optkids[0]
+ child.removeChild(k)
+ arglist.insertBefore(k, child)
+ arglist.insertBefore(doc.createTextNode("]"), child)
+ arglist.removeChild(child)
+ return fixup_args(doc, arglist)
+
+
+def fixup_sectionauthors(doc, fragment):
+ for sectauth in find_all_elements(fragment, "sectionauthor"):
+ section = sectauth.parentNode
+ section.removeChild(sectauth)
+ set_tagName(sectauth, "author")
+ sectauth.appendChild(doc.createTextNode(
+ sectauth.getAttribute("name")))
+ sectauth.removeAttribute("name")
+ after = section.childNodes[2]
+ title = section.childNodes[1]
+ if title.nodeName != "title":
+ after = section.childNodes[0]
+ section.insertBefore(doc.createTextNode("\n "), after)
+ section.insertBefore(sectauth, after)
+
+
+def fixup_verbatims(doc):
+ for verbatim in find_all_elements(doc, "verbatim"):
+ child = verbatim.childNodes[0]
+ if child.nodeType == TEXT \
+ and string.lstrip(child.data)[:3] == ">>>":
+ set_tagName(verbatim, "interactive-session")
+
+
+def add_node_ids(fragment, counter=0):
+ fragment.node_id = counter
+ for node in fragment.childNodes:
+ counter = counter + 1
+ if node.nodeType == ELEMENT:
+ counter = add_node_ids(node, counter)
+ else:
+ node.node_id = counter
+ return counter + 1
+
+
+REFMODINDEX_ELEMENTS = ('refmodindex', 'refbimodindex',
+ 'refexmodindex', 'refstmodindex')
+
+def fixup_refmodindexes(fragment):
+ # Locate <ref*modindex>...</> co-located with <module>...</>, and
+ # remove the <ref*modindex>, replacing it with index=index on the
+ # <module> element.
+ nodes = find_all_elements_from_set(fragment, REFMODINDEX_ELEMENTS)
+ d = {}
+ for node in nodes:
+ parent = node.parentNode
+ d[parent.node_id] = parent
+ del nodes
+ map(fixup_refmodindexes_chunk, d.values())
+
+
+def fixup_refmodindexes_chunk(container):
+ # node is probably a <para>; let's see how often it isn't:
+ if container.tagName != PARA_ELEMENT:
+ bwrite("--- fixup_refmodindexes_chunk(%s)\n" % container)
+ module_entries = find_all_elements(container, "module")
+ if not module_entries:
+ return
+ index_entries = find_all_elements_from_set(container, REFMODINDEX_ELEMENTS)
+ removes = []
+ for entry in index_entries:
+ children = entry.childNodes
+ if len(children) != 0:
+ bwrite("--- unexpected number of children for %s node:\n"
+ % entry.tagName)
+ ewrite(entry.toxml() + "\n")
+ continue
+ found = 0
+ module_name = entry.getAttribute("module")
+ for node in module_entries:
+ if len(node.childNodes) != 1:
+ continue
+ this_name = node.childNodes[0].data
+ if this_name == module_name:
+ found = 1
+ node.setAttribute("index", "yes")
+ if found:
+ removes.append(entry)
+ for node in removes:
+ container.removeChild(node)
+
+
+def fixup_bifuncindexes(fragment):
+ nodes = find_all_elements(fragment, 'bifuncindex')
+ d = {}
+ # make sure that each parent is only processed once:
+ for node in nodes:
+ parent = node.parentNode
+ d[parent.node_id] = parent
+ del nodes
+ map(fixup_bifuncindexes_chunk, d.values())
+
+
+def fixup_bifuncindexes_chunk(container):
+ removes = []
+ entries = find_all_child_elements(container, "bifuncindex")
+ function_entries = find_all_child_elements(container, "function")
+ for entry in entries:
+ function_name = entry.getAttribute("name")
+ found = 0
+ for func_entry in function_entries:
+ t2 = func_entry.childNodes[0].data
+ if t2[-2:] != "()":
+ continue
+ t2 = t2[:-2]
+ if t2 == function_name:
+ func_entry.setAttribute("index", "yes")
+ func_entry.setAttribute("module", "__builtin__")
+ if not found:
+ found = 1
+ removes.append(entry)
+ for entry in removes:
+ container.removeChild(entry)
+
+
+def join_adjacent_elements(container, gi):
+ queue = [container]
+ while queue:
+ parent = queue.pop()
+ i = 0
+ children = parent.childNodes
+ nchildren = len(children)
+ while i < (nchildren - 1):
+ child = children[i]
+ if child.nodeName == gi:
+ if children[i+1].nodeName == gi:
+ ewrite("--- merging two <%s/> elements\n" % gi)
+ child = children[i]
+ nextchild = children[i+1]
+ nextchildren = nextchild.childNodes
+ while len(nextchildren):
+ node = nextchildren[0]
+ nextchild.removeChild(node)
+ child.appendChild(node)
+ parent.removeChild(nextchild)
+ continue
+ if child.nodeType == ELEMENT:
+ queue.append(child)
+ i = i + 1
+
+
+_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
+
+def write_esis(doc, ofp, knownempty):
+ for node in doc.childNodes:
+ nodeType = node.nodeType
+ if nodeType == ELEMENT:
+ gi = node.tagName
+ if knownempty(gi):
+ if node.hasChildNodes():
+ raise ValueError, \
+ "declared-empty node <%s> has children" % gi
+ ofp.write("e\n")
+ for k, value in node.attributes.items():
+ if _token_rx.match(value):
+ dtype = "TOKEN"
+ else:
+ dtype = "CDATA"
+ ofp.write("A%s %s %s\n" % (k, dtype, esistools.encode(value)))
+ ofp.write("(%s\n" % gi)
+ write_esis(node, ofp, knownempty)
+ ofp.write(")%s\n" % gi)
+ elif nodeType == TEXT:
+ ofp.write("-%s\n" % esistools.encode(node.data))
+ elif nodeType == ENTITY_REFERENCE:
+ ofp.write("&%s\n" % node.nodeName)
+ else:
+ raise RuntimeError, "unsupported node type: %s" % nodeType
+
+
+def convert(ifp, ofp):
+ events = esistools.parse(ifp)
+ toktype, doc = events.getEvent()
+ fragment = doc.createDocumentFragment()
+ events.expandNode(fragment)
+
+ normalize(fragment)
+ simplify(doc, fragment)
+ handle_labels(doc, fragment)
+ handle_appendix(doc, fragment)
+ fixup_trailing_whitespace(doc, {
+ "abstract": "\n",
+ "title": "",
+ "chapter": "\n\n",
+ "section": "\n\n",
+ "subsection": "\n\n",
+ "subsubsection": "\n\n",
+ "paragraph": "\n\n",
+ "subparagraph": "\n\n",
+ })
+ cleanup_root_text(doc)
+ cleanup_trailing_parens(fragment, ["function", "method", "cfunction"])
+ cleanup_synopses(doc, fragment)
+ fixup_descriptors(doc, fragment)
+ fixup_verbatims(fragment)
+ normalize(fragment)
+ fixup_paras(doc, fragment)
+ fixup_sectionauthors(doc, fragment)
+ fixup_table_structures(doc, fragment)
+ fixup_rfc_references(doc, fragment)
+ fixup_signatures(doc, fragment)
+ add_node_ids(fragment)
+ fixup_refmodindexes(fragment)
+ fixup_bifuncindexes(fragment)
+ # Take care of ugly hacks in the LaTeX markup to avoid LaTeX and
+ # LaTeX2HTML screwing with GNU-style long options (the '--' problem).
+ join_adjacent_elements(fragment, "option")
+ #
+ d = {}
+ for gi in events.parser.get_empties():
+ d[gi] = gi
+ if d.has_key("author"):
+ del d["author"]
+ if d.has_key("rfc"):
+ del d["rfc"]
+ knownempty = d.has_key
+ #
+ try:
+ write_esis(fragment, ofp, knownempty)
+ except IOError, (err, msg):
+ # Ignore EPIPE; it just means that whoever we're writing to stopped
+ # reading. The rest of the output would be ignored. All other errors
+        # should still be reported.
+ if err != errno.EPIPE:
+ raise
+
+
+def main():
+ if len(sys.argv) == 1:
+ ifp = sys.stdin
+ ofp = sys.stdout
+ elif len(sys.argv) == 2:
+ ifp = open(sys.argv[1])
+ ofp = sys.stdout
+ elif len(sys.argv) == 3:
+ ifp = open(sys.argv[1])
+ import StringIO
+ ofp = StringIO.StringIO()
+ else:
+ usage()
+ sys.exit(2)
+ convert(ifp, ofp)
+ if len(sys.argv) == 3:
+ fp = open(sys.argv[2], "w")
+ fp.write(ofp.getvalue())
+ fp.close()
+ ofp.close()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/doc/tools/sgmlconv/esis2sgml.py b/doc/tools/sgmlconv/esis2sgml.py
new file mode 100755
index 0000000..7bda929
--- /dev/null
+++ b/doc/tools/sgmlconv/esis2sgml.py
@@ -0,0 +1,263 @@
+#! /usr/bin/env python
+
+"""Convert ESIS events to SGML or XML markup.
+
+This is limited, but seems sufficient for the ESIS generated by the
+latex2esis.py script when run over the Python documentation.
+"""
+
+# This should have an explicit option to indicate whether the *INPUT* was
+# generated from an SGML or an XML application.
+
+import errno
+import esistools
+import os
+import re
+import string
+
+from xml.sax.saxutils import escape
+
+
+AUTOCLOSE = ()
+
+EMPTIES_FILENAME = "../sgml/empties.dat"
+LIST_EMPTIES = 0
+
+
+_elem_map = {}
+_attr_map = {}
+_token_map = {}
+
+_normalize_case = str
+
+def map_gi(sgmlgi, map):
+ uncased = _normalize_case(sgmlgi)
+ try:
+ return map[uncased]
+ except IndexError:
+ map[uncased] = sgmlgi
+ return sgmlgi
+
+def null_map_gi(sgmlgi, map):
+ return sgmlgi
+
+
+def format_attrs(attrs, xml=0):
+ attrs = attrs.items()
+ attrs.sort()
+ parts = []
+ append = parts.append
+ for name, value in attrs:
+ if xml:
+ append('%s="%s"' % (name, escape(value)))
+ else:
+ # this is a little bogus, but should do for now
+ if name == value and isnmtoken(value):
+ append(value)
+ elif istoken(value):
+ if value == "no" + name:
+ append(value)
+ else:
+ append("%s=%s" % (name, value))
+ else:
+ append('%s="%s"' % (name, escape(value)))
+ if parts:
+ parts.insert(0, '')
+ return string.join(parts)
+
+
+_nmtoken_rx = re.compile("[a-z][-._a-z0-9]*$", re.IGNORECASE)
+def isnmtoken(s):
+ return _nmtoken_rx.match(s) is not None
+
+_token_rx = re.compile("[a-z0-9][-._a-z0-9]*$", re.IGNORECASE)
+def istoken(s):
+ return _token_rx.match(s) is not None
+
+
+def convert(ifp, ofp, xml=0, autoclose=(), verbatims=()):
+ if xml:
+ autoclose = ()
+ attrs = {}
+ lastopened = None
+ knownempties = []
+ knownempty = 0
+ lastempty = 0
+ inverbatim = 0
+ while 1:
+ line = ifp.readline()
+ if not line:
+ break
+
+ type = line[0]
+ data = line[1:]
+ if data and data[-1] == "\n":
+ data = data[:-1]
+ if type == "-":
+ data = esistools.decode(data)
+ data = escape(data)
+ if not inverbatim:
+ data = string.replace(data, "---", "—")
+ ofp.write(data)
+ if "\n" in data:
+ lastopened = None
+ knownempty = 0
+ lastempty = 0
+ elif type == "(":
+ if data == "COMMENT":
+ ofp.write("<!--")
+ continue
+ data = map_gi(data, _elem_map)
+ if knownempty and xml:
+ ofp.write("<%s%s/>" % (data, format_attrs(attrs, xml)))
+ else:
+ ofp.write("<%s%s>" % (data, format_attrs(attrs, xml)))
+ if knownempty and data not in knownempties:
+ # accumulate knowledge!
+ knownempties.append(data)
+ attrs = {}
+ lastopened = data
+ lastempty = knownempty
+ knownempty = 0
+ inverbatim = data in verbatims
+ elif type == ")":
+ if data == "COMMENT":
+ ofp.write("-->")
+ continue
+ data = map_gi(data, _elem_map)
+ if xml:
+ if not lastempty:
+ ofp.write("</%s>" % data)
+ elif data not in knownempties:
+ if data in autoclose:
+ pass
+ elif lastopened == data:
+ ofp.write("</>")
+ else:
+ ofp.write("</%s>" % data)
+ lastopened = None
+ lastempty = 0
+ inverbatim = 0
+ elif type == "A":
+ name, type, value = string.split(data, " ", 2)
+ name = map_gi(name, _attr_map)
+ attrs[name] = esistools.decode(value)
+ elif type == "e":
+ knownempty = 1
+ elif type == "&":
+ ofp.write("&%s;" % data)
+ knownempty = 0
+ else:
+ raise RuntimeError, "unrecognized ESIS event type: '%s'" % type
+
+ if LIST_EMPTIES:
+ dump_empty_element_names(knownempties)
+
+
+def dump_empty_element_names(knownempties):
+ d = {}
+ for gi in knownempties:
+ d[gi] = gi
+ knownempties.append("")
+ if os.path.isfile(EMPTIES_FILENAME):
+ fp = open(EMPTIES_FILENAME)
+ while 1:
+ line = fp.readline()
+ if not line:
+ break
+ gi = string.strip(line)
+ if gi:
+ d[gi] = gi
+ fp = open(EMPTIES_FILENAME, "w")
+ gilist = d.keys()
+ gilist.sort()
+ fp.write(string.join(gilist, "\n"))
+ fp.write("\n")
+ fp.close()
+
+
+def update_gi_map(map, names, fromsgml=1):
+ for name in string.split(names, ","):
+ if fromsgml:
+ uncased = string.lower(name)
+ else:
+ uncased = name
+ map[uncased] = name
+
+
+def main():
+ import getopt
+ import sys
+ #
+ autoclose = AUTOCLOSE
+ xml = 1
+ xmldecl = 0
+ elem_names = ''
+ attr_names = ''
+ value_names = ''
+ verbatims = ('verbatim', 'interactive-session')
+ opts, args = getopt.getopt(sys.argv[1:], "adesx",
+ ["autoclose=", "declare", "sgml", "xml",
+ "elements-map=", "attributes-map",
+ "values-map="])
+ for opt, arg in opts:
+ if opt in ("-d", "--declare"):
+ xmldecl = 1
+ elif opt == "-e":
+ global LIST_EMPTIES
+ LIST_EMPTIES = 1
+ elif opt in ("-s", "--sgml"):
+ xml = 0
+ elif opt in ("-x", "--xml"):
+ xml = 1
+ elif opt in ("-a", "--autoclose"):
+ autoclose = string.split(arg, ",")
+ elif opt == "--elements-map":
+ elem_names = ("%s,%s" % (elem_names, arg))[1:]
+ elif opt == "--attributes-map":
+ attr_names = ("%s,%s" % (attr_names, arg))[1:]
+ elif opt == "--values-map":
+ value_names = ("%s,%s" % (value_names, arg))[1:]
+ #
+ # open input streams:
+ #
+ if len(args) == 0:
+ ifp = sys.stdin
+ ofp = sys.stdout
+ elif len(args) == 1:
+ ifp = open(args[0])
+ ofp = sys.stdout
+ elif len(args) == 2:
+ ifp = open(args[0])
+ ofp = open(args[1], "w")
+ else:
+ usage()
+ sys.exit(2)
+ #
+ # setup the name maps:
+ #
+ if elem_names or attr_names or value_names:
+ # assume the origin was SGML; ignore case of the names from the ESIS
+ # stream but set up conversion tables to get the case right on output
+ global _normalize_case
+ _normalize_case = string.lower
+ update_gi_map(_elem_map, string.split(elem_names, ","))
+ update_gi_map(_attr_map, string.split(attr_names, ","))
+ update_gi_map(_values_map, string.split(value_names, ","))
+ else:
+ global map_gi
+ map_gi = null_map_gi
+ #
+ # run the conversion:
+ #
+ try:
+ if xml and xmldecl:
+ opf.write('<?xml version="1.0" encoding="iso8859-1"?>\n')
+ convert(ifp, ofp, xml=xml, autoclose=autoclose, verbatims=verbatims)
+ except IOError, (err, msg):
+ if err != errno.EPIPE:
+ raise
+
+
+if __name__ == "__main__":
+ main()
diff --git a/doc/tools/sgmlconv/esistools.py b/doc/tools/sgmlconv/esistools.py
new file mode 100644
index 0000000..893af76
--- /dev/null
+++ b/doc/tools/sgmlconv/esistools.py
@@ -0,0 +1,309 @@
+"""Miscellaneous utility functions useful for dealing with ESIS streams."""
+
+import re
+import string
+
+import xml.dom.pulldom
+
+import xml.sax
+import xml.sax.handler
+import xml.sax.xmlreader
+
+
+_data_match = re.compile(r"[^\\][^\\]*").match
+
+def decode(s):
+ r = ''
+ while s:
+ m = _data_match(s)
+ if m:
+ r = r + m.group()
+ s = s[m.end():]
+ elif s[1] == "\\":
+ r = r + "\\"
+ s = s[2:]
+ elif s[1] == "n":
+ r = r + "\n"
+ s = s[2:]
+ elif s[1] == "%":
+ s = s[2:]
+ n, s = s.split(";", 1)
+ r = r + unichr(int(n))
+ else:
+ raise ValueError, "can't handle " + `s`
+ return r
+
+
+_charmap = {}
+for c in map(chr, range(256)):
+ _charmap[c] = c
+_charmap["\n"] = r"\n"
+_charmap["\\"] = r"\\"
+del c
+
+_null_join = ''.join
+def encode(s):
+ return _null_join(map(_charmap.get, s))
+
+
+class ESISReader(xml.sax.xmlreader.XMLReader):
+ """SAX Reader which reads from an ESIS stream.
+
+ No verification of the document structure is performed by the
+ reader; a general verifier could be used as the target
+ ContentHandler instance.
+
+ """
+ _decl_handler = None
+ _lexical_handler = None
+
+ _public_id = None
+ _system_id = None
+
+ _buffer = ""
+ _is_empty = 0
+ _lineno = 0
+ _started = 0
+
+ def __init__(self, contentHandler=None, errorHandler=None):
+ xml.sax.xmlreader.XMLReader.__init__(self)
+ self._attrs = {}
+ self._attributes = Attributes(self._attrs)
+ self._locator = Locator()
+ self._empties = {}
+ if contentHandler:
+ self.setContentHandler(contentHandler)
+ if errorHandler:
+ self.setErrorHandler(errorHandler)
+
+ def get_empties(self):
+ return self._empties.keys()
+
+ #
+ # XMLReader interface
+ #
+
+ def parse(self, source):
+ raise RuntimeError
+ self._locator._public_id = source.getPublicId()
+ self._locator._system_id = source.getSystemId()
+ fp = source.getByteStream()
+ handler = self.getContentHandler()
+ if handler:
+ handler.startDocument()
+ lineno = 0
+ while 1:
+ token, data = self._get_token(fp)
+ if token is None:
+ break
+ lineno = lineno + 1
+ self._locator._lineno = lineno
+ self._handle_token(token, data)
+ handler = self.getContentHandler()
+ if handler:
+ handler.startDocument()
+
+ def feed(self, data):
+ if not self._started:
+ handler = self.getContentHandler()
+ if handler:
+ handler.startDocument()
+ self._started = 1
+ data = self._buffer + data
+ self._buffer = None
+ lines = data.split("\n")
+ if lines:
+ for line in lines[:-1]:
+ self._lineno = self._lineno + 1
+ self._locator._lineno = self._lineno
+ if not line:
+ e = xml.sax.SAXParseException(
+ "ESIS input line contains no token type mark",
+ None, self._locator)
+ self.getErrorHandler().error(e)
+ else:
+ self._handle_token(line[0], line[1:])
+ self._buffer = lines[-1]
+ else:
+ self._buffer = ""
+
+ def close(self):
+ handler = self.getContentHandler()
+ if handler:
+ handler.endDocument()
+ self._buffer = ""
+
+ def _get_token(self, fp):
+ try:
+ line = fp.readline()
+ except IOError, e:
+ e = SAXException("I/O error reading input stream", e)
+ self.getErrorHandler().fatalError(e)
+ return
+ if not line:
+ return None, None
+ if line[-1] == "\n":
+ line = line[:-1]
+ if not line:
+ e = xml.sax.SAXParseException(
+ "ESIS input line contains no token type mark",
+ None, self._locator)
+ self.getErrorHandler().error(e)
+ return
+ return line[0], line[1:]
+
+ def _handle_token(self, token, data):
+ handler = self.getContentHandler()
+ if token == '-':
+ if data and handler:
+ handler.characters(decode(data))
+ elif token == ')':
+ if handler:
+ handler.endElement(decode(data))
+ elif token == '(':
+ if self._is_empty:
+ self._empties[data] = 1
+ if handler:
+ handler.startElement(data, self._attributes)
+ self._attrs.clear()
+ self._is_empty = 0
+ elif token == 'A':
+ name, value = data.split(' ', 1)
+ if value != "IMPLIED":
+ type, value = value.split(' ', 1)
+ self._attrs[name] = (decode(value), type)
+ elif token == '&':
+ # entity reference in SAX?
+ pass
+ elif token == '?':
+ if handler:
+ if ' ' in data:
+ target, data = string.split(data, None, 1)
+ else:
+ target, data = data, ""
+ handler.processingInstruction(target, decode(data))
+ elif token == 'N':
+ handler = self.getDTDHandler()
+ if handler:
+ handler.notationDecl(data, self._public_id, self._system_id)
+ self._public_id = None
+ self._system_id = None
+ elif token == 'p':
+ self._public_id = decode(data)
+ elif token == 's':
+ self._system_id = decode(data)
+ elif token == 'e':
+ self._is_empty = 1
+ elif token == 'C':
+ pass
+ else:
+ e = SAXParseException("unknown ESIS token in event stream",
+ None, self._locator)
+ self.getErrorHandler().error(e)
+
+ def setContentHandler(self, handler):
+ old = self.getContentHandler()
+ if old:
+ old.setDocumentLocator(None)
+ if handler:
+ handler.setDocumentLocator(self._locator)
+ xml.sax.xmlreader.XMLReader.setContentHandler(self, handler)
+
+ def getProperty(self, property):
+ if property == xml.sax.handler.property_lexical_handler:
+ return self._lexical_handler
+
+ elif property == xml.sax.handler.property_declaration_handler:
+ return self._decl_handler
+
+ else:
+ raise xml.sax.SAXNotRecognizedException("unknown property %s"
+ % `property`)
+
+ def setProperty(self, property, value):
+ if property == xml.sax.handler.property_lexical_handler:
+ if self._lexical_handler:
+ self._lexical_handler.setDocumentLocator(None)
+ if value:
+ value.setDocumentLocator(self._locator)
+ self._lexical_handler = value
+
+ elif property == xml.sax.handler.property_declaration_handler:
+ if self._decl_handler:
+ self._decl_handler.setDocumentLocator(None)
+ if value:
+ value.setDocumentLocator(self._locator)
+ self._decl_handler = value
+
+ else:
+ raise xml.sax.SAXNotRecognizedException()
+
+ def getFeature(self, feature):
+ if feature == xml.sax.handler.feature_namespaces:
+ return 1
+ else:
+ return xml.sax.xmlreader.XMLReader.getFeature(self, feature)
+
+ def setFeature(self, feature, enabled):
+ if feature == xml.sax.handler.feature_namespaces:
+ pass
+ else:
+ xml.sax.xmlreader.XMLReader.setFeature(self, feature, enabled)
+
+
+class Attributes(xml.sax.xmlreader.AttributesImpl):
+ # self._attrs has the form {name: (value, type)}
+
+ def getType(self, name):
+ return self._attrs[name][1]
+
+ def getValue(self, name):
+ return self._attrs[name][0]
+
+ def getValueByQName(self, name):
+ return self._attrs[name][0]
+
+ def __getitem__(self, name):
+ return self._attrs[name][0]
+
+ def get(self, name, default=None):
+ if self._attrs.has_key(name):
+ return self._attrs[name][0]
+ return default
+
+ def items(self):
+ L = []
+ for name, (value, type) in self._attrs.items():
+ L.append((name, value))
+ return L
+
+ def values(self):
+ L = []
+ for value, type in self._attrs.values():
+ L.append(value)
+ return L
+
+
+class Locator(xml.sax.xmlreader.Locator):
+ _lineno = -1
+ _public_id = None
+ _system_id = None
+
+ def getLineNumber(self):
+ return self._lineno
+
+ def getPublicId(self):
+ return self._public_id
+
+ def getSystemId(self):
+ return self._system_id
+
+
+def parse(stream_or_string, parser=None):
+ if type(stream_or_string) in [type(""), type(u"")]:
+ stream = open(stream_or_string)
+ else:
+ stream = stream_or_string
+ if not parser:
+ parser = ESISReader()
+ return xml.dom.pulldom.DOMEventStream(stream, parser, (2 ** 14) - 20)
diff --git a/doc/tools/sgmlconv/latex2esis.py b/doc/tools/sgmlconv/latex2esis.py
new file mode 100755
index 0000000..74e1dc7
--- /dev/null
+++ b/doc/tools/sgmlconv/latex2esis.py
@@ -0,0 +1,555 @@
+#! /usr/bin/env python
+
+"""Generate ESIS events based on a LaTeX source document and
+configuration data.
+
+The conversion is not strong enough to work with arbitrary LaTeX
+documents; it has only been designed to work with the highly stylized
+markup used in the standard Python documentation. A lot of
+information about specific markup is encoded in the control table
+passed to the convert() function; changing this table can allow this
+tool to support additional LaTeX markups.
+
+The format of the table is largely undocumented; see the commented
+headers where the table is specified in main(). There is no provision
+to load an alternate table from an external file.
+"""
+
+import errno
+import getopt
+import os
+import re
+import string
+import sys
+import UserList
+import xml.sax.saxutils
+
+from types import ListType, StringType, TupleType
+
+try:
+ from xml.parsers.xmllib import XMLParser
+except ImportError:
+ from xmllib import XMLParser
+
+
+from esistools import encode
+
+
+DEBUG = 0
+
+
+class LaTeXFormatError(Exception):
+ pass
+
+
+class LaTeXStackError(LaTeXFormatError):
+ def __init__(self, found, stack):
+ msg = "environment close for %s doesn't match;\n stack = %s" \
+ % (found, stack)
+ self.found = found
+ self.stack = stack[:]
+ LaTeXFormatError.__init__(self, msg)
+
+
+_begin_env_rx = re.compile(r"[\\]begin{([^}]*)}")
+_end_env_rx = re.compile(r"[\\]end{([^}]*)}")
+_begin_macro_rx = re.compile(r"[\\]([a-zA-Z]+[*]?) ?({|\s*\n?)")
+_comment_rx = re.compile("%+ ?(.*)\n[ \t]*")
+_text_rx = re.compile(r"[^]~%\\{}]+")
+_optional_rx = re.compile(r"\s*[[]([^]]*)[]]")
+# _parameter_rx is this complicated to allow {...} inside a parameter;
+# this is useful to match tabular layout specifications like {c|p{24pt}}
+_parameter_rx = re.compile("[ \n]*{(([^{}}]|{[^}]*})*)}")
+_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
+_start_group_rx = re.compile("[ \n]*{")
+_start_optional_rx = re.compile("[ \n]*[[]")
+
+
+ESCAPED_CHARS = "$%#^ {}&~"
+
+
+def dbgmsg(msg):
+ if DEBUG:
+ sys.stderr.write(msg + "\n")
+
+def pushing(name, point, depth):
+ dbgmsg("pushing <%s> at %s" % (name, point))
+
+def popping(name, point, depth):
+ dbgmsg("popping </%s> at %s" % (name, point))
+
+
+class _Stack(UserList.UserList):
+ def append(self, entry):
+ if type(entry) is not StringType:
+ raise LaTeXFormatError("cannot push non-string on stack: "
+ + `entry`)
+ #dbgmsg("%s<%s>" % (" "*len(self.data), entry))
+ self.data.append(entry)
+
+ def pop(self, index=-1):
+ entry = self.data[index]
+ del self.data[index]
+ #dbgmsg("%s</%s>" % (" "*len(self.data), entry))
+
+ def __delitem__(self, index):
+ entry = self.data[index]
+ del self.data[index]
+ #dbgmsg("%s</%s>" % (" "*len(self.data), entry))
+
+
+def new_stack():
+ if DEBUG:
+ return _Stack()
+ return []
+
+
+class Conversion:
+ def __init__(self, ifp, ofp, table):
+ self.write = ofp.write
+ self.ofp = ofp
+ self.table = table
+ self.line = string.join(map(string.rstrip, ifp.readlines()), "\n")
+ self.preamble = 1
+
+ def convert(self):
+ self.subconvert()
+
+ def subconvert(self, endchar=None, depth=0):
+ #
+ # Parses content, including sub-structures, until the character
+ # 'endchar' is found (with no open structures), or until the end
+    # of the input data if endchar is None.
+ #
+ stack = new_stack()
+ line = self.line
+ while line:
+ if line[0] == endchar and not stack:
+ self.line = line
+ return line
+ m = _comment_rx.match(line)
+ if m:
+ text = m.group(1)
+ if text:
+ self.write("(COMMENT\n- %s \n)COMMENT\n-\\n\n"
+ % encode(text))
+ line = line[m.end():]
+ continue
+ m = _begin_env_rx.match(line)
+ if m:
+ name = m.group(1)
+ entry = self.get_env_entry(name)
+ # re-write to use the macro handler
+ line = r"\%s %s" % (name, line[m.end():])
+ continue
+ m = _end_env_rx.match(line)
+ if m:
+ # end of environment
+ envname = m.group(1)
+ entry = self.get_entry(envname)
+ while stack and envname != stack[-1] \
+ and stack[-1] in entry.endcloses:
+ self.write(")%s\n" % stack.pop())
+ if stack and envname == stack[-1]:
+ self.write(")%s\n" % entry.outputname)
+ del stack[-1]
+ else:
+ raise LaTeXStackError(envname, stack)
+ line = line[m.end():]
+ continue
+ m = _begin_macro_rx.match(line)
+ if m:
+ # start of macro
+ macroname = m.group(1)
+ if macroname == "c":
+ # Ugh! This is a combining character...
+ endpos = m.end()
+ self.combining_char("c", line[endpos])
+ line = line[endpos + 1:]
+ continue
+ entry = self.get_entry(macroname)
+ if entry.verbatim:
+ # magic case!
+ pos = string.find(line, "\\end{%s}" % macroname)
+ text = line[m.end(1):pos]
+ stack.append(entry.name)
+ self.write("(%s\n" % entry.outputname)
+ self.write("-%s\n" % encode(text))
+ self.write(")%s\n" % entry.outputname)
+ stack.pop()
+ line = line[pos + len("\\end{%s}" % macroname):]
+ continue
+ while stack and stack[-1] in entry.closes:
+ top = stack.pop()
+ topentry = self.get_entry(top)
+ if topentry.outputname:
+ self.write(")%s\n-\\n\n" % topentry.outputname)
+ #
+ if entry.outputname:
+ if entry.empty:
+ self.write("e\n")
+ #
+ params, optional, empty, environ = self.start_macro(macroname)
+ # rip off the macroname
+ if params:
+ line = line[m.end(1):]
+ elif empty:
+ line = line[m.end(1):]
+ else:
+ line = line[m.end():]
+ opened = 0
+ implied_content = 0
+
+ # handle attribute mappings here:
+ for pentry in params:
+ if pentry.type == "attribute":
+ if pentry.optional:
+ m = _optional_rx.match(line)
+ if m and entry.outputname:
+ line = line[m.end():]
+ self.dump_attr(pentry, m.group(1))
+ elif pentry.text and entry.outputname:
+ # value supplied by conversion spec:
+ self.dump_attr(pentry, pentry.text)
+ else:
+ m = _parameter_rx.match(line)
+ if not m:
+ raise LaTeXFormatError(
+ "could not extract parameter %s for %s: %s"
+ % (pentry.name, macroname, `line[:100]`))
+ if entry.outputname:
+ self.dump_attr(pentry, m.group(1))
+ line = line[m.end():]
+ elif pentry.type == "child":
+ if pentry.optional:
+ m = _optional_rx.match(line)
+ if m:
+ line = line[m.end():]
+ if entry.outputname and not opened:
+ opened = 1
+ self.write("(%s\n" % entry.outputname)
+ stack.append(macroname)
+ stack.append(pentry.name)
+ self.write("(%s\n" % pentry.name)
+ self.write("-%s\n" % encode(m.group(1)))
+ self.write(")%s\n" % pentry.name)
+ stack.pop()
+ else:
+ if entry.outputname and not opened:
+ opened = 1
+ self.write("(%s\n" % entry.outputname)
+ stack.append(entry.name)
+ self.write("(%s\n" % pentry.name)
+ stack.append(pentry.name)
+ self.line = skip_white(line)[1:]
+ line = self.subconvert(
+ "}", len(stack) + depth + 1)[1:]
+ self.write(")%s\n" % stack.pop())
+ elif pentry.type == "content":
+ if pentry.implied:
+ implied_content = 1
+ else:
+ if entry.outputname and not opened:
+ opened = 1
+ self.write("(%s\n" % entry.outputname)
+ stack.append(entry.name)
+ line = skip_white(line)
+ if line[0] != "{":
+ raise LaTeXFormatError(
+ "missing content for " + macroname)
+ self.line = line[1:]
+ line = self.subconvert("}", len(stack) + depth + 1)
+ if line and line[0] == "}":
+ line = line[1:]
+ elif pentry.type == "text" and pentry.text:
+ if entry.outputname and not opened:
+ opened = 1
+ stack.append(entry.name)
+ self.write("(%s\n" % entry.outputname)
+ #dbgmsg("--- text: %s" % `pentry.text`)
+ self.write("-%s\n" % encode(pentry.text))
+ elif pentry.type == "entityref":
+ self.write("&%s\n" % pentry.name)
+ if entry.outputname:
+ if not opened:
+ self.write("(%s\n" % entry.outputname)
+ stack.append(entry.name)
+ if not implied_content:
+ self.write(")%s\n" % entry.outputname)
+ stack.pop()
+ continue
+ if line[0] == endchar and not stack:
+ self.line = line[1:]
+ return self.line
+ if line[0] == "}":
+ # end of macro or group
+ macroname = stack[-1]
+ if macroname:
+ conversion = self.table[macroname]
+ if conversion.outputname:
+ # otherwise, it was just a bare group
+ self.write(")%s\n" % conversion.outputname)
+ del stack[-1]
+ line = line[1:]
+ continue
+ if line[0] == "~":
+ # don't worry about the "tie" aspect of this command
+ line = line[1:]
+ self.write("- \n")
+ continue
+ if line[0] == "{":
+ stack.append("")
+ line = line[1:]
+ continue
+ if line[0] == "\\" and line[1] in ESCAPED_CHARS:
+ self.write("-%s\n" % encode(line[1]))
+ line = line[2:]
+ continue
+ if line[:2] == r"\\":
+ self.write("(BREAK\n)BREAK\n")
+ line = line[2:]
+ continue
+ if line[:2] == r"\_":
+ line = "_" + line[2:]
+ continue
+ if line[:2] in (r"\'", r'\"'):
+ # combining characters...
+ self.combining_char(line[1], line[2])
+ line = line[3:]
+ continue
+ m = _text_rx.match(line)
+ if m:
+ text = encode(m.group())
+ self.write("-%s\n" % text)
+ line = line[m.end():]
+ continue
+ # special case because of \item[]
+ # XXX can we axe this???
+ if line[0] == "]":
+ self.write("-]\n")
+ line = line[1:]
+ continue
+ # avoid infinite loops
+ extra = ""
+ if len(line) > 100:
+ extra = "..."
+ raise LaTeXFormatError("could not identify markup: %s%s"
+ % (`line[:100]`, extra))
+ while stack:
+ entry = self.get_entry(stack[-1])
+ if entry.closes:
+ self.write(")%s\n-%s\n" % (entry.outputname, encode("\n")))
+ del stack[-1]
+ else:
+ break
+ if stack:
+ raise LaTeXFormatError("elements remain on stack: "
+ + string.join(stack, ", "))
+ # otherwise we just ran out of input here...
+
+ # This is a really limited table of combinations, but it will have
+ # to do for now.
+ _combinations = {
+ ("c", "c"): 0x00E7,
+ ("'", "e"): 0x00E9,
+ ('"', "o"): 0x00F6,
+ }
+
+ def combining_char(self, prefix, char):
+ ordinal = self._combinations[(prefix, char)]
+ self.write("-\\%%%d;\n" % ordinal)
+
+ def start_macro(self, name):
+ conversion = self.get_entry(name)
+ parameters = conversion.parameters
+ optional = parameters and parameters[0].optional
+ return parameters, optional, conversion.empty, conversion.environment
+
+ def get_entry(self, name):
+ entry = self.table.get(name)
+ if entry is None:
+ dbgmsg("get_entry(%s) failing; building default entry!" % `name`)
+ # not defined; build a default entry:
+ entry = TableEntry(name)
+ entry.has_content = 1
+ entry.parameters.append(Parameter("content"))
+ self.table[name] = entry
+ return entry
+
+ def get_env_entry(self, name):
+ entry = self.table.get(name)
+ if entry is None:
+ # not defined; build a default entry:
+ entry = TableEntry(name, 1)
+ entry.has_content = 1
+ entry.parameters.append(Parameter("content"))
+ entry.parameters[-1].implied = 1
+ self.table[name] = entry
+ elif not entry.environment:
+ raise LaTeXFormatError(
+ name + " is defined as a macro; expected environment")
+ return entry
+
+ def dump_attr(self, pentry, value):
+ if not (pentry.name and value):
+ return
+ if _token_rx.match(value):
+ dtype = "TOKEN"
+ else:
+ dtype = "CDATA"
+ self.write("A%s %s %s\n" % (pentry.name, dtype, encode(value)))
+
+
+def convert(ifp, ofp, table):
+ c = Conversion(ifp, ofp, table)
+ try:
+ c.convert()
+ except IOError, (err, msg):
+ if err != errno.EPIPE:
+ raise
+
+
+def skip_white(line):
+ while line and line[0] in " %\n\t\r":
+ line = string.lstrip(line[1:])
+ return line
+
+
+
+class TableEntry:
+ def __init__(self, name, environment=0):
+ self.name = name
+ self.outputname = name
+ self.environment = environment
+ self.empty = not environment
+ self.has_content = 0
+ self.verbatim = 0
+ self.auto_close = 0
+ self.parameters = []
+ self.closes = []
+ self.endcloses = []
+
+class Parameter:
+ def __init__(self, type, name=None, optional=0):
+ self.type = type
+ self.name = name
+ self.optional = optional
+ self.text = ''
+ self.implied = 0
+
+
+class TableParser(XMLParser):
+ def __init__(self, table=None):
+ if table is None:
+ table = {}
+ self.__table = table
+ self.__current = None
+ self.__buffer = ''
+ XMLParser.__init__(self)
+
+ def get_table(self):
+ for entry in self.__table.values():
+ if entry.environment and not entry.has_content:
+ p = Parameter("content")
+ p.implied = 1
+ entry.parameters.append(p)
+ entry.has_content = 1
+ return self.__table
+
+ def start_environment(self, attrs):
+ name = attrs["name"]
+ self.__current = TableEntry(name, environment=1)
+ self.__current.verbatim = attrs.get("verbatim") == "yes"
+ if attrs.has_key("outputname"):
+ self.__current.outputname = attrs.get("outputname")
+ self.__current.endcloses = string.split(attrs.get("endcloses", ""))
+ def end_environment(self):
+ self.end_macro()
+
+ def start_macro(self, attrs):
+ name = attrs["name"]
+ self.__current = TableEntry(name)
+ self.__current.closes = string.split(attrs.get("closes", ""))
+ if attrs.has_key("outputname"):
+ self.__current.outputname = attrs.get("outputname")
+ def end_macro(self):
+ self.__table[self.__current.name] = self.__current
+ self.__current = None
+
+ def start_attribute(self, attrs):
+ name = attrs.get("name")
+ optional = attrs.get("optional") == "yes"
+ if name:
+ p = Parameter("attribute", name, optional=optional)
+ else:
+ p = Parameter("attribute", optional=optional)
+ self.__current.parameters.append(p)
+ self.__buffer = ''
+ def end_attribute(self):
+ self.__current.parameters[-1].text = self.__buffer
+
+ def start_entityref(self, attrs):
+ name = attrs["name"]
+ p = Parameter("entityref", name)
+ self.__current.parameters.append(p)
+
+ def start_child(self, attrs):
+ name = attrs["name"]
+ p = Parameter("child", name, attrs.get("optional") == "yes")
+ self.__current.parameters.append(p)
+ self.__current.empty = 0
+
+ def start_content(self, attrs):
+ p = Parameter("content")
+ p.implied = attrs.get("implied") == "yes"
+ if self.__current.environment:
+ p.implied = 1
+ self.__current.parameters.append(p)
+ self.__current.has_content = 1
+ self.__current.empty = 0
+
+ def start_text(self, attrs):
+ self.__current.empty = 0
+ self.__buffer = ''
+ def end_text(self):
+ p = Parameter("text")
+ p.text = self.__buffer
+ self.__current.parameters.append(p)
+
+ def handle_data(self, data):
+ self.__buffer = self.__buffer + data
+
+
+def load_table(fp, table=None):
+ parser = TableParser(table=table)
+ parser.feed(fp.read())
+ parser.close()
+ return parser.get_table()
+
+
+def main():
+ global DEBUG
+ #
+ opts, args = getopt.getopt(sys.argv[1:], "D", ["debug"])
+ for opt, arg in opts:
+ if opt in ("-D", "--debug"):
+ DEBUG = DEBUG + 1
+ if len(args) == 0:
+ ifp = sys.stdin
+ ofp = sys.stdout
+ elif len(args) == 1:
+ ifp = open(args)
+ ofp = sys.stdout
+ elif len(args) == 2:
+ ifp = open(args[0])
+ ofp = open(args[1], "w")
+ else:
+ usage()
+ sys.exit(2)
+
+ table = load_table(open(os.path.join(sys.path[0], 'conversion.xml')))
+ convert(ifp, ofp, table)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/doc/tools/sgmlconv/make.rules b/doc/tools/sgmlconv/make.rules
new file mode 100644
index 0000000..93579c5
--- /dev/null
+++ b/doc/tools/sgmlconv/make.rules
@@ -0,0 +1,48 @@
+# -*- makefile -*-
+#
+# Extra magic needed by the LaTeX->XML conversion process. This requires
+# $(TOOLSDIR) to be properly defined.
+
+DOCFIXER= $(TOOLSDIR)/sgmlconv/docfixer.py
+ESIS2ML= $(TOOLSDIR)/sgmlconv/esis2sgml.py
+LATEX2ESIS= $(TOOLSDIR)/sgmlconv/latex2esis.py
+CONVERSION= $(TOOLSDIR)/sgmlconv/conversion.xml
+
+ESISTARGETS= $(patsubst %.tex,%.esis,$(wildcard *.tex))
+ESIS1TARGETS= $(patsubst %.tex,%.esis1,$(wildcard *.tex))
+XMLTARGETS= $(patsubst %.tex,%.xml,$(wildcard *.tex))
+
+L2EFLAGS=
+
+all: xml
+
+esis: $(ESISTARGETS)
+esis1: $(ESIS1TARGETS)
+xml: $(XMLTARGETS)
+
+ESISTOOLS= $(TOOLSDIR)/sgmlconv/esistools.py
+
+$(ESISTARGETS): $(LATEX2ESIS) $(DOCFIXER) $(ESISTOOLS) $(CONVERSION)
+$(ESIS1TARGETS): $(LATEX2ESIS) $(CONVERSION)
+# This variant is easier to work with while debugging the conversion spec:
+#$(ESISTARGETS): $(LATEX2ESIS) $(DOCFIXER) $(ESISTOOLS)
+$(XMLTARGETS): $(ESIS2ML)
+
+
+.SUFFIXES: .esis .esis1 .tex .xml
+
+.tex.esis1:
+ $(LATEX2ESIS) $(L2EFLAGS) $< $@
+
+.esis1.esis:
+ $(DOCFIXER) $< $@
+
+.esis.xml:
+ $(ESIS2ML) --xml $< $@
+
+
+clean:
+ rm -f *.esis *.esis1
+
+clobber: clean
+ rm -f *.xml