initial source import
diff --git a/doc/tools/sgmlconv/latex2esis.py b/doc/tools/sgmlconv/latex2esis.py
new file mode 100755
index 0000000..74e1dc7
--- /dev/null
+++ b/doc/tools/sgmlconv/latex2esis.py
@@ -0,0 +1,555 @@
+#! /usr/bin/env python
+
+"""Generate ESIS events based on a LaTeX source document and
+configuration data.
+
+The conversion is not strong enough to work with arbitrary LaTeX
+documents; it has only been designed to work with the highly stylized
+markup used in the standard Python documentation.  A lot of
+information about specific markup is encoded in the control table
+passed to the convert() function; changing this table can allow this
+tool to support additional LaTeX markups.
+
+The format of the table is largely undocumented; see the commented
+headers where the table is specified in main().  There is no provision 
+to load an alternate table from an external file.
+"""
+
+import errno
+import getopt
+import os
+import re
+import string
+import sys
+import UserList
+import xml.sax.saxutils
+
+from types import ListType, StringType, TupleType
+
+try:
+    from xml.parsers.xmllib import XMLParser
+except ImportError:
+    from xmllib import XMLParser
+
+
+from esistools import encode
+
+
+DEBUG = 0
+
+
+class LaTeXFormatError(Exception):
+    pass
+
+
+class LaTeXStackError(LaTeXFormatError):
+    def __init__(self, found, stack):
+        msg = "environment close for %s doesn't match;\n  stack = %s" \
+              % (found, stack)
+        self.found = found
+        self.stack = stack[:]
+        LaTeXFormatError.__init__(self, msg)
+
+
+_begin_env_rx = re.compile(r"[\\]begin{([^}]*)}")
+_end_env_rx = re.compile(r"[\\]end{([^}]*)}")
+_begin_macro_rx = re.compile(r"[\\]([a-zA-Z]+[*]?) ?({|\s*\n?)")
+_comment_rx = re.compile("%+ ?(.*)\n[ \t]*")
+_text_rx = re.compile(r"[^]~%\\{}]+")
+_optional_rx = re.compile(r"\s*[[]([^]]*)[]]")
+# _parameter_rx is this complicated to allow {...} inside a parameter;
+# this is useful to match tabular layout specifications like {c|p{24pt}}
+_parameter_rx = re.compile("[ \n]*{(([^{}}]|{[^}]*})*)}")
+_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
+_start_group_rx = re.compile("[ \n]*{")
+_start_optional_rx = re.compile("[ \n]*[[]")
+
+
+ESCAPED_CHARS = "$%#^ {}&~"
+
+
+def dbgmsg(msg):
+    if DEBUG:
+        sys.stderr.write(msg + "\n")
+
+def pushing(name, point, depth):
+    dbgmsg("pushing <%s> at %s" % (name, point))
+
+def popping(name, point, depth):
+    dbgmsg("popping </%s> at %s" % (name, point))
+
+
+class _Stack(UserList.UserList):
+    def append(self, entry):
+        if type(entry) is not StringType:
+            raise LaTeXFormatError("cannot push non-string on stack: "
+                                   + `entry`)
+        #dbgmsg("%s<%s>" % (" "*len(self.data), entry))
+        self.data.append(entry)
+
+    def pop(self, index=-1):
+        entry = self.data[index]
+        del self.data[index]
+        #dbgmsg("%s</%s>" % (" "*len(self.data), entry))
+
+    def __delitem__(self, index):
+        entry = self.data[index]
+        del self.data[index]
+        #dbgmsg("%s</%s>" % (" "*len(self.data), entry))
+
+
+def new_stack():
+    if DEBUG:
+        return _Stack()
+    return []
+
+
+class Conversion:
+    def __init__(self, ifp, ofp, table):
+        self.write = ofp.write
+        self.ofp = ofp
+        self.table = table
+        self.line = string.join(map(string.rstrip, ifp.readlines()), "\n")
+        self.preamble = 1
+
+    def convert(self):
+        self.subconvert()
+
+    def subconvert(self, endchar=None, depth=0):
+        #
+        # Parses content, including sub-structures, until the character
+        # 'endchar' is found (with no open structures), or until the end
+        # of the input data is endchar is None.
+        #
+        stack = new_stack()
+        line = self.line
+        while line:
+            if line[0] == endchar and not stack:
+                self.line = line
+                return line
+            m = _comment_rx.match(line)
+            if m:
+                text = m.group(1)
+                if text:
+                    self.write("(COMMENT\n- %s \n)COMMENT\n-\\n\n"
+                               % encode(text))
+                line = line[m.end():]
+                continue
+            m = _begin_env_rx.match(line)
+            if m:
+                name = m.group(1)
+                entry = self.get_env_entry(name)
+                # re-write to use the macro handler
+                line = r"\%s %s" % (name, line[m.end():])
+                continue
+            m = _end_env_rx.match(line)
+            if m:
+                # end of environment
+                envname = m.group(1)
+                entry = self.get_entry(envname)
+                while stack and envname != stack[-1] \
+                      and stack[-1] in entry.endcloses:
+                    self.write(")%s\n" % stack.pop())
+                if stack and envname == stack[-1]:
+                    self.write(")%s\n" % entry.outputname)
+                    del stack[-1]
+                else:
+                    raise LaTeXStackError(envname, stack)
+                line = line[m.end():]
+                continue
+            m = _begin_macro_rx.match(line)
+            if m:
+                # start of macro
+                macroname = m.group(1)
+                if macroname == "c":
+                    # Ugh!  This is a combining character...
+                    endpos = m.end()
+                    self.combining_char("c", line[endpos])
+                    line = line[endpos + 1:]
+                    continue
+                entry = self.get_entry(macroname)
+                if entry.verbatim:
+                    # magic case!
+                    pos = string.find(line, "\\end{%s}" % macroname)
+                    text = line[m.end(1):pos]
+                    stack.append(entry.name)
+                    self.write("(%s\n" % entry.outputname)
+                    self.write("-%s\n" % encode(text))
+                    self.write(")%s\n" % entry.outputname)
+                    stack.pop()
+                    line = line[pos + len("\\end{%s}" % macroname):]
+                    continue
+                while stack and stack[-1] in entry.closes:
+                    top = stack.pop()
+                    topentry = self.get_entry(top)
+                    if topentry.outputname:
+                        self.write(")%s\n-\\n\n" % topentry.outputname)
+                #
+                if entry.outputname:
+                    if entry.empty:
+                        self.write("e\n")
+                #
+                params, optional, empty, environ = self.start_macro(macroname)
+                # rip off the macroname
+                if params:
+                    line = line[m.end(1):]
+                elif empty:
+                    line = line[m.end(1):]
+                else:
+                    line = line[m.end():]
+                opened = 0
+                implied_content = 0
+
+                # handle attribute mappings here:
+                for pentry in params:
+                    if pentry.type == "attribute":
+                        if pentry.optional:
+                            m = _optional_rx.match(line)
+                            if m and entry.outputname:
+                                line = line[m.end():]
+                                self.dump_attr(pentry, m.group(1))
+                        elif pentry.text and entry.outputname:
+                            # value supplied by conversion spec:
+                            self.dump_attr(pentry, pentry.text)
+                        else:
+                            m = _parameter_rx.match(line)
+                            if not m:
+                                raise LaTeXFormatError(
+                                    "could not extract parameter %s for %s: %s"
+                                    % (pentry.name, macroname, `line[:100]`))
+                            if entry.outputname:
+                                self.dump_attr(pentry, m.group(1))
+                            line = line[m.end():]
+                    elif pentry.type == "child":
+                        if pentry.optional:
+                            m = _optional_rx.match(line)
+                            if m:
+                                line = line[m.end():]
+                                if entry.outputname and not opened:
+                                    opened = 1
+                                    self.write("(%s\n" % entry.outputname)
+                                    stack.append(macroname)
+                                stack.append(pentry.name)
+                                self.write("(%s\n" % pentry.name)
+                                self.write("-%s\n" % encode(m.group(1)))
+                                self.write(")%s\n" % pentry.name)
+                                stack.pop()
+                        else:
+                            if entry.outputname and not opened:
+                                opened = 1
+                                self.write("(%s\n" % entry.outputname)
+                                stack.append(entry.name)
+                            self.write("(%s\n" % pentry.name)
+                            stack.append(pentry.name)
+                            self.line = skip_white(line)[1:]
+                            line = self.subconvert(
+                                "}", len(stack) + depth + 1)[1:]
+                            self.write(")%s\n" % stack.pop())
+                    elif pentry.type == "content":
+                        if pentry.implied:
+                            implied_content = 1
+                        else:
+                            if entry.outputname and not opened:
+                                opened = 1
+                                self.write("(%s\n" % entry.outputname)
+                                stack.append(entry.name)
+                            line = skip_white(line)
+                            if line[0] != "{":
+                                raise LaTeXFormatError(
+                                    "missing content for " + macroname)
+                            self.line = line[1:]
+                            line = self.subconvert("}", len(stack) + depth + 1)
+                            if line and line[0] == "}":
+                                line = line[1:]
+                    elif pentry.type == "text" and pentry.text:
+                        if entry.outputname and not opened:
+                            opened = 1
+                            stack.append(entry.name)
+                            self.write("(%s\n" % entry.outputname)
+                        #dbgmsg("--- text: %s" % `pentry.text`)
+                        self.write("-%s\n" % encode(pentry.text))
+                    elif pentry.type == "entityref":
+                        self.write("&%s\n" % pentry.name)
+                if entry.outputname:
+                    if not opened:
+                        self.write("(%s\n" % entry.outputname)
+                        stack.append(entry.name)
+                    if not implied_content:
+                        self.write(")%s\n" % entry.outputname)
+                        stack.pop()
+                continue
+            if line[0] == endchar and not stack:
+                self.line = line[1:]
+                return self.line
+            if line[0] == "}":
+                # end of macro or group
+                macroname = stack[-1]
+                if macroname:
+                    conversion = self.table[macroname]
+                    if conversion.outputname:
+                        # otherwise, it was just a bare group
+                        self.write(")%s\n" % conversion.outputname)
+                del stack[-1]
+                line = line[1:]
+                continue
+            if line[0] == "~":
+                # don't worry about the "tie" aspect of this command
+                line = line[1:]
+                self.write("- \n")
+                continue
+            if line[0] == "{":
+                stack.append("")
+                line = line[1:]
+                continue
+            if line[0] == "\\" and line[1] in ESCAPED_CHARS:
+                self.write("-%s\n" % encode(line[1]))
+                line = line[2:]
+                continue
+            if line[:2] == r"\\":
+                self.write("(BREAK\n)BREAK\n")
+                line = line[2:]
+                continue
+            if line[:2] == r"\_":
+                line = "_" + line[2:]
+                continue
+            if line[:2] in (r"\'", r'\"'):
+                # combining characters...
+                self.combining_char(line[1], line[2])
+                line = line[3:]
+                continue
+            m = _text_rx.match(line)
+            if m:
+                text = encode(m.group())
+                self.write("-%s\n" % text)
+                line = line[m.end():]
+                continue
+            # special case because of \item[]
+            # XXX can we axe this???
+            if line[0] == "]":
+                self.write("-]\n")
+                line = line[1:]
+                continue
+            # avoid infinite loops
+            extra = ""
+            if len(line) > 100:
+                extra = "..."
+            raise LaTeXFormatError("could not identify markup: %s%s"
+                                   % (`line[:100]`, extra))
+        while stack:
+            entry = self.get_entry(stack[-1])
+            if entry.closes:
+                self.write(")%s\n-%s\n" % (entry.outputname, encode("\n")))
+                del stack[-1]
+            else:
+                break
+        if stack:
+            raise LaTeXFormatError("elements remain on stack: "
+                                   + string.join(stack, ", "))
+        # otherwise we just ran out of input here...
+
+    # This is a really limited table of combinations, but it will have
+    # to do for now.
+    _combinations = {
+        ("c", "c"): 0x00E7,
+        ("'", "e"): 0x00E9,
+        ('"', "o"): 0x00F6,
+        }
+
+    def combining_char(self, prefix, char):
+        ordinal = self._combinations[(prefix, char)]
+        self.write("-\\%%%d;\n" % ordinal)
+
+    def start_macro(self, name):
+        conversion = self.get_entry(name)
+        parameters = conversion.parameters
+        optional = parameters and parameters[0].optional
+        return parameters, optional, conversion.empty, conversion.environment
+
+    def get_entry(self, name):
+        entry = self.table.get(name)
+        if entry is None:
+            dbgmsg("get_entry(%s) failing; building default entry!" % `name`)
+            # not defined; build a default entry:
+            entry = TableEntry(name)
+            entry.has_content = 1
+            entry.parameters.append(Parameter("content"))
+            self.table[name] = entry
+        return entry
+
+    def get_env_entry(self, name):
+        entry = self.table.get(name)
+        if entry is None:
+            # not defined; build a default entry:
+            entry = TableEntry(name, 1)
+            entry.has_content = 1
+            entry.parameters.append(Parameter("content"))
+            entry.parameters[-1].implied = 1
+            self.table[name] = entry
+        elif not entry.environment:
+            raise LaTeXFormatError(
+                name + " is defined as a macro; expected environment")
+        return entry
+
+    def dump_attr(self, pentry, value):
+        if not (pentry.name and value):
+            return
+        if _token_rx.match(value):
+            dtype = "TOKEN"
+        else:
+            dtype = "CDATA"
+        self.write("A%s %s %s\n" % (pentry.name, dtype, encode(value)))
+
+
+def convert(ifp, ofp, table):
+    c = Conversion(ifp, ofp, table)
+    try:
+        c.convert()
+    except IOError, (err, msg):
+        if err != errno.EPIPE:
+            raise
+
+
+def skip_white(line):
+    while line and line[0] in " %\n\t\r":
+        line = string.lstrip(line[1:])
+    return line
+
+
+
+class TableEntry:
+    def __init__(self, name, environment=0):
+        self.name = name
+        self.outputname = name
+        self.environment = environment
+        self.empty = not environment
+        self.has_content = 0
+        self.verbatim = 0
+        self.auto_close = 0
+        self.parameters = []
+        self.closes = []
+        self.endcloses = []
+
+class Parameter:
+    def __init__(self, type, name=None, optional=0):
+        self.type = type
+        self.name = name
+        self.optional = optional
+        self.text = ''
+        self.implied = 0
+
+
+class TableParser(XMLParser):
+    def __init__(self, table=None):
+        if table is None:
+            table = {}
+        self.__table = table
+        self.__current = None
+        self.__buffer = ''
+        XMLParser.__init__(self)
+
+    def get_table(self):
+        for entry in self.__table.values():
+            if entry.environment and not entry.has_content:
+                p = Parameter("content")
+                p.implied = 1
+                entry.parameters.append(p)
+                entry.has_content = 1
+        return self.__table
+
+    def start_environment(self, attrs):
+        name = attrs["name"]
+        self.__current = TableEntry(name, environment=1)
+        self.__current.verbatim = attrs.get("verbatim") == "yes"
+        if attrs.has_key("outputname"):
+            self.__current.outputname = attrs.get("outputname")
+        self.__current.endcloses = string.split(attrs.get("endcloses", ""))
+    def end_environment(self):
+        self.end_macro()
+
+    def start_macro(self, attrs):
+        name = attrs["name"]
+        self.__current = TableEntry(name)
+        self.__current.closes = string.split(attrs.get("closes", ""))
+        if attrs.has_key("outputname"):
+            self.__current.outputname = attrs.get("outputname")
+    def end_macro(self):
+        self.__table[self.__current.name] = self.__current
+        self.__current = None
+
+    def start_attribute(self, attrs):
+        name = attrs.get("name")
+        optional = attrs.get("optional") == "yes"
+        if name:
+            p = Parameter("attribute", name, optional=optional)
+        else:
+            p = Parameter("attribute", optional=optional)
+        self.__current.parameters.append(p)
+        self.__buffer = ''
+    def end_attribute(self):
+        self.__current.parameters[-1].text = self.__buffer
+
+    def start_entityref(self, attrs):
+        name = attrs["name"]
+        p = Parameter("entityref", name)
+        self.__current.parameters.append(p)
+
+    def start_child(self, attrs):
+        name = attrs["name"]
+        p = Parameter("child", name, attrs.get("optional") == "yes")
+        self.__current.parameters.append(p)
+        self.__current.empty = 0
+
+    def start_content(self, attrs):
+        p = Parameter("content")
+        p.implied = attrs.get("implied") == "yes"
+        if self.__current.environment:
+            p.implied = 1
+        self.__current.parameters.append(p)
+        self.__current.has_content = 1
+        self.__current.empty = 0
+
+    def start_text(self, attrs):
+        self.__current.empty = 0
+        self.__buffer = ''
+    def end_text(self):
+        p = Parameter("text")
+        p.text = self.__buffer
+        self.__current.parameters.append(p)
+
+    def handle_data(self, data):
+        self.__buffer = self.__buffer + data
+
+
+def load_table(fp, table=None):
+    parser = TableParser(table=table)
+    parser.feed(fp.read())
+    parser.close()
+    return parser.get_table()
+
+
+def main():
+    global DEBUG
+    #
+    opts, args = getopt.getopt(sys.argv[1:], "D", ["debug"])
+    for opt, arg in opts:
+        if opt in ("-D", "--debug"):
+            DEBUG = DEBUG + 1
+    if len(args) == 0:
+        ifp = sys.stdin
+        ofp = sys.stdout
+    elif len(args) == 1:
+        ifp = open(args)
+        ofp = sys.stdout
+    elif len(args) == 2:
+        ifp = open(args[0])
+        ofp = open(args[1], "w")
+    else:
+        usage()
+        sys.exit(2)
+
+    table = load_table(open(os.path.join(sys.path[0], 'conversion.xml')))
+    convert(ifp, ofp, table)
+
+
+if __name__ == "__main__":
+    main()