| #! /usr/bin/env python |
| |
| """Generate ESIS events based on a LaTeX source document and configuration |
| data. |
| """ |
# NOTE(review): '$Revision$' looks like a version-control keyword placeholder
# that is filled in on checkout -- confirm against the repository setup.
__version__ = '$Revision$'
| |
| import errno |
| import re |
| import string |
| import StringIO |
| import sys |
| |
| from esistools import encode |
| |
| |
# When true, pushing(), popping() and subconvert() write trace messages to
# sys.stderr.
DEBUG = 0
| |
| |
class Error(Exception):
    """Base class for the exceptions defined in this module."""
| |
class LaTeXFormatError(Error):
    """Raised when the LaTeX input contains markup that cannot be converted."""
| |
| |
| _begin_env_rx = re.compile(r"[\\]begin{([^}]*)}") |
| _end_env_rx = re.compile(r"[\\]end{([^}]*)}") |
| _begin_macro_rx = re.compile("[\\\\]([a-zA-Z]+[*]?)({|\\s*\n?)") |
| _comment_rx = re.compile("%+ ?(.*)\n *") |
| _text_rx = re.compile(r"[^]%\\{}]+") |
| _optional_rx = re.compile(r"\s*[[]([^]]*)[]]") |
| # _parameter_rx is this complicated to allow {...} inside a parameter; |
| # this is useful to match tabular layout specifications like {c|p{24pt}} |
| _parameter_rx = re.compile("[ \n]*{(([^{}}]|{[^}]*})*)}") |
| _token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$") |
| _start_group_rx = re.compile("[ \n]*{") |
| _start_optional_rx = re.compile("[ \n]*[[]") |
| |
| |
# Characters that subconvert() emits as plain character data when they
# appear in the input directly after a backslash.
ESCAPED_CHARS = "$%#^ {}&~"
| |
| |
def pushing(name, point, depth):
    """Trace the opening of element `name` on stderr when DEBUG is set.

    `point` labels the call site and `depth` gives the number of spaces
    used to indent the message.
    """
    if not DEBUG:
        return
    indent = " " * depth
    sys.stderr.write("%s<%s> at %s\n" % (indent, name, point))
| |
def popping(name, point, depth):
    """Trace the closing of element `name` on stderr when DEBUG is set.

    `point` labels the call site and `depth` gives the number of spaces
    used to indent the message.
    """
    if not DEBUG:
        return
    indent = " " * depth
    sys.stderr.write("%s</%s> at %s\n" % (indent, name, point))
| |
| |
def subconvert(line, ofp, table, discards, autoclosing, endchar=None, depth=0):
    """Convert the LaTeX text in `line` into ESIS events written to `ofp`.

    The input is consumed from the front, one piece of markup at a time:
    comments, \\begin{...}/\\end{...}, macros, groups, escaped characters,
    line breaks and plain text.

    Parameters:
      line        -- the remaining input text.
      ofp         -- output object; only its write() method is used.
      table       -- maps a macro or environment name to a 5-tuple
                     (params, optional, empty, environ, nocontent).  Each
                     entry of params is a string (value emitted as an
                     attribute), a 1-tuple (\\section-like: the macro is
                     opened and the named sub-element receives the content)
                     or a 1-element list (the parameter becomes a
                     sub-element of that name).  Names missing from the
                     table are treated as ([], 0, 0, 0, 0).
      discards    -- names whose start events (open event, attributes and
                     parameter sub-elements) are dropped and for which no
                     close event is written.
      autoclosing -- names of elements that are closed implicitly: when a
                     macro of the same name starts while one is open, at
                     \\end{document}, and when the input runs out.
      endchar     -- if given, the conversion stops at this character when
                     no element is open, and the text after it is returned.
      depth       -- nesting depth; only used to indent DEBUG trace output.

    Returns the text following `endchar` when it was found, or None when a
    call without `endchar` reaches the end of the input.

    Raises LaTeXFormatError for markup that cannot be identified, unbalanced
    or mismatched closes, missing parameters, an unterminated verbatim
    environment, a missing `endchar`, and elements left open at the end.
    """
    if DEBUG and endchar:
        sys.stderr.write("subconvert(%s, ..., endchar=%s)\n"
                         % (repr(line[:20]), repr(endchar)))
    # Names of the elements opened by this call, innermost last.  An empty
    # string marks a bare {...} group that produced no element.
    stack = []
    while line:
        if line[0] == endchar and not stack:
            if DEBUG:
                sys.stderr.write("subconvert() --> %s\n" % repr(line[1:21]))
            return line[1:]
        m = _comment_rx.match(line)
        if m:
            text = m.group(1)
            if text:
                ofp.write("(COMMENT\n- %s \n)COMMENT\n-\\n\n" % encode(text))
            line = line[m.end():]
            continue
        m = _begin_env_rx.match(line)
        if m:
            # re-write to use the macro handler
            line = r"\%s %s" % (m.group(1), line[m.end():])
            continue
        m = _end_env_rx.match(line)
        if m:
            # end of environment
            envname = m.group(1)
            if envname == "document":
                # special magic
                for n in stack[1:]:
                    if n not in autoclosing:
                        raise LaTeXFormatError(
                            "open element on stack: " + repr(n))
                # should be more careful, but this is easier to code:
                stack = []
                ofp.write(")document\n")
            elif stack and envname == stack[-1]:
                ofp.write(")%s\n" % envname)
                del stack[-1]
                popping(envname, "a", len(stack) + depth)
            else:
                # Also reached when nothing is open at all, so that a stray
                # \end{...} is reported as a format error.
                sys.stderr.write("stack: %s\n" % repr(stack))
                raise LaTeXFormatError(
                    "environment close for %s doesn't match" % envname)
            line = line[m.end():]
            continue
        m = _begin_macro_rx.match(line)
        if m:
            # start of macro
            macroname = m.group(1)
            if macroname == "verbatim":
                # really magic case!  Everything up to the closing marker is
                # copied through as character data without interpretation.
                end_marker = "\\end{verbatim}"
                pos = line.find(end_marker)
                if pos < 0:
                    raise LaTeXFormatError(
                        "verbatim environment is not closed: %s"
                        % repr(line[:100]))
                text = line[m.end(1):pos]
                ofp.write("(verbatim\n")
                ofp.write("-%s\n" % encode(text))
                ofp.write(")verbatim\n")
                line = line[pos + len(end_marker):]
                continue
            # A trailing "*" selects the unnumbered variant of the macro.
            numbered = 1
            if macroname[-1] == "*":
                macroname = macroname[:-1]
                numbered = 0
            if macroname in autoclosing and macroname in stack:
                # Close everything opened since the previous element of the
                # same name, then that element itself.
                while stack[-1] != macroname:
                    if stack[-1] and stack[-1] not in discards:
                        ofp.write(")%s\n-\\n\n" % stack[-1])
                    popping(stack[-1], "b", len(stack) + depth - 1)
                    del stack[-1]
                if macroname not in discards:
                    ofp.write("-\\n\n)%s\n-\\n\n" % macroname)
                popping(macroname, "c", len(stack) + depth - 1)
                del stack[-1]
            # Events for discarded macros go to a throw-away buffer until
            # the start of the macro has been processed.
            real_ofp = ofp
            if macroname in discards:
                ofp = StringIO.StringIO()
            #
            conversion = table.get(macroname, ([], 0, 0, 0, 0))
            params, optional, empty, environ, nocontent = conversion
            if empty:
                ofp.write("e\n")
            elif nocontent:
                # Closed right after its parameters, like an empty element,
                # but without the "e" event.
                empty = 1
            if not numbered:
                ofp.write("Anumbered TOKEN no\n")
            opened = 0
            # rip off the macroname
            if params:
                if optional and len(params) == 1:
                    line = line[m.end():]
                else:
                    line = line[m.end(1):]
            elif empty:
                line = line[m.end(1):]
            else:
                line = line[m.end():]
            #
            # Very ugly special case to deal with \item[].  The catch is that
            # this needs to occur outside the for loop that handles attribute
            # parsing so we can 'continue' the outer loop.
            #
            if params and optional and isinstance(params[0], tuple):
                # the attribute name isn't used in this special case
                pushing(macroname, "a", depth + len(stack))
                stack.append(macroname)
                ofp.write("(%s\n" % macroname)
                m = _start_optional_rx.match(line)
                if m:
                    line = line[m.end():]
                    line = subconvert(line, ofp, table, discards,
                                      autoclosing, endchar="]",
                                      depth=depth + len(stack))
                # The pushed-back "}" makes the next iteration close the
                # element that was just opened.
                line = "}" + line
                # Restore the real output here as well; otherwise a discarded
                # macro taking this path would swallow all later output.
                ofp = real_ofp
                continue
            # handle attribute mappings here:
            for attrname in params:
                if optional:
                    # Only the first parameter may be optional ([...]).
                    optional = 0
                    if isinstance(attrname, str):
                        m = _optional_rx.match(line)
                        if m:
                            line = line[m.end():]
                            ofp.write("A%s TOKEN %s\n"
                                      % (attrname, encode(m.group(1))))
                elif isinstance(attrname, tuple):
                    # This is a sub-element; but don't place the
                    # element we found on the stack (\section-like)
                    pushing(macroname, "b", len(stack) + depth)
                    stack.append(macroname)
                    ofp.write("(%s\n" % macroname)
                    # From here on the sub-element is the one being opened;
                    # the macro itself stays open on the stack.
                    macroname = attrname[0]
                    m = _start_group_rx.match(line)
                    if m:
                        line = line[m.end():]
                elif isinstance(attrname, list):
                    # A normal subelement.
                    attrname = attrname[0]
                    if not opened:
                        opened = 1
                        ofp.write("(%s\n" % macroname)
                        pushing(macroname, "c", len(stack) + depth)
                    ofp.write("(%s\n" % attrname)
                    pushing(attrname, "sub-elem", len(stack) + depth + 1)
                    # [1:] drops the character after the skipped whitespace,
                    # which is expected to be the opening "{".
                    line = subconvert(skip_white(line)[1:], ofp, table,
                                      discards, autoclosing, endchar="}",
                                      depth=depth + len(stack) + 2)
                    popping(attrname, "sub-elem", len(stack) + depth + 1)
                    ofp.write(")%s\n" % attrname)
                else:
                    m = _parameter_rx.match(line)
                    if not m:
                        raise LaTeXFormatError(
                            "could not extract parameter %s for %s: %s"
                            % (attrname, macroname, repr(line[:100])))
                    value = m.group(1)
                    if _token_rx.match(value):
                        dtype = "TOKEN"
                    else:
                        dtype = "CDATA"
                    ofp.write("A%s %s %s\n"
                              % (attrname, dtype, encode(value)))
                    line = line[m.end():]
            if params and isinstance(params[-1], str) \
               and (not empty) and not environ:
                # attempt to strip off next '{'
                m = _start_group_rx.match(line)
                if not m:
                    raise LaTeXFormatError(
                        "non-empty element '%s' has no content: %s"
                        % (macroname, line[:12]))
                line = line[m.end():]
            if not opened:
                ofp.write("(%s\n" % macroname)
                pushing(macroname, "d", len(stack) + depth)
            if empty:
                # Push back a "}" so the element is closed immediately.
                line = "}" + line
            stack.append(macroname)
            ofp = real_ofp
            continue
        if line[0] == "}":
            # end of macro
            if not stack:
                raise LaTeXFormatError(
                    "unbalanced '}': %s" % repr(line[:100]))
            macroname = stack[-1]
            conversion = table.get(macroname)
            if macroname \
               and macroname not in discards \
               and not isinstance(conversion, str):
                # otherwise, it was just a bare group
                ofp.write(")%s\n" % stack[-1])
            popping(macroname, "d", len(stack) + depth - 1)
            del stack[-1]
            line = line[1:]
            continue
        if line[0] == "{":
            pushing("", "e", len(stack) + depth)
            stack.append("")
            line = line[1:]
            continue
        # The length check keeps a lone backslash at the very end of the
        # input from raising IndexError; it is reported as unidentified
        # markup below instead.
        if line[0] == "\\" and len(line) > 1 and line[1] in ESCAPED_CHARS:
            ofp.write("-%s\n" % encode(line[1]))
            line = line[2:]
            continue
        if line[:2] == r"\\":
            ofp.write("(BREAK\n)BREAK\n")
            line = line[2:]
            continue
        m = _text_rx.match(line)
        if m:
            text = encode(m.group())
            ofp.write("-%s\n" % text)
            line = line[m.end():]
            continue
        # special case because of \item[]
        if line[0] == "]":
            ofp.write("-]\n")
            line = line[1:]
            continue
        # avoid infinite loops
        extra = ""
        if len(line) > 100:
            extra = "..."
        raise LaTeXFormatError("could not identify markup: %s%s"
                               % (repr(line[:100]), extra))
    # Out of input: implicitly close whatever may be closed that way.
    while stack and stack[-1] in autoclosing:
        ofp.write("-\\n\n")
        ofp.write(")%s\n" % stack[-1])
        popping(stack[-1], "e", len(stack) + depth - 1)
        del stack[-1]
    if stack:
        raise LaTeXFormatError("elements remain on stack: "
                               + " ".join(stack))
    if endchar is not None:
        # A nested conversion must end at its terminator; returning None
        # here would only surface later as a TypeError in the caller.
        raise LaTeXFormatError("input ended before closing %s was found"
                               % repr(endchar))
    # otherwise we just ran out of input here...
| |
| |
def convert(ifp, ofp, table={}, discards=(), autoclosing=()):
    """Read a LaTeX document from `ifp` and write its ESIS events to `ofp`.

    The whole input is read at once and trailing whitespace is stripped
    from every line before the text is handed to subconvert().  `table`,
    `discards` and `autoclosing` are passed through to subconvert()
    unchanged; the shared default `table` is only read, never modified.

    An IOError whose errno is EPIPE (the reader of the output went away) is
    ignored; any other IOError is re-raised.
    """
    stripped = [text.rstrip() for text in ifp.read().split("\n")]
    data = "\n".join(stripped)
    try:
        subconvert(data, ofp, table, discards, autoclosing)
    except IOError as exc:
        # Inspect the errno attribute instead of unpacking the exception:
        # unpacking fails for an IOError that does not carry exactly two
        # arguments and would hide the original error.
        if exc.errno != errno.EPIPE:
            raise
| |
| |
def skip_white(line):
    """Return `line` without its leading blanks and "%" characters.

    While the text starts with a space, tab, newline or "%", that character
    and any whitespace following it are removed.  Only the "%" itself is
    skipped, not the rest of a comment line.
    """
    while line and line[0] in " %\n\t":
        # str.lstrip() replaces string.lstrip(), which newer Python versions
        # no longer provide.
        line = line[1:].lstrip()
    return line
| |
| |
def main():
    """Command-line entry point.

    With one argument the named LaTeX file is converted and the result is
    written to standard output; with two arguments the result is written to
    the file named by the second.  Any other argument count prints a usage
    message to standard error and exits with status 2.
    """
    argc = len(sys.argv)
    if argc not in (2, 3):
        # The usage text is written here because the module defines no
        # usage() function to call.
        program = (sys.argv or ["<script>"])[0]
        sys.stderr.write("usage: %s infile [outfile]\n" % program)
        sys.exit(2)
    table = {
        # entries have the form:
        # name: ([attribute names], first_is_optional, empty, isenv, nocontent)
        "appendix": ([], 0, 1, 0, 0),
        "bifuncindex": (["name"], 0, 1, 0, 0),
        "catcode": ([], 0, 1, 0, 0),
        "cfuncdesc": (["type", "name", ("args",)], 0, 0, 1, 0),
        "chapter": ([("title",)], 0, 0, 0, 0),
        # NOTE(review): subconvert() strips a trailing "*" from the macro
        # name before consulting the table, so this entry looks unused.
        "chapter*": ([("title",)], 0, 0, 0, 0),
        "classdesc": (["name", ("constructor-args",)], 0, 0, 1, 0),
        "ctypedesc": (["name"], 0, 0, 1, 0),
        "cvardesc": (["type", "name"], 0, 0, 1, 0),
        "datadesc": (["name"], 0, 0, 1, 0),
        "declaremodule": (["id", "type", "name"], 1, 1, 0, 0),
        "deprecated": (["release"], 0, 0, 0, 0),
        "documentclass": (["classname"], 0, 1, 0, 0),
        "excdesc": (["name"], 0, 0, 1, 0),
        "funcdesc": (["name", ("args",)], 0, 0, 1, 0),
        "funcdescni": (["name", ("args",)], 0, 0, 1, 0),
        "geq": ([], 0, 1, 0, 0),
        "hline": ([], 0, 1, 0, 0),
        "indexii": (["ie1", "ie2"], 0, 1, 0, 0),
        "indexiii": (["ie1", "ie2", "ie3"], 0, 1, 0, 0),
        "indexiv": (["ie1", "ie2", "ie3", "ie4"], 0, 1, 0, 0),
        "indexname": ([], 0, 0, 0, 0),
        "input": (["source"], 0, 1, 0, 0),
        "item": ([("leader",)], 1, 0, 0, 0),
        "label": (["id"], 0, 1, 0, 0),
        "labelwidth": ([], 0, 1, 0, 0),
        "LaTeX": ([], 0, 1, 0, 0),
        "leftmargin": ([], 0, 1, 0, 0),
        "leq": ([], 0, 1, 0, 0),
        "lineii": ([["entry"], ["entry"]], 0, 0, 0, 1),
        "lineiii": ([["entry"], ["entry"], ["entry"]], 0, 0, 0, 1),
        "lineiv": ([["entry"], ["entry"], ["entry"], ["entry"]], 0, 0, 0, 1),
        "localmoduletable": ([], 0, 1, 0, 0),
        "manpage": (["name", "section"], 0, 1, 0, 0),
        "memberdesc": (["class", "name"], 1, 0, 1, 0),
        "methoddesc": (["class", "name", ("args",)], 1, 0, 1, 0),
        "methoddescni": (["class", "name", ("args",)], 1, 0, 1, 0),
        "moduleauthor": (["name", "email"], 0, 1, 0, 0),
        "opcodedesc": (["name", "var"], 0, 0, 1, 0),
        "par": ([], 0, 1, 0, 0),
        "paragraph": ([("title",)], 0, 0, 0, 0),
        "renewcommand": (["macro"], 0, 0, 0, 0),
        "rfc": (["number"], 0, 1, 0, 0),
        "section": ([("title",)], 0, 0, 0, 0),
        "sectionauthor": (["name", "email"], 0, 1, 0, 0),
        "seemodule": (["ref", "name"], 1, 0, 0, 0),
        "stindex": (["type"], 0, 1, 0, 0),
        "subparagraph": ([("title",)], 0, 0, 0, 0),
        "subsection": ([("title",)], 0, 0, 0, 0),
        "subsubsection": ([("title",)], 0, 0, 0, 0),
        "list": (["bullet", "init"], 0, 0, 1, 0),
        "tableii": (["colspec", "style",
                     ["entry"], ["entry"]], 0, 0, 1, 0),
        "tableiii": (["colspec", "style",
                      ["entry"], ["entry"], ["entry"]], 0, 0, 1, 0),
        "tableiv": (["colspec", "style",
                     ["entry"], ["entry"], ["entry"], ["entry"]], 0, 0, 1, 0),
        "version": ([], 0, 1, 0, 0),
        "versionadded": (["version"], 0, 1, 0, 0),
        "versionchanged": (["version"], 0, 1, 0, 0),
        "withsubitem": (["text"], 0, 0, 0, 0),
        #
        "ABC": ([], 0, 1, 0, 0),
        "ASCII": ([], 0, 1, 0, 0),
        "C": ([], 0, 1, 0, 0),
        "Cpp": ([], 0, 1, 0, 0),
        "EOF": ([], 0, 1, 0, 0),
        "e": ([], 0, 1, 0, 0),
        "ldots": ([], 0, 1, 0, 0),
        "NULL": ([], 0, 1, 0, 0),
        "POSIX": ([], 0, 1, 0, 0),
        "UNIX": ([], 0, 1, 0, 0),
        #
        # Things that will actually be going away!
        #
        # (makeindex, makemodindex and maketitle are listed only here; the
        # identical duplicate entries further up were dropped.)
        "fi": ([], 0, 1, 0, 0),
        "ifhtml": ([], 0, 1, 0, 0),
        "makeindex": ([], 0, 1, 0, 0),
        "makemodindex": ([], 0, 1, 0, 0),
        "maketitle": ([], 0, 1, 0, 0),
        "noindent": ([], 0, 1, 0, 0),
        "protect": ([], 0, 1, 0, 0),
        "tableofcontents": ([], 0, 1, 0, 0),
    }
    discards = ["fi", "ifhtml", "makeindex", "makemodindex", "maketitle",
                "noindent", "tableofcontents"]
    autoclosing = ["chapter", "section", "subsection", "subsubsection",
                   "paragraph", "subparagraph"]
    ifp = open(sys.argv[1])
    try:
        if argc == 3:
            ofp = open(sys.argv[2], "w")
        else:
            ofp = sys.stdout
        try:
            convert(ifp, ofp, table,
                    discards=discards, autoclosing=autoclosing)
        finally:
            # Only close what was opened here; sys.stdout is left alone.
            if ofp is not sys.stdout:
                ofp.close()
    finally:
        ifp.close()
| |
| |
# Run the converter only when the file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()