| #! /usr/bin/env python |
| |
| """Generate ESIS events based on a LaTeX source document and configuration |
| data. |
| """ |
| __version__ = '$Revision$' |
| |
| import errno |
| import re |
| import string |
| import StringIO |
| import sys |
| |
| from esistools import encode |
| from types import ListType, StringType, TupleType |
| |
| |
# Debug switch: when true, dbgmsg() and Conversion.subconvert() write
# trace messages to standard error.
DEBUG = 0
| |
| |
class Error(Exception):
    """Base class for the exceptions defined in this module."""
| |
class LaTeXFormatError(Error):
    """Raised when the LaTeX input cannot be parsed or is not balanced."""
| |
| |
| _begin_env_rx = re.compile(r"[\\]begin{([^}]*)}") |
| _end_env_rx = re.compile(r"[\\]end{([^}]*)}") |
| _begin_macro_rx = re.compile(r"[\\]([a-zA-Z]+[*]?)({|\s*\n?)") |
| _comment_rx = re.compile("%+ ?(.*)\n[ \t]*") |
| _text_rx = re.compile(r"[^]%\\{}]+") |
| _optional_rx = re.compile(r"\s*[[]([^]]*)[]]") |
| # _parameter_rx is this complicated to allow {...} inside a parameter; |
| # this is useful to match tabular layout specifications like {c|p{24pt}} |
| _parameter_rx = re.compile("[ \n]*{(([^{}}]|{[^}]*})*)}") |
| _token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$") |
| _start_group_rx = re.compile("[ \n]*{") |
| _start_optional_rx = re.compile("[ \n]*[[]") |
| |
| |
| ESCAPED_CHARS = "$%#^ {}&~" |
| |
| |
def dbgmsg(msg):
    """Write msg plus a newline to standard error when DEBUG is true."""
    if not DEBUG:
        return
    sys.stderr.write(msg + "\n")
| |
def pushing(name, point, depth):
    """Trace the opening of element name, indented by depth spaces.

    point is a short label identifying the call site; the message is
    handed to dbgmsg().
    """
    indent = " " * depth
    dbgmsg("%s<%s> at %s" % (indent, name, point))
| |
def popping(name, point, depth):
    """Trace the closing of element name, indented by depth spaces.

    point is a short label identifying the call site; the message is
    handed to dbgmsg().
    """
    indent = " " * depth
    dbgmsg("%s</%s> at %s" % (indent, name, point))
| |
| |
class Conversion:
    """Convert LaTeX source text into a stream of ESIS events.

    The whole input is read when the object is created; convert() then
    walks that text and writes one event per line to the current output:
    "(name" and ")name" for element open and close, "-text" for character
    data and "Aname TYPE value" for attributes.

    table maps macro and environment names to 5-tuples
    (params, optional, empty, environ, nocontent); names that are not in
    the table are handled as ([], 0, 0, 0, 0).  Each entry of params is
    a string (plain attribute), a 1-tuple (sub-element holding the content
    of the macro, as for \\section) or a 1-element list (sub-element).

    discards lists macros whose own events are written to a scratch buffer
    instead of the real output.  autoclosing lists elements that may be
    left open: they are closed implicitly when the same macro starts again
    or when the input ends, and are tolerated on the stack at
    \\end{document}.
    """

    def __init__(self, ifp, ofp, table=None, discards=(), autoclosing=()):
        """Read all of ifp and prepare to write events to ofp.

        Trailing whitespace is stripped from every input line, and the
        lines are joined with newlines (no trailing newline).
        """
        # Seeding the stack and popping it installs ofp as the current
        # output through the same code path used later on.
        self.ofp_stack = [ofp]
        self.pop_output()
        if table is None:
            # the default used to reach self.table.get() as None
            table = {}
        self.table = table
        self.discards = discards
        self.autoclosing = autoclosing
        self.line = "\n".join([text.rstrip() for text in ifp.readlines()])
        self.err_write = sys.stderr.write
        self.preamble = 1

    def push_output(self, ofp):
        """Make ofp the current output, remembering the previous one."""
        self.ofp_stack.append(self.ofp)
        self.ofp = ofp
        self.write = ofp.write

    def pop_output(self):
        """Restore the output that was current before push_output()."""
        self.ofp = self.ofp_stack.pop()
        self.write = self.ofp.write

    def subconvert(self, endchar=None, depth=0):
        """Convert self.line until endchar is seen at nesting level zero.

        With an endchar, the text starting at that character is stored in
        self.line and returned, so the caller strips the character itself.
        Without one, the whole input is consumed and None is returned.
        depth only affects the indentation of debugging output.

        Raises LaTeXFormatError for markup that cannot be identified,
        unbalanced groups or environments, an unterminated verbatim
        environment, and input that ends before endchar is found.
        """
        stack = []
        line = self.line
        if DEBUG and endchar:
            self.err_write(
                "subconvert(%s)\n line = %s\n"
                % (repr(endchar), repr(line[:20])))
        while line:
            if line[0] == endchar and not stack:
                if DEBUG:
                    self.err_write(
                        "subconvert() --> %s\n" % repr(line[1:21]))
                self.line = line
                return line
            m = _comment_rx.match(line)
            if m:
                text = m.group(1)
                if text:
                    self.write("(COMMENT\n- %s \n)COMMENT\n-\\n\n"
                               % encode(text))
                line = line[m.end():]
                continue
            m = _begin_env_rx.match(line)
            if m:
                # re-write to use the macro handler
                line = r"\%s %s" % (m.group(1), line[m.end():])
                continue
            m = _end_env_rx.match(line)
            if m:
                # end of environment
                envname = m.group(1)
                if envname == "document":
                    # special magic: anything still open above the
                    # outermost element must be allowed to stay open
                    for n in stack[1:]:
                        if n not in self.autoclosing:
                            raise LaTeXFormatError(
                                "open element on stack: " + repr(n))
                    # should be more careful, but this is easier to code:
                    stack = []
                    self.write(")document\n")
                elif stack and envname == stack[-1]:
                    self.write(")%s\n" % envname)
                    del stack[-1]
                    popping(envname, "a", len(stack) + depth)
                else:
                    self.err_write("stack: %s\n" % repr(stack))
                    raise LaTeXFormatError(
                        "environment close for %s doesn't match" % envname)
                line = line[m.end():]
                continue
            m = _begin_macro_rx.match(line)
            if m:
                # start of macro
                macroname = m.group(1)
                if macroname == "verbatim":
                    # really magic case!  Everything up to the matching
                    # \end is passed through as character data.
                    pos = line.find("\\end{verbatim}")
                    if pos < 0:
                        # find() == -1 used to slice the text silently
                        raise LaTeXFormatError(
                            "verbatim environment is never closed: "
                            + repr(line[:100]))
                    text = line[m.end(1):pos]
                    self.write("(verbatim\n")
                    self.write("-%s\n" % encode(text))
                    self.write(")verbatim\n")
                    line = line[pos + len("\\end{verbatim}"):]
                    continue
                numbered = 1
                opened = 0
                if macroname[-1] == "*":
                    macroname = macroname[:-1]
                    numbered = 0
                if macroname in self.autoclosing and macroname in stack:
                    # close everything nested in the previous element of
                    # this name, then that element itself
                    while stack[-1] != macroname:
                        top = stack.pop()
                        if top and top not in self.discards:
                            self.write(")%s\n-\\n\n" % top)
                        popping(top, "b", len(stack) + depth)
                    if macroname not in self.discards:
                        self.write("-\\n\n)%s\n-\\n\n" % macroname)
                    popping(macroname, "c", len(stack) + depth - 1)
                    del stack[-1]
                #
                # Events for discarded macros go to a scratch buffer;
                # pushing the current output otherwise keeps the later
                # pop_output() balanced.
                if macroname in self.discards:
                    self.push_output(StringIO.StringIO())
                else:
                    self.push_output(self.ofp)
                #
                params, optional, empty, environ = self.start_macro(macroname)
                if not numbered:
                    self.write("Anumbered TOKEN no\n")
                # rip off the macroname
                if params or empty:
                    # keep whatever followed the name: it may start a
                    # parameter, or belong to the surrounding text
                    line = line[m.end(1):]
                else:
                    line = line[m.end():]
                #
                #  Very ugly special case to deal with \item[].  The catch
                #  is that this needs to occur outside the for loop that
                #  handles attribute parsing so we can 'continue' the outer
                #  loop.
                #
                if optional and isinstance(params[0], tuple):
                    # the attribute name isn't used in this special case
                    pushing(macroname, "a", depth + len(stack))
                    stack.append(macroname)
                    self.write("(%s\n" % macroname)
                    m = _start_optional_rx.match(line)
                    if m:
                        self.line = line[m.end():]
                        # NOTE(review): the returned text still starts
                        # with "]", which is later emitted as data by the
                        # "]" special case below -- confirm whether it
                        # should be stripped here.
                        line = self.subconvert("]", depth + len(stack))
                    # NOTE(review): the "}" is pushed back whether or not
                    # an optional argument was present, so that a plain
                    # \item is closed right away -- confirm this nesting.
                    line = "}" + line
                    # this path skips the pop at the end of the macro
                    # handler, so balance push_output() here
                    self.pop_output()
                    continue
                # handle attribute mappings here:
                for attrname in params:
                    if optional:
                        # only the first parameter can be optional
                        optional = 0
                        if isinstance(attrname, str):
                            m = _optional_rx.match(line)
                            if m:
                                line = line[m.end():]
                                self.write("A%s TOKEN %s\n"
                                           % (attrname, encode(m.group(1))))
                    elif isinstance(attrname, tuple):
                        # This is a sub-element; but don't place the
                        # element we found on the stack (\section-like)
                        pushing(macroname, "b", len(stack) + depth)
                        stack.append(macroname)
                        self.write("(%s\n" % macroname)
                        # from here on the sub-element is the one that is
                        # opened and pushed below
                        macroname = attrname[0]
                        m = _start_group_rx.match(line)
                        if m:
                            line = line[m.end():]
                    elif isinstance(attrname, list):
                        # A normal subelement: <macroname><attrname>...</>...
                        attrname = attrname[0]
                        if not opened:
                            opened = 1
                            self.write("(%s\n" % macroname)
                            pushing(macroname, "c", len(stack) + depth)
                        self.write("(%s\n" % attrname)
                        pushing(attrname, "sub-elem", len(stack) + depth + 1)
                        # drop the opening brace, convert up to the
                        # closing one, then drop that as well
                        self.line = skip_white(line)[1:]
                        line = self.subconvert("}", len(stack) + depth + 1)[1:]
                        popping(attrname, "sub-elem", len(stack) + depth + 1)
                        self.write(")%s\n" % attrname)
                    else:
                        m = _parameter_rx.match(line)
                        if not m:
                            raise LaTeXFormatError(
                                "could not extract parameter %s for %s: %s"
                                % (attrname, macroname, repr(line[:100])))
                        value = m.group(1)
                        if _token_rx.match(value):
                            dtype = "TOKEN"
                        else:
                            dtype = "CDATA"
                        self.write("A%s %s %s\n"
                                   % (attrname, dtype, encode(value)))
                        line = line[m.end():]
                if params and isinstance(params[-1], str) \
                   and (not empty) and not environ:
                    # attempt to strip off next '{'
                    m = _start_group_rx.match(line)
                    if not m:
                        raise LaTeXFormatError(
                            "non-empty element '%s' has no content: %s"
                            % (macroname, line[:12]))
                    line = line[m.end():]
                if not opened:
                    self.write("(%s\n" % macroname)
                    pushing(macroname, "d", len(stack) + depth)
                if empty:
                    # fake a closing brace so the element is closed on
                    # the next pass through the loop
                    line = "}" + line
                stack.append(macroname)
                self.pop_output()
                continue
            if line[0] == "}":
                # end of macro or group
                if not stack:
                    raise LaTeXFormatError(
                        "unmatched '}': " + repr(line[:100]))
                macroname = stack[-1]
                conversion = self.table.get(macroname)
                if macroname \
                   and macroname not in self.discards \
                   and not isinstance(conversion, str):
                    # otherwise, it was just a bare group
                    self.write(")%s\n" % stack[-1])
                popping(macroname, "d", len(stack) + depth - 1)
                del stack[-1]
                line = line[1:]
                continue
            if line[0] == "{":
                # bare group: recorded with an empty name
                pushing("", "e", len(stack) + depth)
                stack.append("")
                line = line[1:]
                continue
            if line[0] == "\\" and len(line) > 1 \
               and line[1] in ESCAPED_CHARS:
                self.write("-%s\n" % encode(line[1]))
                line = line[2:]
                continue
            if line[:2] == r"\\":
                self.write("(BREAK\n)BREAK\n")
                line = line[2:]
                continue
            m = _text_rx.match(line)
            if m:
                text = encode(m.group())
                self.write("-%s\n" % text)
                line = line[m.end():]
                continue
            # special case because of \item[]
            if line[0] == "]":
                self.write("-]\n")
                line = line[1:]
                continue
            # avoid infinite loops
            extra = ""
            if len(line) > 100:
                extra = "..."
            raise LaTeXFormatError("could not identify markup: %s%s"
                                   % (repr(line[:100]), extra))
        while stack and stack[-1] in self.autoclosing:
            self.write("-\\n\n")
            self.write(")%s\n" % stack[-1])
            popping(stack.pop(), "e", len(stack) + depth - 1)
        if stack:
            raise LaTeXFormatError("elements remain on stack: "
                                   + ", ".join(stack))
        if endchar:
            # Callers that pass an endchar slice or concatenate the
            # result; report the real problem instead of returning None.
            raise LaTeXFormatError(
                "input ended while looking for %s" % repr(endchar))
        # otherwise we just ran out of input here...

    def convert(self):
        """Convert the whole input that was read by the constructor."""
        self.subconvert()

    def start_macro(self, name):
        """Look up name and return (params, optional, empty, environ).

        Writes an "e" line when the table marks the macro as empty.  A
        macro flagged nocontent is reported to the caller as empty without
        that line being written.
        """
        conversion = self.table.get(name, ([], 0, 0, 0, 0))
        params, optional, empty, environ, nocontent = conversion
        if empty:
            self.write("e\n")
        elif nocontent:
            empty = 1
        return params, optional, empty, environ
| |
| |
def convert(ifp, ofp, table={}, discards=(), autoclosing=()):
    """Convert the LaTeX text read from ifp into ESIS events on ofp.

    table, discards and autoclosing are handed to Conversion unchanged;
    the default table is only read, never modified.  An IOError whose
    errno is EPIPE is swallowed; every other exception propagates.
    """
    c = Conversion(ifp, ofp, table, discards, autoclosing)
    try:
        c.convert()
    except IOError:
        # Look at the errno attribute rather than unpacking the exception:
        # unpacking fails for an IOError that does not carry exactly two
        # arguments and would hide the original error.
        err = sys.exc_info()[1]
        if getattr(err, "errno", None) != errno.EPIPE:
            raise
| |
| |
def skip_white(line):
    """Return line without its leading blanks, tabs, newlines and "%".

    Each time one of those characters is removed from the front, any
    whitespace that follows it is stripped as well.
    """
    while line:
        if line[0] not in " %\n\t":
            break
        line = line[1:].lstrip()
    return line
| |
| |
def usage():
    """Write a one-line summary of the command line to standard error."""
    sys.stderr.write("usage: %s input-file [output-file]\n" % sys.argv[0])


def main():
    """Command-line entry point: convert a LaTeX file to ESIS events.

    With one argument the events are written to standard output; with two,
    to the file named by the second argument.  Any other argument count
    prints a usage message and exits with status 2.
    """
    if len(sys.argv) not in (2, 3):
        usage()
        sys.exit(2)
    table = {
        # entries have the form:
        # name: ([attribute names], is1stOptional, isEmpty, isEnv, nocontent)
        # attribute names can be:
        #   "string" -- normal attribute
        #   ("string",) -- sub-element with content of macro; like for \section
        #   ["string"] -- sub-element
        "appendix": ([], 0, 1, 0, 0),
        "bifuncindex": (["name"], 0, 1, 0, 0),
        "catcode": ([], 0, 1, 0, 0),
        "cfuncdesc": (["type", "name", ("args",)], 0, 0, 1, 0),
        "chapter": ([("title",)], 0, 0, 0, 0),
        "chapter*": ([("title",)], 0, 0, 0, 0),
        "classdesc": (["name", ("args",)], 0, 0, 1, 0),
        "ctypedesc": (["name"], 0, 0, 1, 0),
        "cvardesc": (["type", "name"], 0, 0, 1, 0),
        "datadesc": (["name"], 0, 0, 1, 0),
        "declaremodule": (["id", "type", "name"], 1, 1, 0, 0),
        "deprecated": (["release"], 0, 0, 0, 0),
        "documentclass": (["classname"], 0, 1, 0, 0),
        "excdesc": (["name"], 0, 0, 1, 0),
        "funcdesc": (["name", ("args",)], 0, 0, 1, 0),
        "funcdescni": (["name", ("args",)], 0, 0, 1, 0),
        "funcline": (["name"], 0, 0, 0, 0),
        "funclineni": (["name"], 0, 0, 0, 0),
        "geq": ([], 0, 1, 0, 0),
        "hline": ([], 0, 1, 0, 0),
        "indexii": (["ie1", "ie2"], 0, 1, 0, 0),
        "indexiii": (["ie1", "ie2", "ie3"], 0, 1, 0, 0),
        "indexiv": (["ie1", "ie2", "ie3", "ie4"], 0, 1, 0, 0),
        "indexname": ([], 0, 0, 0, 0),
        "input": (["source"], 0, 1, 0, 0),
        "item": ([("leader",)], 1, 0, 0, 0),
        "label": (["id"], 0, 1, 0, 0),
        "labelwidth": ([], 0, 1, 0, 0),
        "large": ([], 0, 1, 0, 0),
        "LaTeX": ([], 0, 1, 0, 0),
        "leftmargin": ([], 0, 1, 0, 0),
        "leq": ([], 0, 1, 0, 0),
        "lineii": ([["entry"], ["entry"]], 0, 0, 0, 1),
        "lineiii": ([["entry"], ["entry"], ["entry"]], 0, 0, 0, 1),
        "lineiv": ([["entry"], ["entry"], ["entry"], ["entry"]], 0, 0, 0, 1),
        "localmoduletable": ([], 0, 1, 0, 0),
        "manpage": (["name", "section"], 0, 1, 0, 0),
        "memberdesc": (["class", "name"], 1, 0, 1, 0),
        "methoddesc": (["class", "name", ("args",)], 1, 0, 1, 0),
        "methoddescni": (["class", "name", ("args",)], 1, 0, 1, 0),
        "methodline": (["class", "name"], 1, 0, 0, 0),
        "methodlineni": (["class", "name"], 1, 0, 0, 0),
        "moduleauthor": (["name", "email"], 0, 1, 0, 0),
        "opcodedesc": (["name", "var"], 0, 0, 1, 0),
        "par": ([], 0, 1, 0, 0),
        "paragraph": ([("title",)], 0, 0, 0, 0),
        "refbimodindex": (["name"], 0, 1, 0, 0),
        "refexmodindex": (["name"], 0, 1, 0, 0),
        "refmodindex": (["name"], 0, 1, 0, 0),
        "refstmodindex": (["name"], 0, 1, 0, 0),
        "refmodule": (["ref"], 1, 0, 0, 0),
        "renewcommand": (["macro"], 0, 0, 0, 0),
        "rfc": (["num"], 0, 1, 0, 0),
        "section": ([("title",)], 0, 0, 0, 0),
        "sectionauthor": (["name", "email"], 0, 1, 0, 0),
        "seemodule": (["ref", "name"], 1, 0, 0, 0),
        "stindex": (["type"], 0, 1, 0, 0),
        "subparagraph": ([("title",)], 0, 0, 0, 0),
        "subsection": ([("title",)], 0, 0, 0, 0),
        "subsubsection": ([("title",)], 0, 0, 0, 0),
        "list": (["bullet", "init"], 0, 0, 1, 0),
        "tableii": (["colspec", "style",
                     ["entry"], ["entry"]], 0, 0, 1, 0),
        "tableiii": (["colspec", "style",
                      ["entry"], ["entry"], ["entry"]], 0, 0, 1, 0),
        "tableiv": (["colspec", "style",
                     ["entry"], ["entry"], ["entry"], ["entry"]], 0, 0, 1, 0),
        "version": ([], 0, 1, 0, 0),
        "versionadded": (["version"], 0, 1, 0, 0),
        "versionchanged": (["version"], 0, 1, 0, 0),
        "withsubitem": (["text"], 0, 0, 0, 0),
        #
        "ABC": ([], 0, 1, 0, 0),
        "ASCII": ([], 0, 1, 0, 0),
        "C": ([], 0, 1, 0, 0),
        "Cpp": ([], 0, 1, 0, 0),
        "EOF": ([], 0, 1, 0, 0),
        "e": ([], 0, 1, 0, 0),
        "ldots": ([], 0, 1, 0, 0),
        "NULL": ([], 0, 1, 0, 0),
        "POSIX": ([], 0, 1, 0, 0),
        "UNIX": ([], 0, 1, 0, 0),
        #
        # Things that will actually be going away!
        #
        "fi": ([], 0, 1, 0, 0),
        "ifhtml": ([], 0, 1, 0, 0),
        "makeindex": ([], 0, 1, 0, 0),
        "makemodindex": ([], 0, 1, 0, 0),
        "maketitle": ([], 0, 1, 0, 0),
        "noindent": ([], 0, 1, 0, 0),
        "protect": ([], 0, 1, 0, 0),
        "tableofcontents": ([], 0, 1, 0, 0),
        }
    discards = ["fi", "ifhtml", "makeindex", "makemodindex", "maketitle",
                "noindent", "tableofcontents"]
    autoclosing = ["chapter", "section", "subsection", "subsubsection",
                   "paragraph", "subparagraph", ]
    ifp = open(sys.argv[1])
    try:
        if len(sys.argv) == 3:
            ofp = open(sys.argv[2], "w")
        else:
            ofp = sys.stdout
        try:
            convert(ifp, ofp, table,
                    discards=discards, autoclosing=autoclosing)
        finally:
            # only close what was opened here; leave stdout alone
            if ofp is not sys.stdout:
                ofp.close()
    finally:
        ifp.close()


if __name__ == "__main__":
    main()