#! /usr/bin/env python

"""Generate ESIS events based on a LaTeX source document and
configuration data.

The conversion is not strong enough to work with arbitrary LaTeX
documents; it has only been designed to work with the highly stylized
markup used in the standard Python documentation. A lot of
information about specific markup is encoded in the control table
passed to the convert() function; changing this table can allow this
tool to support additional LaTeX markups.

The format of the table is largely undocumented; main() loads it with
load_table() from the 'conversion.xml' file located in sys.path[0], and
the TableHandler class shows which elements and attributes are
recognized. There is no command-line option to select an alternate
table file.
"""
Fred Drake | 30a68c7 | 1998-11-23 16:59:39 +0000 | [diff] [blame] | 17 | |
| 18 | import errno |
Fred Drake | 96e4a06 | 1999-07-29 22:22:13 +0000 | [diff] [blame] | 19 | import getopt |
| 20 | import os |
Fred Drake | 30a68c7 | 1998-11-23 16:59:39 +0000 | [diff] [blame] | 21 | import re |
Fred Drake | 30a68c7 | 1998-11-23 16:59:39 +0000 | [diff] [blame] | 22 | import sys |
Fred Drake | 381832e | 2001-11-30 19:30:03 +0000 | [diff] [blame] | 23 | import xml.sax |
Fred Drake | 691a5a7 | 2000-11-22 17:56:43 +0000 | [diff] [blame] | 24 | import xml.sax.saxutils |
Fred Drake | 30a68c7 | 1998-11-23 16:59:39 +0000 | [diff] [blame] | 25 | |
Fred Drake | 2262a80 | 2001-03-23 16:53:34 +0000 | [diff] [blame] | 26 | from esistools import encode |
| 27 | |
| 28 | |
# Debug level; 0 disables debugging.  main() increments it once per
# -D/--debug option.  Any nonzero value enables dbgmsg() output and makes
# new_stack() return the type-checking _Stack instead of a plain list.
DEBUG = 0
| 30 | |
| 31 | |
class LaTeXFormatError(Exception):
    """Raised when the LaTeX input cannot be handled by the converter."""
| 34 | |
| 35 | |
class LaTeXStackError(LaTeXFormatError):
    """Raised when an environment close does not match the open elements.

    Attributes:
        found: the environment name that was being closed.
        stack: a copy of the element stack at the time of the error.
    """

    def __init__(self, found, stack):
        details = (found, stack)
        text = "environment close for %s doesn't match;\n stack = %s" % details
        self.found = found
        # Copy the stack so later pushes/pops do not alter the report.
        self.stack = stack[:]
        LaTeXFormatError.__init__(self, text)
| 43 | |
| 44 | |
Fred Drake | 30a68c7 | 1998-11-23 16:59:39 +0000 | [diff] [blame] | 45 | _begin_env_rx = re.compile(r"[\\]begin{([^}]*)}") |
| 46 | _end_env_rx = re.compile(r"[\\]end{([^}]*)}") |
Fred Drake | 0eb7b2a | 1999-05-19 17:37:37 +0000 | [diff] [blame] | 47 | _begin_macro_rx = re.compile(r"[\\]([a-zA-Z]+[*]?) ?({|\s*\n?)") |
Fred Drake | 96c00b0 | 1999-05-07 19:59:02 +0000 | [diff] [blame] | 48 | _comment_rx = re.compile("%+ ?(.*)\n[ \t]*") |
Fred Drake | 691a5a7 | 2000-11-22 17:56:43 +0000 | [diff] [blame] | 49 | _text_rx = re.compile(r"[^]~%\\{}]+") |
Fred Drake | b5fc0ab | 2001-07-06 21:01:19 +0000 | [diff] [blame] | 50 | _optional_rx = re.compile(r"\s*[[]([^]]*)[]]", re.MULTILINE) |
Fred Drake | aeea981 | 1998-12-01 19:04:12 +0000 | [diff] [blame] | 51 | # _parameter_rx is this complicated to allow {...} inside a parameter; |
| 52 | # this is useful to match tabular layout specifications like {c|p{24pt}} |
| 53 | _parameter_rx = re.compile("[ \n]*{(([^{}}]|{[^}]*})*)}") |
Fred Drake | 30a68c7 | 1998-11-23 16:59:39 +0000 | [diff] [blame] | 54 | _token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$") |
| 55 | _start_group_rx = re.compile("[ \n]*{") |
| 56 | _start_optional_rx = re.compile("[ \n]*[[]") |
| 57 | |
| 58 | |
Fred Drake | 42f5298 | 1998-11-30 14:45:24 +0000 | [diff] [blame] | 59 | ESCAPED_CHARS = "$%#^ {}&~" |
Fred Drake | 30a68c7 | 1998-11-23 16:59:39 +0000 | [diff] [blame] | 60 | |
| 61 | |
def dbgmsg(msg):
    """Write *msg* followed by a newline to stderr when DEBUG is set."""
    if not DEBUG:
        return
    sys.stderr.write(msg + "\n")
| 65 | |
def pushing(name, point, depth):
    """Emit a debug trace for element *name* being opened at *point*.

    *depth* is accepted but not used.
    """
    message = "pushing <%s> at %s" % (name, point)
    dbgmsg(message)
Fred Drake | d7acf02 | 1999-01-14 17:38:12 +0000 | [diff] [blame] | 68 | |
def popping(name, point, depth):
    """Emit a debug trace for element *name* being closed at *point*.

    *depth* is accepted but not used.
    """
    message = "popping </%s> at %s" % (name, point)
    dbgmsg(message)
Fred Drake | d7acf02 | 1999-01-14 17:38:12 +0000 | [diff] [blame] | 71 | |
| 72 | |
Fred Drake | df85f0b | 2002-10-16 16:00:42 +0000 | [diff] [blame] | 73 | class _Stack(list): |
Fred Drake | 96e4a06 | 1999-07-29 22:22:13 +0000 | [diff] [blame] | 74 | def append(self, entry): |
Fred Drake | df85f0b | 2002-10-16 16:00:42 +0000 | [diff] [blame] | 75 | if not isinstance(entry, str): |
Fred Drake | 96e4a06 | 1999-07-29 22:22:13 +0000 | [diff] [blame] | 76 | raise LaTeXFormatError("cannot push non-string on stack: " |
| 77 | + `entry`) |
Fred Drake | 2262a80 | 2001-03-23 16:53:34 +0000 | [diff] [blame] | 78 | #dbgmsg("%s<%s>" % (" "*len(self.data), entry)) |
Fred Drake | df85f0b | 2002-10-16 16:00:42 +0000 | [diff] [blame] | 79 | list.append(self, entry) |
Fred Drake | 96e4a06 | 1999-07-29 22:22:13 +0000 | [diff] [blame] | 80 | |
| 81 | def pop(self, index=-1): |
Fred Drake | df85f0b | 2002-10-16 16:00:42 +0000 | [diff] [blame] | 82 | entry = self[index] |
| 83 | del self[index] |
| 84 | #dbgmsg("%s</%s>" % (" " * len(self), entry)) |
Fred Drake | 96e4a06 | 1999-07-29 22:22:13 +0000 | [diff] [blame] | 85 | |
| 86 | def __delitem__(self, index): |
Fred Drake | df85f0b | 2002-10-16 16:00:42 +0000 | [diff] [blame] | 87 | entry = self[index] |
| 88 | list.__delitem__(self, index) |
| 89 | #dbgmsg("%s</%s>" % (" " * len(self), entry)) |
Fred Drake | 96e4a06 | 1999-07-29 22:22:13 +0000 | [diff] [blame] | 90 | |
| 91 | |
def new_stack():
    """Return a fresh element stack.

    A type-checking _Stack is used when DEBUG is set, otherwise a plain
    list.
    """
    return _Stack() if DEBUG else []
Fred Drake | 96e4a06 | 1999-07-29 22:22:13 +0000 | [diff] [blame] | 97 | |
| 98 | |
class Conversion:
    """Convert LaTeX source text into ESIS events.

    The whole input is read in the constructor; convert() then walks the
    text and writes ESIS event lines through ofp.write.  The markup that
    is understood is described by *table*, a mapping from macro or
    environment name to a TableEntry.  Names missing from the table get
    a default entry added on first use.
    """

    # This is a really limited table of combinations, but it will have
    # to do for now.
    _combinations = {
        ("c", "c"): 0x00E7,
        ("'", "e"): 0x00E9,
        ('"', "o"): 0x00F6,
    }

    def __init__(self, ifp, ofp, table):
        """Read all of *ifp* and prepare to write events to *ofp*."""
        self.write = ofp.write
        self.ofp = ofp
        self.table = table
        # Trailing whitespace is stripped from every line; the extra
        # empty item makes the joined text end with a newline.
        lines = [s.rstrip() for s in ifp.readlines()]
        lines.append("")
        self.line = "\n".join(lines)
        self.preamble = 1

    def convert(self):
        """Convert the complete input."""
        self.subconvert()

    def subconvert(self, endchar=None, depth=0):
        """Parse content, including sub-structures.

        Parsing continues until the character *endchar* is found with no
        open structures, or until the end of the input data if *endchar*
        is None.  *depth* is only passed along to nested calls.

        Returns the remaining input, starting at *endchar*, when
        *endchar* was found (self.line is set to the same text);
        returns None when the input was exhausted.

        Raises LaTeXStackError for a mismatched environment close and
        LaTeXFormatError for markup that cannot be handled or elements
        left open at the end of the input.
        """
        stack = new_stack()
        line = self.line
        while line:
            if line[0] == endchar and not stack:
                self.line = line
                return line
            m = _comment_rx.match(line)
            if m:
                text = m.group(1)
                if text:
                    self.write("(COMMENT\n- %s \n)COMMENT\n-\\n\n"
                               % encode(text))
                line = line[m.end():]
                continue
            m = _begin_env_rx.match(line)
            if m:
                name = m.group(1)
                # Called for its side effects: it registers a default
                # entry for unknown names and rejects macros.
                self.get_env_entry(name)
                # re-write to use the macro handler
                line = r"\%s %s" % (name, line[m.end():])
                continue
            m = _end_env_rx.match(line)
            if m:
                # end of environment
                envname = m.group(1)
                entry = self.get_entry(envname)
                while stack and envname != stack[-1] \
                      and stack[-1] in entry.endcloses:
                    self.write(")%s\n" % stack.pop())
                if stack and envname == stack[-1]:
                    self.write(")%s\n" % entry.outputname)
                    del stack[-1]
                else:
                    raise LaTeXStackError(envname, stack)
                line = line[m.end():]
                continue
            m = _begin_macro_rx.match(line)
            if m:
                # start of macro
                macroname = m.group(1)
                if macroname == "c":
                    # Ugh!  This is a combining character...
                    endpos = m.end()
                    # Slicing yields "" at end of input, which
                    # combining_char() reports as a format error.
                    self.combining_char("c", line[endpos:endpos + 1])
                    line = line[endpos + 1:]
                    continue
                entry = self.get_entry(macroname)
                if entry.verbatim:
                    # magic case!  Everything up to the matching \end is
                    # emitted as-is.
                    endmarker = "\\end{%s}" % macroname
                    pos = line.find(endmarker)
                    if pos < 0:
                        # Without this check find()'s -1 would silently
                        # corrupt both the text and the remaining input.
                        raise LaTeXFormatError(
                            "missing %s for verbatim environment"
                            % endmarker)
                    text = line[m.end(1):pos]
                    stack.append(entry.name)
                    self.write("(%s\n" % entry.outputname)
                    self.write("-%s\n" % encode(text))
                    self.write(")%s\n" % entry.outputname)
                    stack.pop()
                    line = line[pos + len(endmarker):]
                    continue
                # Implicitly close elements that this macro terminates.
                while stack and stack[-1] in entry.closes:
                    top = stack.pop()
                    topentry = self.get_entry(top)
                    if topentry.outputname:
                        self.write(")%s\n-\\n\n" % topentry.outputname)
                #
                if entry.outputname and entry.empty:
                    self.write("e\n")
                #
                params, _optional, empty = self.start_macro(macroname)
                # rip off the macroname; the character matched after the
                # name is only consumed when there is nothing to parse
                if params or empty:
                    line = line[m.end(1):]
                else:
                    line = line[m.end():]
                opened = 0
                implied_content = 0

                # handle attribute mappings here:
                for pentry in params:
                    if pentry.type == "attribute":
                        if pentry.optional:
                            m = _optional_rx.match(line)
                            if m and entry.outputname:
                                line = line[m.end():]
                                self.dump_attr(pentry, m.group(1))
                        elif pentry.text and entry.outputname:
                            # value supplied by conversion spec:
                            self.dump_attr(pentry, pentry.text)
                        else:
                            m = _parameter_rx.match(line)
                            if not m:
                                raise LaTeXFormatError(
                                    "could not extract parameter %s for %s: %s"
                                    % (pentry.name, macroname,
                                       repr(line[:100])))
                            if entry.outputname:
                                self.dump_attr(pentry, m.group(1))
                            line = line[m.end():]
                    elif pentry.type == "child":
                        if pentry.optional:
                            m = _optional_rx.match(line)
                            if m:
                                line = line[m.end():]
                                if entry.outputname and not opened:
                                    opened = 1
                                    self.write("(%s\n" % entry.outputname)
                                    stack.append(macroname)
                                stack.append(pentry.name)
                                self.write("(%s\n" % pentry.name)
                                self.write("-%s\n" % encode(m.group(1)))
                                self.write(")%s\n" % pentry.name)
                                stack.pop()
                        else:
                            if entry.outputname and not opened:
                                opened = 1
                                self.write("(%s\n" % entry.outputname)
                                stack.append(entry.name)
                            self.write("(%s\n" % pentry.name)
                            stack.append(pentry.name)
                            # Drop the opening "{" and parse the child's
                            # content recursively up to its "}".
                            self.line = skip_white(line)[1:]
                            line = self.subconvert(
                                "}", len(stack) + depth + 1)[1:]
                            self.write(")%s\n" % stack.pop())
                    elif pentry.type == "content":
                        if pentry.implied:
                            implied_content = 1
                        else:
                            if entry.outputname and not opened:
                                opened = 1
                                self.write("(%s\n" % entry.outputname)
                                stack.append(entry.name)
                            line = skip_white(line)
                            # Slice so exhausted input is reported as
                            # missing content, not as an IndexError.
                            if line[:1] != "{":
                                raise LaTeXFormatError(
                                    "missing content for " + macroname)
                            self.line = line[1:]
                            line = self.subconvert("}", len(stack) + depth + 1)
                            if line and line[0] == "}":
                                line = line[1:]
                    elif pentry.type == "text" and pentry.text:
                        if entry.outputname and not opened:
                            opened = 1
                            stack.append(entry.name)
                            self.write("(%s\n" % entry.outputname)
                        self.write("-%s\n" % encode(pentry.text))
                    elif pentry.type == "entityref":
                        self.write("&%s\n" % pentry.name)
                if entry.outputname:
                    if not opened:
                        self.write("(%s\n" % entry.outputname)
                        stack.append(entry.name)
                    # With implied content the element stays open until
                    # a later "}", \end{...} or closing macro.
                    if not implied_content:
                        self.write(")%s\n" % entry.outputname)
                        stack.pop()
                continue
            if line[0] == "}":
                # end of macro or group
                if not stack:
                    raise LaTeXFormatError(
                        "unmatched '}': %s" % repr(line[:100]))
                macroname = stack[-1]
                if macroname:
                    conversion = self.table[macroname]
                    if conversion.outputname:
                        # otherwise, it was just a bare group
                        self.write(")%s\n" % conversion.outputname)
                del stack[-1]
                line = line[1:]
                continue
            if line[0] == "~":
                # don't worry about the "tie" aspect of this command
                line = line[1:]
                self.write("- \n")
                continue
            if line[0] == "{":
                # bare group; marked on the stack by an empty name
                stack.append("")
                line = line[1:]
                continue
            if line[0] == "\\" and len(line) > 1 \
               and line[1] in ESCAPED_CHARS:
                self.write("-%s\n" % encode(line[1]))
                line = line[2:]
                continue
            if line[:2] == r"\\":
                self.write("(BREAK\n)BREAK\n")
                line = line[2:]
                continue
            if line[:2] == r"\_":
                line = "_" + line[2:]
                continue
            if line[:2] in (r"\'", r'\"'):
                # combining characters...
                self.combining_char(line[1], line[2:3])
                line = line[3:]
                continue
            m = _text_rx.match(line)
            if m:
                text = encode(m.group())
                self.write("-%s\n" % text)
                line = line[m.end():]
                continue
            # special case because of \item[]
            # XXX can we axe this???
            if line[0] == "]":
                self.write("-]\n")
                line = line[1:]
                continue
            # avoid infinite loops
            extra = ""
            if len(line) > 100:
                extra = "..."
            raise LaTeXFormatError("could not identify markup: %s%s"
                                   % (repr(line[:100]), extra))
        # Out of input: close whatever may be closed implicitly.
        while stack:
            entry = self.get_entry(stack[-1])
            if entry.closes:
                self.write(")%s\n-%s\n" % (entry.outputname, encode("\n")))
                del stack[-1]
            else:
                break
        if stack:
            raise LaTeXFormatError("elements remain on stack: "
                                   + ", ".join(stack))
        # otherwise we just ran out of input here...

    def combining_char(self, prefix, char):
        """Write the character reference for accent *prefix* on *char*.

        Raises LaTeXFormatError when the pair is not in _combinations.
        """
        try:
            ordinal = self._combinations[(prefix, char)]
        except KeyError:
            raise LaTeXFormatError(
                "unsupported combining character sequence: %s"
                % repr("\\" + prefix + char))
        self.write("-\\%%%d;\n" % ordinal)

    def start_macro(self, name):
        """Return (parameters, first-parameter-optional, empty) for *name*."""
        conversion = self.get_entry(name)
        parameters = conversion.parameters
        optional = parameters and parameters[0].optional
        return parameters, optional, conversion.empty

    def get_entry(self, name):
        """Return the table entry for *name*.

        An unknown name gets a default entry with a single content
        parameter, which is stored in the table.
        """
        entry = self.table.get(name)
        if entry is None:
            dbgmsg("get_entry(%s) failing; building default entry!"
                   % repr(name))
            # not defined; build a default entry:
            entry = TableEntry(name)
            entry.has_content = 1
            entry.parameters.append(Parameter("content"))
            self.table[name] = entry
        return entry

    def get_env_entry(self, name):
        """Return the table entry for the environment *name*.

        An unknown name gets a default environment entry with implied
        content, which is stored in the table.  Raises LaTeXFormatError
        when *name* is defined in the table as a macro.
        """
        entry = self.table.get(name)
        if entry is None:
            # not defined; build a default entry:
            entry = TableEntry(name, 1)
            entry.has_content = 1
            entry.parameters.append(Parameter("content"))
            entry.parameters[-1].implied = 1
            self.table[name] = entry
        elif not entry.environment:
            raise LaTeXFormatError(
                name + " is defined as a macro; expected environment")
        return entry

    def dump_attr(self, pentry, value):
        """Write an attribute event for *pentry* with *value*.

        Nothing is written when the parameter has no name or the value
        is empty.  The type is TOKEN when the value matches _token_rx,
        otherwise CDATA.
        """
        if not (pentry.name and value):
            return
        if _token_rx.match(value):
            dtype = "TOKEN"
        else:
            dtype = "CDATA"
        self.write("A%s %s %s\n" % (pentry.name, dtype, encode(value)))
| 394 | |
| 395 | |
def convert(ifp, ofp, table):
    """Convert the LaTeX document read from *ifp* to ESIS events on *ofp*.

    *table* is the conversion table, as returned by load_table().  A
    broken pipe on output (EPIPE) ends the conversion quietly; every
    other IOError is propagated.
    """
    c = Conversion(ifp, ofp, table)
    try:
        c.convert()
    except IOError as err:
        # The errno attribute is used instead of unpacking the exception,
        # which only works when it carries exactly two arguments.
        if err.errno != errno.EPIPE:
            raise
| 403 | |
| 404 | |
def skip_white(line):
    """Return *line* without its leading skippable characters.

    A leading space, '%', newline, tab or carriage return is dropped
    together with any whitespace that follows it; this repeats until the
    text is empty or starts with some other character.
    """
    while True:
        if not line or line[0] not in " %\n\t\r":
            return line
        line = line[1:].lstrip()
| 409 | |
| 410 | |
Fred Drake | 96e4a06 | 1999-07-29 22:22:13 +0000 | [diff] [blame] | 411 | |
class TableEntry:
    """Conversion-table record for one LaTeX macro or environment."""

    def __init__(self, name, environment=0):
        # The output name defaults to the LaTeX name.
        self.name = self.outputname = name
        self.environment = environment
        # Macros start out empty, environments do not.
        self.empty = not environment
        self.has_content = self.verbatim = self.auto_close = 0
        # Each list attribute gets its own fresh list.
        self.parameters, self.closes, self.endcloses = [], [], []
| 424 | |
class Parameter:
    """One parameter of a table entry.

    *type* is the kind of parameter (the converter handles "attribute",
    "child", "content", "text" and "entityref"); *name* and *optional*
    are stored as given.  The text starts out empty and the parameter
    is not implied.
    """

    def __init__(self, type, name=None, optional=0):
        self.type, self.name, self.optional = type, name, optional
        self.text = ''
        self.implied = 0
| 432 | |
| 433 | |
class TableHandler(xml.sax.handler.ContentHandler):
    """SAX content handler that builds the conversion table.

    Each element <tag> is dispatched to the methods start_<tag>(attrs)
    and end_<tag>() when they exist; other elements are ignored.  The
    <environment> and <macro> elements create TableEntry objects, and
    the elements inside them add Parameter objects to the entry being
    built.
    """

    def __init__(self):
        xml.sax.handler.ContentHandler.__init__(self)
        self.__table = {}
        self.__buffer = ''
        # Cache: tag -> (start method or None, end method or None).
        self.__methods = {}
        # Entry being built between start_macro/start_environment and
        # end_macro.
        self.__current = None

    def get_table(self):
        """Return the table, a mapping from name to TableEntry.

        Environments without a content parameter get an implied one.
        """
        for entry in self.__table.values():
            if entry.environment and not entry.has_content:
                p = Parameter("content")
                p.implied = 1
                entry.parameters.append(p)
                entry.has_content = 1
        return self.__table

    def startElement(self, tag, attrs):
        try:
            start, end = self.__methods[tag]
        except KeyError:
            start = getattr(self, "start_" + tag, None)
            end = getattr(self, "end_" + tag, None)
            self.__methods[tag] = (start, end)
        if start:
            start(attrs)

    def endElement(self, tag):
        # A tag that startElement() never saw has no handlers.
        end = self.__methods.get(tag, (None, None))[1]
        if end:
            end()

    def endDocument(self):
        self.__methods.clear()

    def characters(self, data):
        self.__buffer += data

    def start_environment(self, attrs):
        name = attrs["name"]
        self.__current = TableEntry(name, environment=1)
        self.__current.verbatim = attrs.get("verbatim") == "yes"
        if "outputname" in attrs:
            self.__current.outputname = attrs.get("outputname")
        self.__current.endcloses = attrs.get("endcloses", "").split()

    def end_environment(self):
        self.end_macro()

    def start_macro(self, attrs):
        name = attrs["name"]
        self.__current = TableEntry(name)
        self.__current.closes = attrs.get("closes", "").split()
        if "outputname" in attrs:
            self.__current.outputname = attrs.get("outputname")

    def end_macro(self):
        """Store the finished entry; raise ValueError on a duplicate name."""
        name = self.__current.name
        if name in self.__table:
            raise ValueError("name %s already in use" % repr(name))
        self.__table[name] = self.__current
        self.__current = None

    def start_attribute(self, attrs):
        # A missing or empty name leaves the parameter unnamed.
        name = attrs.get("name") or None
        optional = attrs.get("optional") == "yes"
        p = Parameter("attribute", name, optional=optional)
        self.__current.parameters.append(p)
        self.__buffer = ''

    def end_attribute(self):
        # The element's character data is the value supplied by the table.
        self.__current.parameters[-1].text = self.__buffer

    def start_entityref(self, attrs):
        name = attrs["name"]
        p = Parameter("entityref", name)
        self.__current.parameters.append(p)

    def start_child(self, attrs):
        name = attrs["name"]
        p = Parameter("child", name, attrs.get("optional") == "yes")
        self.__current.parameters.append(p)
        self.__current.empty = 0

    def start_content(self, attrs):
        p = Parameter("content")
        p.implied = attrs.get("implied") == "yes"
        # Content of an environment is always implied.
        if self.__current.environment:
            p.implied = 1
        self.__current.parameters.append(p)
        self.__current.has_content = 1
        self.__current.empty = 0

    def start_text(self, attrs):
        self.__current.empty = 0
        self.__buffer = ''

    def end_text(self):
        p = Parameter("text")
        p.text = self.__buffer
        self.__current.parameters.append(p)
| 532 | |
Fred Drake | 96e4a06 | 1999-07-29 22:22:13 +0000 | [diff] [blame] | 533 | |
def load_table(fp):
    """Parse the XML conversion table in *fp* and return the table.

    The document is fed through a TableHandler; the result is the
    mapping returned by its get_table() method.
    """
    handler = TableHandler()
    xml.sax.parse(fp, handler)
    table = handler.get_table()
    return table
Fred Drake | 96e4a06 | 1999-07-29 22:22:13 +0000 | [diff] [blame] | 538 | |
| 539 | |
def main():
    """Command-line entry point.

    Usage: [-D|--debug] [infile [outfile]]

    Each -D/--debug option raises the DEBUG level.  Input defaults to
    stdin and output to stdout.  The conversion table is read from
    'conversion.xml' in sys.path[0].  Invalid options or more than two
    arguments print a usage message to stderr and exit with status 2.
    """
    global DEBUG
    usage = ("usage: %s [-D|--debug] [infile [outfile]]\n"
             % os.path.basename(sys.argv[0]))
    try:
        opts, args = getopt.getopt(sys.argv[1:], "D", ["debug"])
    except getopt.GetoptError as err:
        sys.stderr.write("%s\n%s" % (err, usage))
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-D", "--debug"):
            DEBUG += 1
    if len(args) > 2:
        sys.stderr.write(usage)
        sys.exit(2)

    table_fp = open(os.path.join(sys.path[0], 'conversion.xml'))
    try:
        table = load_table(table_fp)
    finally:
        table_fp.close()

    ifp = sys.stdin
    ofp = sys.stdout
    # Only files opened here are closed here; stdin/stdout are left alone.
    opened = []
    try:
        if len(args) >= 1:
            ifp = open(args[0])
            opened.append(ifp)
        if len(args) == 2:
            ofp = open(args[1], "w")
            opened.append(ofp)
        convert(ifp, ofp, table)
    finally:
        for fp in opened:
            fp.close()


if __name__ == "__main__":
    main()