blob: b6e98229b15fdaeee7949d46bfc7de64887b45ea [file] [log] [blame]
Fred Drake30a68c71998-11-23 16:59:39 +00001#! /usr/bin/env python
2
Fred Drake0eb7b2a1999-05-19 17:37:37 +00003"""Generate ESIS events based on a LaTeX source document and
4configuration data.
5
6The conversion is not strong enough to work with arbitrary LaTeX
7documents; it has only been designed to work with the highly stylized
8markup used in the standard Python documentation. A lot of
9information about specific markup is encoded in the control table
10passed to the convert() function; changing this table can allow this
11tool to support additional LaTeX markups.
12
13The format of the table is largely undocumented; see the commented
14headers where the table is specified in main(). There is no provision
15to load an alternate table from an external file.
Fred Drake30a68c71998-11-23 16:59:39 +000016"""
# NOTE(review): '$Revision$' looks like a version-control keyword
# placeholder that is expanded on checkout -- confirm before relying on it.
__version__ = '$Revision$'
18
19import errno
20import re
21import string
22import StringIO
23import sys
24
Fred Drakeaeea9811998-12-01 19:04:12 +000025from esistools import encode
Fred Drake54fb7fb1999-05-10 19:36:03 +000026from types import ListType, StringType, TupleType
Fred Drakeaeea9811998-12-01 19:04:12 +000027
Fred Drake30a68c71998-11-23 16:59:39 +000028
# Set to a true value to make dbgmsg() and the DEBUG-guarded traces in
# Conversion.subconvert() write diagnostics to stderr.
DEBUG = 0
30
31
class Error(Exception):
    """Root of this module's exception hierarchy."""
34
class LaTeXFormatError(Error):
    """Raised when the LaTeX input cannot be parsed by the converter."""
37
38
# \begin{NAME}; group 1 is the environment name.
_begin_env_rx = re.compile(r"[\\]begin{([^}]*)}")
# \end{NAME}; group 1 is the environment name.
_end_env_rx = re.compile(r"[\\]end{([^}]*)}")
# \macro or \macro*, optionally followed by a single space and then either
# an opening brace or a run of whitespace; group 1 is the macro name
# (including a trailing '*' if present).
_begin_macro_rx = re.compile(r"[\\]([a-zA-Z]+[*]?) ?({|\s*\n?)")
# One or more '%', an optional space, then the rest of the line (group 1);
# the newline and any blanks/tabs that start the next line are consumed too.
_comment_rx = re.compile("%+ ?(.*)\n[ \t]*")
# A run of ordinary text: everything up to the next ']', '%', backslash
# or brace.
_text_rx = re.compile(r"[^]%\\{}]+")
# An optional [argument], possibly preceded by whitespace; group 1 is the
# text between the brackets.
_optional_rx = re.compile(r"\s*[[]([^]]*)[]]")
# _parameter_rx is this complicated to allow {...} inside a parameter;
# this is useful to match tabular layout specifications like {c|p{24pt}}
_parameter_rx = re.compile("[ \n]*{(([^{}}]|{[^}]*})*)}")
# Parameter values matching this are written as TOKEN attributes; all
# other values are written as CDATA.
_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
# Opening brace of a group, possibly preceded by spaces/newlines.
_start_group_rx = re.compile("[ \n]*{")
# Opening bracket of an optional argument, possibly preceded by
# spaces/newlines.
_start_optional_rx = re.compile("[ \n]*[[]")
51
52
# Characters that, when preceded by a backslash in the input, are emitted
# as literal character data by Conversion.subconvert().
ESCAPED_CHARS = "$%#^ {}&~"
Fred Drake30a68c71998-11-23 16:59:39 +000054
55
def dbgmsg(msg):
    """Write msg followed by a newline to stderr when DEBUG is true."""
    if not DEBUG:
        return
    sys.stderr.write(msg + "\n")
59
def pushing(name, point, depth):
    """Trace the opening of element name at code location point.

    The message is indented by depth and handed to dbgmsg().
    """
    indent = " " * depth
    dbgmsg("%s<%s> at %s" % (indent, name, point))
Fred Draked7acf021999-01-14 17:38:12 +000062
def popping(name, point, depth):
    """Trace the closing of element name at code location point.

    The message is indented by depth and handed to dbgmsg().
    """
    indent = " " * depth
    dbgmsg("%s</%s> at %s" % (indent, name, point))
Fred Draked7acf021999-01-14 17:38:12 +000065
66
class Conversion:
    """Convert LaTeX text read from a file object into ESIS events.

    The conversion is driven by a control table that maps macro and
    environment names to tuples of the form

        ([attribute names], is1stOptional, isEmpty, isEnv, nocontent)

    where each attribute name is a plain string (normal attribute), a
    1-tuple (sub-element that receives the content of the macro) or a
    1-element list (ordinary sub-element).
    """

    def __init__(self, ifp, ofp, table=None, discards=(), autoclosing=()):
        """Read all input from ifp and prepare to write events to ofp.

        table       -- control table (see the class docstring); None is
                       treated as an empty table
        discards    -- names of macros whose own events are not written
        autoclosing -- names of elements that are closed implicitly
        """
        # pop_output() installs the top of the stack as the current output
        # and binds self.write, so seed the stack with ofp and pop it.
        self.ofp_stack = [ofp]
        self.pop_output()
        if table is None:
            # start_macro() and subconvert() call self.table.get().
            table = {}
        self.table = table
        self.discards = discards
        self.autoclosing = autoclosing
        # The whole document is held as one string with trailing
        # whitespace removed from every line.
        self.line = string.join(map(string.rstrip, ifp.readlines()), "\n")
        self.err_write = sys.stderr.write
        self.preamble = 1

    def push_output(self, ofp):
        """Make ofp the current output, remembering the previous one."""
        self.ofp_stack.append(self.ofp)
        self.ofp = ofp
        self.write = ofp.write

    def pop_output(self):
        """Restore the most recently saved output."""
        self.ofp = self.ofp_stack.pop()
        self.write = self.ofp.write

    def subconvert(self, endchar=None, depth=0):
        """Convert self.line, writing ESIS events to the current output.

        endchar -- when given, conversion stops as soon as this character
                   is seen while no element opened by this call is still
                   open; the unconsumed text, starting with endchar, is
                   stored in self.line and returned
        depth   -- nesting depth, used only to indent debugging output

        Returns None when the input is exhausted.  Raises LaTeXFormatError
        when the markup cannot be interpreted or elements are left open.
        """
        stack = []
        line = self.line
        if DEBUG and endchar:
            self.err_write(
                "subconvert(%s)\n line = %s\n"
                % (repr(endchar), repr(line[:20])))
        while line:
            if line[0] == endchar and not stack:
                if DEBUG:
                    self.err_write("subconvert() --> %s\n"
                                   % repr(line[1:21]))
                self.line = line
                return line
            m = _comment_rx.match(line)
            if m:
                text = m.group(1)
                if text:
                    self.write("(COMMENT\n- %s \n)COMMENT\n-\\n\n"
                               % encode(text))
                line = line[m.end():]
                continue
            m = _begin_env_rx.match(line)
            if m:
                # re-write to use the macro handler
                line = r"\%s %s" % (m.group(1), line[m.end():])
                continue
            m = _end_env_rx.match(line)
            if m:
                # end of environment
                envname = m.group(1)
                if envname == "document":
                    # special magic
                    for n in stack[1:]:
                        if n not in self.autoclosing:
                            raise LaTeXFormatError(
                                "open element on stack: " + repr(n))
                    # should be more careful, but this is easier to code:
                    stack = []
                    self.write(")document\n")
                elif stack and envname == stack[-1]:
                    self.write(")%s\n" % envname)
                    del stack[-1]
                    popping(envname, "a", len(stack) + depth)
                else:
                    self.err_write("stack: %s\n" % repr(stack))
                    raise LaTeXFormatError(
                        "environment close for %s doesn't match" % envname)
                line = line[m.end():]
                continue
            m = _begin_macro_rx.match(line)
            if m:
                # start of macro
                macroname = m.group(1)
                if macroname == "verbatim":
                    # really magic case!
                    pos = string.find(line, "\\end{verbatim}")
                    if pos < 0:
                        # Without this check the -1 would be used as a
                        # slice index and silently corrupt the input.
                        raise LaTeXFormatError(
                            "verbatim environment is never closed: %s"
                            % repr(line[:100]))
                    text = line[m.end(1):pos]
                    self.write("(verbatim\n")
                    self.write("-%s\n" % encode(text))
                    self.write(")verbatim\n")
                    line = line[pos + len("\\end{verbatim}"):]
                    continue
                numbered = 1
                opened = 0
                if macroname[-1] == "*":
                    macroname = macroname[:-1]
                    numbered = 0
                if macroname in self.autoclosing and macroname in stack:
                    # Close everything opened since the previous element
                    # of the same name, then that element itself.
                    while stack[-1] != macroname:
                        top = stack.pop()
                        if top and top not in self.discards:
                            self.write(")%s\n-\\n\n" % top)
                        popping(top, "b", len(stack) + depth)
                    if macroname not in self.discards:
                        self.write("-\\n\n)%s\n-\\n\n" % macroname)
                    popping(macroname, "c", len(stack) + depth - 1)
                    del stack[-1]
                #
                # Events for discarded macros go to a throw-away buffer.
                if macroname in self.discards:
                    self.push_output(StringIO.StringIO())
                else:
                    self.push_output(self.ofp)
                #
                params, optional, empty, environ = self.start_macro(macroname)
                if not numbered:
                    self.write("Anumbered TOKEN no\n")
                # rip off the macroname
                if params:
                    line = line[m.end(1):]
                elif empty:
                    line = line[m.end(1):]
                else:
                    line = line[m.end():]
                #
                # Very ugly special case to deal with \item[].  The catch
                # is that this needs to occur outside the for loop that
                # handles attribute parsing so we can 'continue' the outer
                # loop.
                #
                if optional and type(params[0]) is TupleType:
                    # the attribute name isn't used in this special case
                    pushing(macroname, "a", depth + len(stack))
                    stack.append(macroname)
                    self.write("(%s\n" % macroname)
                    m = _start_optional_rx.match(line)
                    if m:
                        self.line = line[m.end():]
                        line = self.subconvert("]", depth + len(stack))
                        # NOTE(review): the returned text still starts
                        # with "]"; it is emitted as character data by
                        # the "]" special case further down -- confirm
                        # this is intended.
                        line = "}" + line
                    # Balance the push_output() above; the regular path
                    # does the same just before its 'continue'.
                    self.pop_output()
                    continue
                # handle attribute mappings here:
                for attrname in params:
                    if optional:
                        optional = 0
                        if type(attrname) is StringType:
                            m = _optional_rx.match(line)
                            if m:
                                line = line[m.end():]
                                self.write("A%s TOKEN %s\n"
                                           % (attrname, encode(m.group(1))))
                    elif type(attrname) is TupleType:
                        # This is a sub-element, but place the macro we
                        # found on the stack (\section-like); the
                        # content of the macro will become the content
                        # of the attribute element, and the macro will
                        # have to be closed some other way (such as
                        # auto-closing).
                        pushing(macroname, "b", len(stack) + depth)
                        stack.append(macroname)
                        self.write("(%s\n" % macroname)
                        macroname = attrname[0]
                        m = _start_group_rx.match(line)
                        if m:
                            line = line[m.end():]
                    elif type(attrname) is ListType:
                        # A normal subelement: <macroname><attrname>...</>...
                        attrname = attrname[0]
                        if not opened:
                            opened = 1
                            self.write("(%s\n" % macroname)
                            pushing(macroname, "c", len(stack) + depth)
                        self.write("(%s\n" % attrname)
                        pushing(attrname, "sub-elem", len(stack) + depth + 1)
                        # Skip to just past the opening brace, convert the
                        # group, then drop the closing brace.
                        self.line = skip_white(line)[1:]
                        line = self.subconvert("}", len(stack) + depth + 1)[1:]
                        popping(attrname, "sub-elem", len(stack) + depth + 1)
                        self.write(")%s\n" % attrname)
                    else:
                        m = _parameter_rx.match(line)
                        if not m:
                            raise LaTeXFormatError(
                                "could not extract parameter %s for %s: %s"
                                % (attrname, macroname, repr(line[:100])))
                        value = m.group(1)
                        if _token_rx.match(value):
                            dtype = "TOKEN"
                        else:
                            dtype = "CDATA"
                        self.write("A%s %s %s\n"
                                   % (attrname, dtype, encode(value)))
                        line = line[m.end():]
                if params and type(params[-1]) is StringType \
                   and (not empty) and not environ:
                    # attempt to strip off next '{'
                    m = _start_group_rx.match(line)
                    if not m:
                        raise LaTeXFormatError(
                            "non-empty element '%s' has no content: %s"
                            % (macroname, line[:12]))
                    line = line[m.end():]
                if not opened:
                    self.write("(%s\n" % macroname)
                    pushing(macroname, "d", len(stack) + depth)
                if empty:
                    # Fake a closing brace so the element is closed on the
                    # next pass through the loop.
                    line = "}" + line
                stack.append(macroname)
                self.pop_output()
                continue
            if line[0] == "}":
                # end of macro or group
                if not stack:
                    raise LaTeXFormatError(
                        "unbalanced '}': %s" % repr(line[:100]))
                macroname = stack[-1]
                conversion = self.table.get(macroname)
                if macroname \
                   and macroname not in self.discards \
                   and type(conversion) is not StringType:
                    # otherwise, it was just a bare group
                    self.write(")%s\n" % stack[-1])
                popping(macroname, "d", len(stack) + depth - 1)
                del stack[-1]
                line = line[1:]
                continue
            if line[0] == "{":
                # A bare group is tracked with an empty name.
                pushing("", "e", len(stack) + depth)
                stack.append("")
                line = line[1:]
                continue
            # The length check keeps a lone trailing backslash from raising
            # IndexError; it falls through to the error at the bottom.
            if line[0] == "\\" and len(line) > 1 \
               and line[1] in ESCAPED_CHARS:
                self.write("-%s\n" % encode(line[1]))
                line = line[2:]
                continue
            if line[:2] == r"\\":
                self.write("(BREAK\n)BREAK\n")
                line = line[2:]
                continue
            m = _text_rx.match(line)
            if m:
                text = encode(m.group())
                self.write("-%s\n" % text)
                line = line[m.end():]
                continue
            # special case because of \item[]
            if line[0] == "]":
                self.write("-]\n")
                line = line[1:]
                continue
            # avoid infinite loops
            extra = ""
            if len(line) > 100:
                extra = "..."
            raise LaTeXFormatError("could not identify markup: %s%s"
                                   % (repr(line[:100]), extra))
        # Out of input: implicitly close whatever may be auto-closed.
        while stack and stack[-1] in self.autoclosing:
            self.write("-\\n\n")
            self.write(")%s\n" % stack[-1])
            popping(stack.pop(), "e", len(stack) + depth - 1)
        if stack:
            raise LaTeXFormatError("elements remain on stack: "
                                   + string.join(stack, ", "))
        # otherwise we just ran out of input here...

    def convert(self):
        """Convert the whole document."""
        self.subconvert()

    def start_macro(self, name):
        """Look up name in the control table.

        Returns (params, optional, empty, environ).  Names missing from
        the table get ([], 0, 0, 0).  An "e" event is written for entries
        flagged empty; entries flagged nocontent are reported as empty
        without writing that event.
        """
        conversion = self.table.get(name, ([], 0, 0, 0, 0))
        params, optional, empty, environ, nocontent = conversion
        if empty:
            self.write("e\n")
        elif nocontent:
            empty = 1
        return params, optional, empty, environ
Fred Drake30a68c71998-11-23 16:59:39 +0000332
333
def convert(ifp, ofp, table=None, discards=(), autoclosing=()):
    """Convert the LaTeX document read from ifp to ESIS events on ofp.

    table, discards and autoclosing are handed to Conversion; a table of
    None is replaced by an empty dictionary.  An IOError whose errno is
    EPIPE is swallowed; every other IOError is re-raised.
    """
    if table is None:
        table = {}
    c = Conversion(ifp, ofp, table, discards, autoclosing)
    try:
        c.convert()
    except IOError:
        # Read errno from the exception object instead of unpacking the
        # exception into (err, msg): unpacking fails with ValueError when
        # the IOError does not carry exactly two arguments, which would
        # mask the original error.
        err = sys.exc_info()[1]
        if getattr(err, "errno", None) != errno.EPIPE:
            raise
341
342
def skip_white(line):
    """Return line without its leading blanks and '%' characters.

    While the first character is a space, '%', newline or tab, that
    character is dropped and the remainder is left-stripped of whitespace.
    """
    skippable = " %\n\t"
    while line:
        if line[0] not in skippable:
            break
        line = line[1:].lstrip()
    return line
347
348
def main():
    """Command-line entry point.

    Usage: SCRIPT input-file [output-file]

    Events are written to standard output when no output file is named.
    Exits with status 2 when the number of arguments is wrong.
    """
    nargs = len(sys.argv)
    if nargs not in (2, 3):
        # The original called an undefined usage() here.
        sys.stderr.write("usage: %s input-file [output-file]\n"
                         % sys.argv[0])
        sys.exit(2)
    table = {
        # entries have the form:
        # name: ([attribute names], is1stOptional, isEmpty, isEnv, nocontent)
        # attribute names can be:
        #   "string"    -- normal attribute
        #   ("string",) -- sub-element with content of macro; like for \section
        #   ["string"]  -- sub-element
        "appendix": ([], 0, 1, 0, 0),
        "bifuncindex": (["name"], 0, 1, 0, 0),
        "catcode": ([], 0, 1, 0, 0),
        "cfuncdesc": (["type", "name", ("args",)], 0, 0, 1, 0),
        "chapter": ([("title",)], 0, 0, 0, 0),
        "chapter*": ([("title",)], 0, 0, 0, 0),
        "classdesc": (["name", ("args",)], 0, 0, 1, 0),
        "ctypedesc": (["name"], 0, 0, 1, 0),
        "cvardesc": (["type", "name"], 0, 0, 1, 0),
        "datadesc": (["name"], 0, 0, 1, 0),
        "declaremodule": (["id", "type", "name"], 1, 1, 0, 0),
        "deprecated": (["release"], 0, 0, 0, 0),
        "documentclass": (["classname"], 0, 1, 0, 0),
        "excdesc": (["name"], 0, 0, 1, 0),
        "funcdesc": (["name", ("args",)], 0, 0, 1, 0),
        "funcdescni": (["name", ("args",)], 0, 0, 1, 0),
        "funcline": (["name"], 0, 0, 0, 0),
        "funclineni": (["name"], 0, 0, 0, 0),
        "geq": ([], 0, 1, 0, 0),
        "hline": ([], 0, 1, 0, 0),
        "include": (["source"], 0, 1, 0, 0),
        "indexii": (["ie1", "ie2"], 0, 1, 0, 0),
        "indexiii": (["ie1", "ie2", "ie3"], 0, 1, 0, 0),
        "indexiv": (["ie1", "ie2", "ie3", "ie4"], 0, 1, 0, 0),
        "indexname": ([], 0, 0, 0, 0),
        "input": (["source"], 0, 1, 0, 0),
        "item": ([("leader",)], 1, 0, 0, 0),
        "label": (["id"], 0, 1, 0, 0),
        "labelwidth": ([], 0, 1, 0, 0),
        "large": ([], 0, 1, 0, 0),
        "LaTeX": ([], 0, 1, 0, 0),
        "leftmargin": ([], 0, 1, 0, 0),
        "leq": ([], 0, 1, 0, 0),
        "lineii": ([["entry"], ["entry"]], 0, 0, 0, 1),
        "lineiii": ([["entry"], ["entry"], ["entry"]], 0, 0, 0, 1),
        "lineiv": ([["entry"], ["entry"], ["entry"], ["entry"]], 0, 0, 0, 1),
        "localmoduletable": ([], 0, 1, 0, 0),
        "manpage": (["name", "section"], 0, 1, 0, 0),
        "memberdesc": (["class", "name"], 1, 0, 1, 0),
        "methoddesc": (["class", "name", ("args",)], 1, 0, 1, 0),
        "methoddescni": (["class", "name", ("args",)], 1, 0, 1, 0),
        "methodline": (["class", "name"], 1, 0, 0, 0),
        "methodlineni": (["class", "name"], 1, 0, 0, 0),
        "moduleauthor": (["name", "email"], 0, 1, 0, 0),
        "opcodedesc": (["name", "var"], 0, 0, 1, 0),
        "par": ([], 0, 1, 0, 0),
        "paragraph": ([("title",)], 0, 0, 0, 0),
        "refbimodindex": (["name"], 0, 1, 0, 0),
        "refexmodindex": (["name"], 0, 1, 0, 0),
        "refmodindex": (["name"], 0, 1, 0, 0),
        "refstmodindex": (["name"], 0, 1, 0, 0),
        "refmodule": (["ref"], 1, 0, 0, 0),
        "renewcommand": (["macro"], 0, 0, 0, 0),
        "rfc": (["num"], 0, 1, 0, 0),
        "section": ([("title",)], 0, 0, 0, 0),
        "sectionauthor": (["name", "email"], 0, 1, 0, 0),
        "seemodule": (["ref", "name"], 1, 0, 0, 0),
        "stindex": (["type"], 0, 1, 0, 0),
        "subparagraph": ([("title",)], 0, 0, 0, 0),
        "subsection": ([("title",)], 0, 0, 0, 0),
        "subsubsection": ([("title",)], 0, 0, 0, 0),
        "list": (["bullet", "init"], 0, 0, 1, 0),
        "tableii": (["colspec", "style",
                     ["entry"], ["entry"]], 0, 0, 1, 0),
        "tableiii": (["colspec", "style",
                      ["entry"], ["entry"], ["entry"]], 0, 0, 1, 0),
        "tableiv": (["colspec", "style",
                     ["entry"], ["entry"], ["entry"], ["entry"]], 0, 0, 1, 0),
        "version": ([], 0, 1, 0, 0),
        "versionadded": (["version"], 0, 1, 0, 0),
        "versionchanged": (["version"], 0, 1, 0, 0),
        "withsubitem": (["text"], 0, 0, 0, 0),
        #
        "ABC": ([], 0, 1, 0, 0),
        "ASCII": ([], 0, 1, 0, 0),
        "C": ([], 0, 1, 0, 0),
        "Cpp": ([], 0, 1, 0, 0),
        "EOF": ([], 0, 1, 0, 0),
        "e": ([], 0, 1, 0, 0),
        "ldots": ([], 0, 1, 0, 0),
        "NULL": ([], 0, 1, 0, 0),
        "POSIX": ([], 0, 1, 0, 0),
        "UNIX": ([], 0, 1, 0, 0),
        #
        # Things that will actually be going away!
        #
        # (makeindex, makemodindex and maketitle used to be listed a
        # second time above with identical values.)
        "fi": ([], 0, 1, 0, 0),
        "ifhtml": ([], 0, 1, 0, 0),
        "makeindex": ([], 0, 1, 0, 0),
        "makemodindex": ([], 0, 1, 0, 0),
        "maketitle": ([], 0, 1, 0, 0),
        "noindent": ([], 0, 1, 0, 0),
        "protect": ([], 0, 1, 0, 0),
        "tableofcontents": ([], 0, 1, 0, 0),
        }
    ifp = open(sys.argv[1])
    if nargs == 3:
        ofp = open(sys.argv[2], "w")
    else:
        ofp = sys.stdout
    try:
        convert(ifp, ofp, table,
                discards=["fi", "ifhtml", "makeindex", "makemodindex",
                          "maketitle", "noindent", "tableofcontents"],
                autoclosing=["chapter", "section", "subsection",
                             "subsubsection", "paragraph", "subparagraph"])
    finally:
        # Close what this function opened; leave sys.stdout alone.
        ifp.close()
        if ofp is not sys.stdout:
            ofp.close()
Fred Drake30a68c71998-11-23 16:59:39 +0000468
469
# Run the converter only when this file is executed as a script.
if __name__ == "__main__":
    main()