blob: f39b0214a6055bebf25b918842511c25ea923ded [file] [log] [blame]
Fred Drake30a68c71998-11-23 16:59:39 +00001#! /usr/bin/env python
2
3"""Generate ESIS events based on a LaTeX source document and configuration
4data.
Fred Drake30a68c71998-11-23 16:59:39 +00005"""
6__version__ = '$Revision$'
7
8import errno
9import re
10import string
11import StringIO
12import sys
13
Fred Drakeaeea9811998-12-01 19:04:12 +000014from esistools import encode
Fred Drake54fb7fb1999-05-10 19:36:03 +000015from types import ListType, StringType, TupleType
Fred Drakeaeea9811998-12-01 19:04:12 +000016
Fred Drake30a68c71998-11-23 16:59:39 +000017
# Set to a true value to get trace output on stderr; it is checked by
# dbgmsg() and by Conversion.subconvert().
DEBUG = 0
19
20
class Error(Exception):
    """Base class of the exceptions defined in this module."""
23
class LaTeXFormatError(Error):
    """Raised when the LaTeX input cannot be converted."""
26
27
# \begin{name} and \end{name}; group 1 is the environment name.
_begin_env_rx = re.compile(r"[\\]begin{([^}]*)}")
_end_env_rx = re.compile(r"[\\]end{([^}]*)}")
# A backslash followed by a macro name made of letters with an optional
# trailing "*" (group 1), then either "{" or optional whitespace (group 2).
_begin_macro_rx = re.compile(r"[\\]([a-zA-Z]+[*]?)({|\s*\n?)")
# One or more "%", an optional single space, the rest of the line (group 1),
# the newline, and any blanks/tabs that start the following line.
_comment_rx = re.compile("%+ ?(.*)\n[ \t]*")
# A run of characters containing none of "]", "%", backslash, "{" or "}".
_text_rx = re.compile(r"[^]%\\{}]+")
# Optional whitespace followed by [...]; group 1 is the bracketed text,
# which may not itself contain "]".
_optional_rx = re.compile(r"\s*[[]([^]]*)[]]")
# _parameter_rx is this complicated to allow {...} inside a parameter;
# this is useful to match tabular layout specifications like {c|p{24pt}}
_parameter_rx = re.compile("[ \n]*{(([^{}}]|{[^}]*})*)}")
# A letter followed by letters, digits, "." or "-" up to the end of the
# string; subconvert() uses it to choose between TOKEN and CDATA attributes.
_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
# Optional spaces/newlines followed by an opening "{" or "[".
_start_group_rx = re.compile("[ \n]*{")
_start_optional_rx = re.compile("[ \n]*[[]")


# Characters that subconvert() emits as literal text when they directly
# follow a backslash.
ESCAPED_CHARS = "$%#^ {}&~"
Fred Drake30a68c71998-11-23 16:59:39 +000043
44
def dbgmsg(msg):
    """Write msg and a newline to stderr, but only when DEBUG is true."""
    if not DEBUG:
        return
    sys.stderr.write(msg + "\n")
48
def pushing(name, point, depth):
    """Trace the opening of element name; point labels the call site."""
    indent = " " * depth
    dbgmsg("%s<%s> at %s" % (indent, name, point))
Fred Draked7acf021999-01-14 17:38:12 +000051
def popping(name, point, depth):
    """Trace the closing of element name; point labels the call site."""
    indent = " " * depth
    dbgmsg("%s</%s> at %s" % (indent, name, point))
Fred Draked7acf021999-01-14 17:38:12 +000054
55
class Conversion:
    """Translate a LaTeX document into a stream of ESIS events.

    The complete input is read when the object is created and kept in
    self.line; subconvert() consumes it from the front and writes the
    events to the current output file.

    table       -- maps a macro or environment name to a tuple
                   (params, optional, empty, environ, nocontent)
    discards    -- names whose opening events are written to a scratch
                   buffer and whose closing events are not written
    autoclosing -- names of elements that are closed implicitly: when the
                   same macro is started again while still open, at
                   \\end{document}, and at the end of the input
    """

    def __init__(self, ifp, ofp, table=None, discards=(), autoclosing=()):
        """Read all of ifp and arrange for events to be written to ofp."""
        if table is None:
            # Every lookup goes through self.table.get(), so an empty
            # table just means that no macro has a special description.
            table = {}
        self.ofp_stack = [ofp]
        self.pop_output()
        self.table = table
        self.discards = discards
        self.autoclosing = autoclosing
        # Trailing whitespace is removed from every input line.
        self.line = string.join(map(string.rstrip, ifp.readlines()), "\n")
        self.err_write = sys.stderr.write
        self.preamble = 1

    def push_output(self, ofp):
        """Make ofp the current output, remembering the previous one."""
        self.ofp_stack.append(self.ofp)
        self.ofp = ofp
        self.write = ofp.write

    def pop_output(self):
        """Return to the output that was current before push_output()."""
        self.ofp = self.ofp_stack.pop()
        self.write = self.ofp.write

    def subconvert(self, endchar=None, depth=0):
        """Convert the input at the front of self.line to ESIS events.

        endchar -- when given, conversion stops at the first occurrence
                   of this character seen while no element opened by this
                   call is still open.  self.line is set to the remaining
                   input, which is also returned and still starts with
                   endchar.
        depth   -- nesting level; only used to indent debugging output.

        Without endchar the whole input is consumed and None is returned.
        Raises LaTeXFormatError when the input cannot be converted.
        """
        stack = []
        line = self.line
        if DEBUG and endchar:
            self.err_write(
                "subconvert(%s)\n line = %s\n"
                % (repr(endchar), repr(line[:20])))
        while line:
            if line[0] == endchar and not stack:
                if DEBUG:
                    self.err_write("subconvert() --> %s\n"
                                   % repr(line[1:21]))
                self.line = line
                return line
            m = _comment_rx.match(line)
            if m:
                text = m.group(1)
                if text:
                    self.write("(COMMENT\n- %s \n)COMMENT\n-\\n\n"
                               % encode(text))
                line = line[m.end():]
                continue
            m = _begin_env_rx.match(line)
            if m:
                # re-write to use the macro handler
                line = r"\%s %s" % (m.group(1), line[m.end():])
                continue
            m = _end_env_rx.match(line)
            if m:
                # end of environment
                envname = m.group(1)
                if envname == "document":
                    # special magic
                    for n in stack[1:]:
                        if n not in self.autoclosing:
                            raise LaTeXFormatError(
                                "open element on stack: " + repr(n))
                    # should be more careful, but this is easier to code:
                    stack = []
                    self.write(")document\n")
                elif stack and envname == stack[-1]:
                    self.write(")%s\n" % envname)
                    del stack[-1]
                    popping(envname, "a", len(stack) + depth)
                else:
                    self.err_write("stack: %s\n" % repr(stack))
                    raise LaTeXFormatError(
                        "environment close for %s doesn't match" % envname)
                line = line[m.end():]
                continue
            m = _begin_macro_rx.match(line)
            if m:
                # start of macro
                macroname = m.group(1)
                if macroname == "verbatim":
                    # really magic case!
                    pos = string.find(line, "\\end{verbatim}")
                    if pos < 0:
                        # Slicing with -1 would silently drop or duplicate
                        # input, so report the missing terminator instead.
                        raise LaTeXFormatError(
                            "verbatim environment is not closed: %s"
                            % repr(line[:100]))
                    text = line[m.end(1):pos]
                    self.write("(verbatim\n")
                    self.write("-%s\n" % encode(text))
                    self.write(")verbatim\n")
                    line = line[pos + len("\\end{verbatim}"):]
                    continue
                numbered = 1
                opened = 0
                if macroname[-1] == "*":
                    macroname = macroname[:-1]
                    numbered = 0
                if macroname in self.autoclosing and macroname in stack:
                    # Starting an auto-closing element again closes the
                    # open one together with everything opened inside it.
                    while stack[-1] != macroname:
                        top = stack.pop()
                        if top and top not in self.discards:
                            self.write(")%s\n-\\n\n" % top)
                        popping(top, "b", len(stack) + depth)
                    if macroname not in self.discards:
                        self.write("-\\n\n)%s\n-\\n\n" % macroname)
                    popping(macroname, "c", len(stack) + depth - 1)
                    del stack[-1]
                # Events for the start of a discarded macro go to a
                # scratch buffer; pop_output() below restores the output.
                if macroname in self.discards:
                    self.push_output(StringIO.StringIO())
                else:
                    self.push_output(self.ofp)
                #
                params, optional, empty, environ = self.start_macro(macroname)
                if not numbered:
                    self.write("Anumbered TOKEN no\n")
                # rip off the macroname
                if params or empty:
                    line = line[m.end(1):]
                else:
                    line = line[m.end():]
                #
                # Very ugly special case to deal with \item[]. The catch
                # is that this needs to occur outside the for loop that
                # handles attribute parsing so we can 'continue' the outer
                # loop.
                #
                if optional and params and type(params[0]) is TupleType:
                    # the attribute name isn't used in this special case
                    pushing(macroname, "a", depth + len(stack))
                    stack.append(macroname)
                    self.write("(%s\n" % macroname)
                    m = _start_optional_rx.match(line)
                    if m:
                        self.line = line[m.end():]
                        line = self.subconvert("]", depth + len(stack))
                    # The injected "}" closes the element on the next pass.
                    line = "}" + line
                    # Balance the push_output() above before leaving.
                    self.pop_output()
                    continue
                # handle attribute mappings here:
                for attrname in params:
                    if optional:
                        # Only the first parameter may be optional.
                        optional = 0
                        if type(attrname) is StringType:
                            m = _optional_rx.match(line)
                            if m:
                                line = line[m.end():]
                                self.write("A%s TOKEN %s\n"
                                           % (attrname, encode(m.group(1))))
                    elif type(attrname) is TupleType:
                        # \section-like: open the macro's own element now
                        # and leave it on the stack, then carry on with the
                        # sub-element as the element whose content follows.
                        pushing(macroname, "b", len(stack) + depth)
                        stack.append(macroname)
                        self.write("(%s\n" % macroname)
                        macroname = attrname[0]
                        m = _start_group_rx.match(line)
                        if m:
                            line = line[m.end():]
                    elif type(attrname) is ListType:
                        # A normal subelement: <macroname><attrname>...</>...
                        attrname = attrname[0]
                        if not opened:
                            opened = 1
                            self.write("(%s\n" % macroname)
                            pushing(macroname, "c", len(stack) + depth)
                        self.write("(%s\n" % attrname)
                        pushing(attrname, "sub-elem", len(stack) + depth + 1)
                        # Skip the opening "{" and convert up to the "}".
                        self.line = skip_white(line)[1:]
                        line = self.subconvert("}", len(stack) + depth + 1)[1:]
                        popping(attrname, "sub-elem", len(stack) + depth + 1)
                        self.write(")%s\n" % attrname)
                    else:
                        m = _parameter_rx.match(line)
                        if not m:
                            raise LaTeXFormatError(
                                "could not extract parameter %s for %s: %s"
                                % (attrname, macroname, repr(line[:100])))
                        value = m.group(1)
                        if _token_rx.match(value):
                            dtype = "TOKEN"
                        else:
                            dtype = "CDATA"
                        self.write("A%s %s %s\n"
                                   % (attrname, dtype, encode(value)))
                        line = line[m.end():]
                if params and type(params[-1]) is StringType \
                   and (not empty) and not environ:
                    # attempt to strip off next '{'
                    m = _start_group_rx.match(line)
                    if not m:
                        raise LaTeXFormatError(
                            "non-empty element '%s' has no content: %s"
                            % (macroname, line[:12]))
                    line = line[m.end():]
                if not opened:
                    self.write("(%s\n" % macroname)
                    pushing(macroname, "d", len(stack) + depth)
                if empty:
                    # The injected "}" closes the element on the next pass.
                    line = "}" + line
                stack.append(macroname)
                self.pop_output()
                continue
            if line[0] == "}":
                # end of macro or group
                if not stack:
                    raise LaTeXFormatError(
                        "unmatched '}': %s" % repr(line[:100]))
                macroname = stack[-1]
                conversion = self.table.get(macroname)
                if macroname \
                   and macroname not in self.discards \
                   and type(conversion) is not StringType:
                    # otherwise, it was just a bare group
                    self.write(")%s\n" % stack[-1])
                popping(macroname, "d", len(stack) + depth - 1)
                del stack[-1]
                line = line[1:]
                continue
            if line[0] == "{":
                # A bare group is tracked with an empty name.
                pushing("", "e", len(stack) + depth)
                stack.append("")
                line = line[1:]
                continue
            # The length check keeps a lone trailing backslash from raising
            # IndexError; it is reported as unidentified markup below.
            if line[0] == "\\" and len(line) > 1 and line[1] in ESCAPED_CHARS:
                self.write("-%s\n" % encode(line[1]))
                line = line[2:]
                continue
            if line[:2] == r"\\":
                self.write("(BREAK\n)BREAK\n")
                line = line[2:]
                continue
            m = _text_rx.match(line)
            if m:
                text = encode(m.group())
                self.write("-%s\n" % text)
                line = line[m.end():]
                continue
            # special case because of \item[]
            if line[0] == "]":
                self.write("-]\n")
                line = line[1:]
                continue
            # avoid infinite loops
            extra = ""
            if len(line) > 100:
                extra = "..."
            raise LaTeXFormatError("could not identify markup: %s%s"
                                   % (repr(line[:100]), extra))
        while stack and stack[-1] in self.autoclosing:
            self.write("-\\n\n")
            self.write(")%s\n" % stack[-1])
            popping(stack.pop(), "e", len(stack) + depth - 1)
        if stack:
            raise LaTeXFormatError("elements remain on stack: "
                                   + string.join(stack, ", "))
        if endchar is not None:
            # The callers slice the returned text; falling through would
            # hand them None and fail with an unrelated TypeError.
            raise LaTeXFormatError(
                "input ended before closing %s was found" % repr(endchar))
        # otherwise we just ran out of input here...

    def convert(self):
        """Convert the whole document."""
        self.subconvert()

    def start_macro(self, name):
        """Return (params, optional, empty, environ) for the macro name.

        Names missing from the table are described by ([], 0, 0, 0, 0).
        An "e" event is written for an empty macro; a macro flagged as
        nocontent is reported as empty without that event being written.
        """
        conversion = self.table.get(name, ([], 0, 0, 0, 0))
        params, optional, empty, environ, nocontent = conversion
        if empty:
            self.write("e\n")
        elif nocontent:
            empty = 1
        return params, optional, empty, environ
Fred Drake30a68c71998-11-23 16:59:39 +0000320
321
def convert(ifp, ofp, table=None, discards=(), autoclosing=()):
    """Convert the LaTeX document read from ifp to ESIS events on ofp.

    table, discards and autoclosing are handed to Conversion; a missing
    table is treated as an empty one.  An IOError whose errno is EPIPE is
    swallowed; every other exception propagates to the caller.
    """
    if table is None:
        table = {}
    c = Conversion(ifp, ofp, table, discards, autoclosing)
    try:
        c.convert()
    except IOError as err:
        # Look at the errno attribute instead of unpacking the exception,
        # which fails unless it carries exactly two arguments.
        if err.errno != errno.EPIPE:
            raise
329
330
def skip_white(line):
    """Return line without its leading run of whitespace and "%" signs.

    As long as the text starts with a space, "%", newline or tab, that
    character and all whitespace after it are removed.
    """
    while 1:
        lead = line[:1]
        if not lead or lead not in " %\n\t":
            return line
        line = line[1:].lstrip()
335
336
def main():
    """Command-line entry point: <script> input-file [output-file].

    The input file is converted with the built-in macro table below; the
    events go to the output file, or to stdout when none is named.  Any
    other number of arguments prints a usage message on stderr and exits
    with status 2.
    """
    if len(sys.argv) == 2:
        ifp = open(sys.argv[1])
        ofp = sys.stdout
    elif len(sys.argv) == 3:
        ifp = open(sys.argv[1])
        ofp = open(sys.argv[2], "w")
    else:
        # No usage() helper exists in this file, so the message is
        # written here.
        sys.stderr.write("usage: %s input-file [output-file]\n"
                         % sys.argv[0])
        sys.exit(2)
    table = {
        # entries have the form:
        # name: ([attribute names], is1stOptional, isEmpty, isEnv, nocontent)
        # attribute names can be:
        #   "string"    -- normal attribute
        #   ("string",) -- sub-element with content of macro; like for \section
        #   ["string"]  -- sub-element
        "appendix": ([], 0, 1, 0, 0),
        "bifuncindex": (["name"], 0, 1, 0, 0),
        "catcode": ([], 0, 1, 0, 0),
        "cfuncdesc": (["type", "name", ("args",)], 0, 0, 1, 0),
        "chapter": ([("title",)], 0, 0, 0, 0),
        "chapter*": ([("title",)], 0, 0, 0, 0),
        "classdesc": (["name", ("args",)], 0, 0, 1, 0),
        "ctypedesc": (["name"], 0, 0, 1, 0),
        "cvardesc": (["type", "name"], 0, 0, 1, 0),
        "datadesc": (["name"], 0, 0, 1, 0),
        "declaremodule": (["id", "type", "name"], 1, 1, 0, 0),
        "deprecated": (["release"], 0, 0, 0, 0),
        "documentclass": (["classname"], 0, 1, 0, 0),
        "excdesc": (["name"], 0, 0, 1, 0),
        "funcdesc": (["name", ("args",)], 0, 0, 1, 0),
        "funcdescni": (["name", ("args",)], 0, 0, 1, 0),
        "funcline": (["name"], 0, 0, 0, 0),
        "funclineni": (["name"], 0, 0, 0, 0),
        "geq": ([], 0, 1, 0, 0),
        "hline": ([], 0, 1, 0, 0),
        "indexii": (["ie1", "ie2"], 0, 1, 0, 0),
        "indexiii": (["ie1", "ie2", "ie3"], 0, 1, 0, 0),
        "indexiv": (["ie1", "ie2", "ie3", "ie4"], 0, 1, 0, 0),
        "indexname": ([], 0, 0, 0, 0),
        "input": (["source"], 0, 1, 0, 0),
        "item": ([("leader",)], 1, 0, 0, 0),
        "label": (["id"], 0, 1, 0, 0),
        "labelwidth": ([], 0, 1, 0, 0),
        "large": ([], 0, 1, 0, 0),
        "LaTeX": ([], 0, 1, 0, 0),
        "leftmargin": ([], 0, 1, 0, 0),
        "leq": ([], 0, 1, 0, 0),
        "lineii": ([["entry"], ["entry"]], 0, 0, 0, 1),
        "lineiii": ([["entry"], ["entry"], ["entry"]], 0, 0, 0, 1),
        "lineiv": ([["entry"], ["entry"], ["entry"], ["entry"]], 0, 0, 0, 1),
        "localmoduletable": ([], 0, 1, 0, 0),
        "manpage": (["name", "section"], 0, 1, 0, 0),
        "memberdesc": (["class", "name"], 1, 0, 1, 0),
        "methoddesc": (["class", "name", ("args",)], 1, 0, 1, 0),
        "methoddescni": (["class", "name", ("args",)], 1, 0, 1, 0),
        "methodline": (["class", "name"], 1, 0, 0, 0),
        "methodlineni": (["class", "name"], 1, 0, 0, 0),
        "moduleauthor": (["name", "email"], 0, 1, 0, 0),
        "opcodedesc": (["name", "var"], 0, 0, 1, 0),
        "par": ([], 0, 1, 0, 0),
        "paragraph": ([("title",)], 0, 0, 0, 0),
        "refbimodindex": (["name"], 0, 1, 0, 0),
        "refexmodindex": (["name"], 0, 1, 0, 0),
        "refmodindex": (["name"], 0, 1, 0, 0),
        "refstmodindex": (["name"], 0, 1, 0, 0),
        "refmodule": (["ref"], 1, 0, 0, 0),
        "renewcommand": (["macro"], 0, 0, 0, 0),
        "rfc": (["num"], 0, 1, 0, 0),
        "section": ([("title",)], 0, 0, 0, 0),
        "sectionauthor": (["name", "email"], 0, 1, 0, 0),
        "seemodule": (["ref", "name"], 1, 0, 0, 0),
        "stindex": (["type"], 0, 1, 0, 0),
        "subparagraph": ([("title",)], 0, 0, 0, 0),
        "subsection": ([("title",)], 0, 0, 0, 0),
        "subsubsection": ([("title",)], 0, 0, 0, 0),
        "list": (["bullet", "init"], 0, 0, 1, 0),
        "tableii": (["colspec", "style",
                     ["entry"], ["entry"]], 0, 0, 1, 0),
        "tableiii": (["colspec", "style",
                      ["entry"], ["entry"], ["entry"]], 0, 0, 1, 0),
        "tableiv": (["colspec", "style",
                     ["entry"], ["entry"], ["entry"], ["entry"]], 0, 0, 1, 0),
        "version": ([], 0, 1, 0, 0),
        "versionadded": (["version"], 0, 1, 0, 0),
        "versionchanged": (["version"], 0, 1, 0, 0),
        "withsubitem": (["text"], 0, 0, 0, 0),
        #
        "ABC": ([], 0, 1, 0, 0),
        "ASCII": ([], 0, 1, 0, 0),
        "C": ([], 0, 1, 0, 0),
        "Cpp": ([], 0, 1, 0, 0),
        "EOF": ([], 0, 1, 0, 0),
        "e": ([], 0, 1, 0, 0),
        "ldots": ([], 0, 1, 0, 0),
        "NULL": ([], 0, 1, 0, 0),
        "POSIX": ([], 0, 1, 0, 0),
        "UNIX": ([], 0, 1, 0, 0),
        #
        # Things that will actually be going away!
        #
        # (makeindex, makemodindex and maketitle used to be listed a second
        # time, with the same values, in the alphabetical part above.)
        "fi": ([], 0, 1, 0, 0),
        "ifhtml": ([], 0, 1, 0, 0),
        "makeindex": ([], 0, 1, 0, 0),
        "makemodindex": ([], 0, 1, 0, 0),
        "maketitle": ([], 0, 1, 0, 0),
        "noindent": ([], 0, 1, 0, 0),
        "protect": ([], 0, 1, 0, 0),
        "tableofcontents": ([], 0, 1, 0, 0),
        }
    try:
        convert(ifp, ofp, table,
                discards=["fi", "ifhtml", "makeindex", "makemodindex",
                          "maketitle", "noindent", "tableofcontents"],
                autoclosing=["chapter", "section", "subsection",
                             "subsubsection", "paragraph", "subparagraph"])
    finally:
        # Close what was opened here; stdout is left alone.
        ifp.close()
        if ofp is not sys.stdout:
            ofp.close()
Fred Drake30a68c71998-11-23 16:59:39 +0000455
456
# Run the converter only when this file is executed as a script.
if __name__ == "__main__":
    main()