blob: 149f847bcca491ca3787b517870f9bd31d29c260 [file] [log] [blame]
#! /usr/bin/env python

"""Generate ESIS events based on a LaTeX source document and configuration
data.
"""
# Revision keyword placeholder; left unexpanded in this copy of the file.
__version__ = '$Revision$'
7
8import errno
9import re
10import string
11import StringIO
12import sys
13
Fred Drakeaeea9811998-12-01 19:04:12 +000014from esistools import encode
15
Fred Drake30a68c71998-11-23 16:59:39 +000016
# Set to a true value to have the converter trace its progress (element
# pushes/pops and subconvert() entry/exit) on stderr.
DEBUG = 0
18
19
class Error(Exception):
    """Base class for the exceptions defined by this module."""


class LaTeXFormatError(Error):
    """Raised when the LaTeX input cannot be interpreted by the converter."""
25
26
# All patterns are applied with .match(), i.e. anchored at the start of the
# not-yet-consumed input.  Literal brackets and braces are backslash-escaped
# rather than wrapped in one-character sets so the patterns cannot be
# mistaken for nested sets by newer versions of the re module.

# \begin{name} and \end{name}; group 1 is the environment name.
_begin_env_rx = re.compile(r"\\begin\{([^}]*)\}")
_end_env_rx = re.compile(r"\\end\{([^}]*)\}")
# \name or \name*, followed either by "{" or by optional whitespace;
# group 1 is the macro name (including any "*"), group 2 what follows it.
_begin_macro_rx = re.compile(r"\\([a-zA-Z]+[*]?)(\{|\s*\n?)")
# A "%" comment up to the end of the line plus the indentation of the next
# line; group 1 is the comment text.
_comment_rx = re.compile(r"%+ ?(.*)\n[ \t]*")
# A run of ordinary text: anything except "]", "%", "\", "{" and "}".
_text_rx = re.compile(r"[^\]%\\{}]+")
# An optional [...] argument; group 1 is the text between the brackets.
_optional_rx = re.compile(r"\s*\[([^\]]*)\]")
# _parameter_rx is this complicated to allow {...} inside a parameter;
# this is useful to match tabular layout specifications like {c|p{24pt}}
_parameter_rx = re.compile(r"[ \n]*\{(([^{}]|\{[^}]*\})*)\}")
# Parameter values matching this are written as TOKEN rather than CDATA.
_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
# The opening "{" / "[" of a group or optional argument, with any
# preceding spaces or newlines.
_start_group_rx = re.compile(r"[ \n]*\{")
_start_optional_rx = re.compile(r"[ \n]*\[")


# Characters that are emitted as literal text when preceded by a backslash.
ESCAPED_CHARS = "$%#^ {}&~"
Fred Drake30a68c71998-11-23 16:59:39 +000042
43
def dbgmsg(msg):
    """Write *msg* followed by a newline to stderr when DEBUG is true.

    Does nothing when the module-level DEBUG flag is false.
    """
    if DEBUG:
        sys.stderr.write(msg + "\n")
47
def pushing(name, point, depth):
    """Emit a debug trace line for element *name* being opened.

    *point* is a short label identifying the call site and *depth* is the
    nesting level used to indent the message.
    """
    dbgmsg("%s<%s> at %s" % (" "*depth, name, point))
Fred Draked7acf021999-01-14 17:38:12 +000050
def popping(name, point, depth):
    """Emit a debug trace line for element *name* being closed.

    *point* is a short label identifying the call site and *depth* is the
    nesting level used to indent the message.
    """
    dbgmsg("%s</%s> at %s" % (" "*depth, name, point))
Fred Draked7acf021999-01-14 17:38:12 +000053
54
class Conversion:
    """Convert a LaTeX document into a stream of ESIS events.

    The whole input is read up front; convert() then walks it and writes
    the generated events to the current output file.  Macro handling is
    driven by *table*, which maps a macro or environment name to a tuple
    (params, optional, empty, environ, nocontent).
    """

    def __init__(self, ifp, ofp, table=None, discards=(), autoclosing=()):
        """Read all of *ifp* and prepare to write events to *ofp*.

        table       -- mapping of macro name to its conversion tuple; None
                       is treated as an empty mapping
        discards    -- names of macros whose own events are thrown away
        autoclosing -- names of elements that are closed implicitly when a
                       new one of the same name starts, or at end of input
        """
        self.ofp_stack = [ofp]
        self.pop_output()
        if table is None:
            # the lookups below need a real mapping
            table = {}
        self.table = table
        self.discards = discards
        self.autoclosing = autoclosing
        # Trailing whitespace is dropped from every input line and the
        # lines are re-joined into one string that is consumed from the
        # front as the conversion proceeds.
        stripped = [text.rstrip() for text in ifp.readlines()]
        self.line = "\n".join(stripped)
        self.err_write = sys.stderr.write
        self.preamble = 1

    def push_output(self, ofp):
        """Make *ofp* the current output, remembering the previous one."""
        self.ofp_stack.append(self.ofp)
        self.ofp = ofp
        self.write = ofp.write

    def pop_output(self):
        """Restore the output file saved by the matching push_output()."""
        self.ofp = self.ofp_stack.pop()
        self.write = self.ofp.write

    def subconvert(self, endchar=None, depth=0):
        """Convert the text in self.line, writing events as they are found.

        When *endchar* is given and is seen while no element opened by
        this call is still open, conversion stops: the remaining text,
        still starting with *endchar*, is stored in self.line and
        returned.  Otherwise the input is consumed to its end, any open
        autoclosing elements are closed, and None is returned.

        *depth* is only used to indent the debugging trace.

        Raises LaTeXFormatError for markup that cannot be handled.
        """
        stack = []
        line = self.line
        if DEBUG and endchar:
            self.err_write(
                "subconvert(%s)\n line = %s\n"
                % (repr(endchar), repr(line[:20])))
        while line:
            if line[0] == endchar and not stack:
                if DEBUG:
                    self.err_write("subconvert() --> %s\n"
                                   % repr(line[1:21]))
                self.line = line
                return line
            m = _comment_rx.match(line)
            if m:
                text = m.group(1)
                if text:
                    self.write("(COMMENT\n- %s \n)COMMENT\n-\\n\n"
                               % encode(text))
                line = line[m.end():]
                continue
            m = _begin_env_rx.match(line)
            if m:
                # re-write to use the macro handler
                line = r"\%s %s" % (m.group(1), line[m.end():])
                continue
            m = _end_env_rx.match(line)
            if m:
                # end of environment
                envname = m.group(1)
                if envname == "document":
                    # special magic
                    for n in stack[1:]:
                        if n not in self.autoclosing:
                            raise LaTeXFormatError(
                                "open element on stack: " + repr(n))
                    # should be more careful, but this is easier to code:
                    stack = []
                    self.write(")document\n")
                elif stack and envname == stack[-1]:
                    self.write(")%s\n" % envname)
                    del stack[-1]
                    popping(envname, "a", len(stack) + depth)
                else:
                    self.err_write("stack: %s\n" % repr(stack))
                    raise LaTeXFormatError(
                        "environment close for %s doesn't match" % envname)
                line = line[m.end():]
                continue
            m = _begin_macro_rx.match(line)
            if m:
                # start of macro
                macroname = m.group(1)
                if macroname == "verbatim":
                    # really magic case!
                    end_marker = "\\end{verbatim}"
                    pos = line.find(end_marker)
                    if pos < 0:
                        # find() returns -1 here; slicing with it would
                        # silently mangle the rest of the document
                        raise LaTeXFormatError(
                            "verbatim environment is never closed")
                    text = line[m.end(1):pos]
                    self.write("(verbatim\n")
                    self.write("-%s\n" % encode(text))
                    self.write(")verbatim\n")
                    line = line[pos + len(end_marker):]
                    continue
                numbered = 1
                opened = 0
                if macroname[-1] == "*":
                    macroname = macroname[:-1]
                    numbered = 0
                if macroname in self.autoclosing and macroname in stack:
                    # close everything opened since the previous element
                    # of the same name, then that element itself
                    while stack[-1] != macroname:
                        top = stack.pop()
                        if top and top not in self.discards:
                            self.write(")%s\n-\\n\n" % top)
                        popping(top, "b", len(stack) + depth)
                    if macroname not in self.discards:
                        self.write("-\\n\n)%s\n-\\n\n" % macroname)
                    popping(macroname, "c", len(stack) + depth - 1)
                    del stack[-1]
                #
                # Events for discarded macros go to a scratch buffer that
                # is dropped by the pop_output() calls below.
                if macroname in self.discards:
                    self.push_output(StringIO.StringIO())
                else:
                    self.push_output(self.ofp)
                #
                params, optional, empty, environ = self.start_macro(macroname)
                if not numbered:
                    self.write("Anumbered TOKEN no\n")
                # rip off the macroname
                if params:
                    if optional and len(params) == 1:
                        line = line[m.end():]
                    else:
                        line = line[m.end(1):]
                elif empty:
                    line = line[m.end(1):]
                else:
                    line = line[m.end():]
                #
                # Very ugly special case to deal with \item[].  The catch
                # is that this needs to occur outside the for loop that
                # handles attribute parsing so we can 'continue' the outer
                # loop.
                #
                if optional and isinstance(params[0], tuple):
                    # the attribute name isn't used in this special case
                    pushing(macroname, "a", depth + len(stack))
                    stack.append(macroname)
                    self.write("(%s\n" % macroname)
                    m = _start_optional_rx.match(line)
                    if m:
                        self.line = line[m.end():]
                        # NOTE(review): the text returned here still
                        # starts with the "]"; it is later written out as
                        # literal text by the "]" special case near the
                        # end of this loop -- confirm this is intended.
                        line = self.subconvert("]", depth + len(stack))
                    # the synthetic "}" closes the element on the next pass
                    line = "}" + line
                    # NOTE(review): this path skips pop_output(), so the
                    # output pushed above is still on the stack -- confirm.
                    continue
                # handle attribute mappings here:
                for attrname in params:
                    if optional:
                        optional = 0
                        if isinstance(attrname, str):
                            m = _optional_rx.match(line)
                            if m:
                                line = line[m.end():]
                                self.write("A%s TOKEN %s\n"
                                           % (attrname, encode(m.group(1))))
                    elif isinstance(attrname, tuple):
                        # This is a sub-element; but don't place the
                        # element we found on the stack (\section-like)
                        pushing(macroname, "b", len(stack) + depth)
                        stack.append(macroname)
                        self.write("(%s\n" % macroname)
                        macroname = attrname[0]
                        m = _start_group_rx.match(line)
                        if m:
                            line = line[m.end():]
                    elif isinstance(attrname, list):
                        # A normal subelement: <macroname><attrname>...</>...
                        attrname = attrname[0]
                        if not opened:
                            opened = 1
                            self.write("(%s\n" % macroname)
                            pushing(macroname, "c", len(stack) + depth)
                        self.write("(%s\n" % attrname)
                        pushing(attrname, "sub-elem", len(stack) + depth + 1)
                        # drop the opening "{" here and the closing "}"
                        # that subconvert() leaves at the front of its
                        # return value
                        self.line = skip_white(line)[1:]
                        line = self.subconvert("}", len(stack) + depth + 1)[1:]
                        dbgmsg("subconvert() ==> " + repr(line[:20]))
                        popping(attrname, "sub-elem", len(stack) + depth + 1)
                        self.write(")%s\n" % attrname)
                    else:
                        m = _parameter_rx.match(line)
                        if not m:
                            raise LaTeXFormatError(
                                "could not extract parameter %s for %s: %s"
                                % (attrname, macroname, repr(line[:100])))
                        value = m.group(1)
                        if _token_rx.match(value):
                            dtype = "TOKEN"
                        else:
                            dtype = "CDATA"
                        self.write("A%s %s %s\n"
                                   % (attrname, dtype, encode(value)))
                        line = line[m.end():]
                if params and isinstance(params[-1], str) \
                   and (not empty) and not environ:
                    # attempt to strip off next '{'
                    m = _start_group_rx.match(line)
                    if not m:
                        raise LaTeXFormatError(
                            "non-empty element '%s' has no content: %s"
                            % (macroname, line[:12]))
                    line = line[m.end():]
                if not opened:
                    self.write("(%s\n" % macroname)
                    pushing(macroname, "d", len(stack) + depth)
                if empty:
                    # the synthetic "}" closes the element on the next pass
                    line = "}" + line
                stack.append(macroname)
                self.pop_output()
                continue
            if line[0] == "}":
                # end of macro or group
                if not stack:
                    raise LaTeXFormatError(
                        "unmatched '}': %s" % repr(line[:100]))
                macroname = stack[-1]
                conversion = self.table.get(macroname)
                if macroname \
                   and macroname not in self.discards \
                   and not isinstance(conversion, str):
                    # otherwise, it was just a bare group
                    self.write(")%s\n" % macroname)
                popping(macroname, "d", len(stack) + depth - 1)
                del stack[-1]
                line = line[1:]
                continue
            if line[0] == "{":
                # bare groups are tracked with an empty name
                pushing("", "e", len(stack) + depth)
                stack.append("")
                line = line[1:]
                continue
            # the length test keeps a lone trailing backslash from
            # indexing past the end of the input
            if line[0] == "\\" and len(line) > 1 \
               and line[1] in ESCAPED_CHARS:
                self.write("-%s\n" % encode(line[1]))
                line = line[2:]
                continue
            if line[:2] == r"\\":
                self.write("(BREAK\n)BREAK\n")
                line = line[2:]
                continue
            m = _text_rx.match(line)
            if m:
                text = encode(m.group())
                self.write("-%s\n" % text)
                line = line[m.end():]
                continue
            # special case because of \item[]
            if line[0] == "]":
                self.write("-]\n")
                line = line[1:]
                continue
            # avoid infinite loops
            extra = ""
            if len(line) > 100:
                extra = "..."
            raise LaTeXFormatError("could not identify markup: %s%s"
                                   % (repr(line[:100]), extra))
        while stack and stack[-1] in self.autoclosing:
            self.write("-\\n\n")
            self.write(")%s\n" % stack[-1])
            popping(stack.pop(), "e", len(stack) + depth - 1)
        if stack:
            raise LaTeXFormatError("elements remain on stack: "
                                   + ", ".join(stack))
        # otherwise we just ran out of input here...

    def convert(self):
        """Convert the complete document."""
        self.subconvert()

    def start_macro(self, name):
        """Look up *name* and return (params, optional, empty, environ).

        Names missing from the table get ([], 0, 0, 0).  For an empty
        element an "e" line is written; an entry flagged nocontent is
        reported to the caller as empty but no "e" line is written.
        """
        conversion = self.table.get(name, ([], 0, 0, 0, 0))
        params, optional, empty, environ, nocontent = conversion
        if empty:
            self.write("e\n")
        elif nocontent:
            empty = 1
        return params, optional, empty, environ
Fred Drake30a68c71998-11-23 16:59:39 +0000320
321
def convert(ifp, ofp, table=None, discards=(), autoclosing=()):
    """Convert the LaTeX read from *ifp* to ESIS events written to *ofp*.

    *table*, *discards* and *autoclosing* are handed to Conversion
    unchanged; omitting *table* is the same as passing an empty mapping.

    An IOError whose errno is EPIPE is suppressed; any other exception
    propagates to the caller.
    """
    if table is None:
        table = {}
    c = Conversion(ifp, ofp, table, discards, autoclosing)
    try:
        c.convert()
    except IOError as exc:
        # Read errno from the exception itself instead of unpacking its
        # arguments, which fails when they are not an (errno, message) pair.
        if exc.errno != errno.EPIPE:
            raise
329
330
def skip_white(line):
    """Return *line* without its leading blanks and "%" characters.

    While the first character is a space, "%", newline or tab, that
    character is dropped together with any whitespace that follows it.
    """
    while line and line[0] in " %\n\t":
        line = line[1:].lstrip()
    return line
335
336
def main():
    """Command-line entry point.

    With one argument the named file is converted to standard output;
    with two, the second names the output file.  Any other argument
    count prints a usage message on stderr and exits with status 2.
    """
    argc = len(sys.argv)
    if argc == 2:
        ifp = open(sys.argv[1])
        ofp = sys.stdout
    elif argc == 3:
        ifp = open(sys.argv[1])
        ofp = open(sys.argv[2], "w")
    else:
        program = (sys.argv[:1] or ["latex2esis"])[0]
        sys.stderr.write("usage: %s input-file [output-file]\n" % program)
        sys.exit(2)
    table = {
        # entries have the form:
        # name: ([attribute names], is1stOptional, isEmpty, isEnv, nocontent)
        # attribute names can be:
        #   "string"    -- normal attribute
        #   ("string",) -- sub-element with content of macro; like for \section
        #   ["string"]  -- sub-element
        "appendix": ([], 0, 1, 0, 0),
        "bifuncindex": (["name"], 0, 1, 0, 0),
        "catcode": ([], 0, 1, 0, 0),
        "cfuncdesc": (["type", "name", ("args",)], 0, 0, 1, 0),
        "chapter": ([("title",)], 0, 0, 0, 0),
        "chapter*": ([("title",)], 0, 0, 0, 0),
        "classdesc": (["name", ("args",)], 0, 0, 1, 0),
        "ctypedesc": (["name"], 0, 0, 1, 0),
        "cvardesc": (["type", "name"], 0, 0, 1, 0),
        "datadesc": (["name"], 0, 0, 1, 0),
        "declaremodule": (["id", "type", "name"], 1, 1, 0, 0),
        "deprecated": (["release"], 0, 0, 0, 0),
        "documentclass": (["classname"], 0, 1, 0, 0),
        "excdesc": (["name"], 0, 0, 1, 0),
        "funcdesc": (["name", ("args",)], 0, 0, 1, 0),
        "funcdescni": (["name", ("args",)], 0, 0, 1, 0),
        "funcline": (["name"], 0, 0, 0, 0),
        "funclineni": (["name"], 0, 0, 0, 0),
        "geq": ([], 0, 1, 0, 0),
        "hline": ([], 0, 1, 0, 0),
        "indexii": (["ie1", "ie2"], 0, 1, 0, 0),
        "indexiii": (["ie1", "ie2", "ie3"], 0, 1, 0, 0),
        "indexiv": (["ie1", "ie2", "ie3", "ie4"], 0, 1, 0, 0),
        "indexname": ([], 0, 0, 0, 0),
        "input": (["source"], 0, 1, 0, 0),
        "item": ([("leader",)], 1, 0, 0, 0),
        "label": (["id"], 0, 1, 0, 0),
        "labelwidth": ([], 0, 1, 0, 0),
        "large": ([], 0, 1, 0, 0),
        "LaTeX": ([], 0, 1, 0, 0),
        "leftmargin": ([], 0, 1, 0, 0),
        "leq": ([], 0, 1, 0, 0),
        "lineii": ([["entry"], ["entry"]], 0, 0, 0, 1),
        "lineiii": ([["entry"], ["entry"], ["entry"]], 0, 0, 0, 1),
        "lineiv": ([["entry"], ["entry"], ["entry"], ["entry"]], 0, 0, 0, 1),
        "localmoduletable": ([], 0, 1, 0, 0),
        # "makeindex", "makemodindex" and "maketitle" are listed once,
        # with the other discarded macros at the end of the table.
        "manpage": (["name", "section"], 0, 1, 0, 0),
        "memberdesc": (["class", "name"], 1, 0, 1, 0),
        "methoddesc": (["class", "name", ("args",)], 1, 0, 1, 0),
        "methoddescni": (["class", "name", ("args",)], 1, 0, 1, 0),
        "methodline": (["class", "name"], 1, 0, 0, 0),
        "methodlineni": (["class", "name"], 1, 0, 0, 0),
        "moduleauthor": (["name", "email"], 0, 1, 0, 0),
        "opcodedesc": (["name", "var"], 0, 0, 1, 0),
        "par": ([], 0, 1, 0, 0),
        "paragraph": ([("title",)], 0, 0, 0, 0),
        "renewcommand": (["macro"], 0, 0, 0, 0),
        "rfc": (["num"], 0, 1, 0, 0),
        "section": ([("title",)], 0, 0, 0, 0),
        "sectionauthor": (["name", "email"], 0, 1, 0, 0),
        "seemodule": (["ref", "name"], 1, 0, 0, 0),
        "stindex": (["type"], 0, 1, 0, 0),
        "subparagraph": ([("title",)], 0, 0, 0, 0),
        "subsection": ([("title",)], 0, 0, 0, 0),
        "subsubsection": ([("title",)], 0, 0, 0, 0),
        "list": (["bullet", "init"], 0, 0, 1, 0),
        "tableii": (["colspec", "style",
                     ["entry"], ["entry"]], 0, 0, 1, 0),
        "tableiii": (["colspec", "style",
                      ["entry"], ["entry"], ["entry"]], 0, 0, 1, 0),
        "tableiv": (["colspec", "style",
                     ["entry"], ["entry"], ["entry"], ["entry"]], 0, 0, 1, 0),
        "version": ([], 0, 1, 0, 0),
        "versionadded": (["version"], 0, 1, 0, 0),
        "versionchanged": (["version"], 0, 1, 0, 0),
        "withsubitem": (["text"], 0, 0, 0, 0),
        #
        "ABC": ([], 0, 1, 0, 0),
        "ASCII": ([], 0, 1, 0, 0),
        "C": ([], 0, 1, 0, 0),
        "Cpp": ([], 0, 1, 0, 0),
        "EOF": ([], 0, 1, 0, 0),
        "e": ([], 0, 1, 0, 0),
        "ldots": ([], 0, 1, 0, 0),
        "NULL": ([], 0, 1, 0, 0),
        "POSIX": ([], 0, 1, 0, 0),
        "UNIX": ([], 0, 1, 0, 0),
        #
        # Things that will actually be going away!
        #
        "fi": ([], 0, 1, 0, 0),
        "ifhtml": ([], 0, 1, 0, 0),
        "makeindex": ([], 0, 1, 0, 0),
        "makemodindex": ([], 0, 1, 0, 0),
        "maketitle": ([], 0, 1, 0, 0),
        "noindent": ([], 0, 1, 0, 0),
        "protect": ([], 0, 1, 0, 0),
        "tableofcontents": ([], 0, 1, 0, 0),
        }
    try:
        convert(ifp, ofp, table,
                discards=["fi", "ifhtml", "makeindex", "makemodindex",
                          "maketitle", "noindent", "tableofcontents"],
                autoclosing=["chapter", "section", "subsection",
                             "subsubsection", "paragraph", "subparagraph", ])
    finally:
        # close only what was opened here; stdout is left alone
        ifp.close()
        if ofp is not sys.stdout:
            ofp.close()
Fred Drake30a68c71998-11-23 16:59:39 +0000450
451
# Run the converter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()