#! /usr/bin/env python

"""Generate ESIS events based on a LaTeX source document and configuration
data.
"""
6__version__ = '$Revision$'
7
8import errno
9import re
10import string
11import StringIO
12import sys
13
Fred Drakeaeea9811998-12-01 19:04:12 +000014from esistools import encode
15
Fred Drake30a68c71998-11-23 16:59:39 +000016
# When true, pushing(), popping() and Conversion.subconvert() write trace
# output to stderr.
DEBUG = 0
18
19
class Error(Exception):
    """Base class for the exceptions defined by this module."""
22
class LaTeXFormatError(Error):
    """Raised when the LaTeX input does not have the expected structure."""
25
26
27_begin_env_rx = re.compile(r"[\\]begin{([^}]*)}")
28_end_env_rx = re.compile(r"[\\]end{([^}]*)}")
29_begin_macro_rx = re.compile("[\\\\]([a-zA-Z]+[*]?)({|\\s*\n?)")
Fred Drake96c00b01999-05-07 19:59:02 +000030_comment_rx = re.compile("%+ ?(.*)\n[ \t]*")
Fred Drake30a68c71998-11-23 16:59:39 +000031_text_rx = re.compile(r"[^]%\\{}]+")
32_optional_rx = re.compile(r"\s*[[]([^]]*)[]]")
Fred Drakeaeea9811998-12-01 19:04:12 +000033# _parameter_rx is this complicated to allow {...} inside a parameter;
34# this is useful to match tabular layout specifications like {c|p{24pt}}
35_parameter_rx = re.compile("[ \n]*{(([^{}}]|{[^}]*})*)}")
Fred Drake30a68c71998-11-23 16:59:39 +000036_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
37_start_group_rx = re.compile("[ \n]*{")
38_start_optional_rx = re.compile("[ \n]*[[]")
39
40
# Characters that, when preceded by a backslash in the input, are emitted by
# Conversion.subconvert() as a single literal data character.
ESCAPED_CHARS = "$%#^ {}&~"
Fred Drake30a68c71998-11-23 16:59:39 +000042
43
def pushing(name, point, depth):
    """Trace the opening of element *name* on stderr when DEBUG is set.

    *point* is a short label identifying the call site and *depth* is the
    nesting level, used to indent the trace line.
    """
    if not DEBUG:
        return
    indent = " " * depth
    sys.stderr.write("%s<%s> at %s\n" % (indent, name, point))
47
def popping(name, point, depth):
    """Trace the closing of element *name* on stderr when DEBUG is set.

    *point* is a short label identifying the call site and *depth* is the
    nesting level, used to indent the trace line.
    """
    if not DEBUG:
        return
    indent = " " * depth
    sys.stderr.write("%s</%s> at %s\n" % (indent, name, point))
51
52
class Conversion:
    """Convert a LaTeX document into a stream of ESIS events.

    The whole input is read up front and kept in ``self.line``; the
    converter repeatedly matches markup at the start of that string and
    writes the corresponding events to the current output:

    ``(name`` / ``)name``  open / close an element
    ``Aname TYPE value``   an attribute of the element about to be opened
    ``-text``              character data
    ``e``                  the element about to be opened is empty

    *table* maps macro and environment names to 5-tuples
    ``(params, optional, empty, environ, nocontent)``.  Names listed in
    *discards* have their opening events written to a throw-away buffer and
    get no closing event.  Elements listed in *autoclosing* are closed
    implicitly when an element of the same name starts or the input ends.
    """

    def __init__(self, ifp, ofp, table=None, discards=(), autoclosing=()):
        """Read all of *ifp* and prepare to write events to *ofp*.

        A *table* of None is treated as an empty table.
        """
        # Seed the stack with the real output and pop it straight back so
        # self.ofp and self.write are set up by pop_output() itself.
        self.ofp_stack = [ofp]
        self.pop_output()
        if table is None:
            table = {}
        self.table = table
        self.discards = discards
        self.autoclosing = autoclosing
        # The document as a single string, with trailing whitespace removed
        # from every line.
        self.line = "\n".join([text.rstrip() for text in ifp.readlines()])
        self.err_write = sys.stderr.write
        self.preamble = 1

    def push_output(self, ofp):
        """Make *ofp* the current output, remembering the previous one."""
        self.ofp_stack.append(self.ofp)
        self.ofp = ofp
        self.write = ofp.write

    def pop_output(self):
        """Restore the output that was current before the last push."""
        self.ofp = self.ofp_stack.pop()
        self.write = self.ofp.write

    def subconvert(self, endchar=None, depth=0):
        """Convert input from ``self.line`` until *endchar* or end of input.

        When *endchar* is met with no element open at this level, the
        remaining input -- still starting with *endchar* -- is stored in
        ``self.line`` and returned; dropping that character is up to the
        caller.  When the input runs out instead, elements left open are
        closed if they are autoclosing and None is returned.

        *depth* only affects the indentation of DEBUG trace output.

        Raises LaTeXFormatError for markup that cannot be identified,
        unbalanced environments or groups, missing parameters, an
        unterminated verbatim environment, or elements that remain open at
        the end of the input.
        """
        # Bind `line` before the trace below, which reports it.
        line = self.line
        if DEBUG and endchar:
            self.err_write(
                "subconvert(%s)\n line = %s\n"
                % (repr(endchar), repr(line[:20])))
        stack = []
        while line:
            if line[0] == endchar and not stack:
                if DEBUG:
                    self.err_write("subconvert() --> %s\n"
                                   % repr(line[1:21]))
                self.line = line
                return line
            m = _comment_rx.match(line)
            if m:
                text = m.group(1)
                if text:
                    self.write("(COMMENT\n- %s \n)COMMENT\n-\\n\n"
                               % encode(text))
                line = line[m.end():]
                continue
            m = _begin_env_rx.match(line)
            if m:
                # re-write to use the macro handler
                line = r"\%s %s" % (m.group(1), line[m.end():])
                continue
            m = _end_env_rx.match(line)
            if m:
                # end of environment
                envname = m.group(1)
                if envname == "document":
                    # special magic
                    for n in stack[1:]:
                        if n not in self.autoclosing:
                            raise LaTeXFormatError(
                                "open element on stack: " + repr(n))
                    # should be more careful, but this is easier to code:
                    stack = []
                    self.write(")document\n")
                elif stack and envname == stack[-1]:
                    self.write(")%s\n" % envname)
                    del stack[-1]
                    popping(envname, "a", len(stack) + depth)
                else:
                    # Also reached when nothing is open at all.
                    self.err_write("stack: %s\n" % repr(stack))
                    raise LaTeXFormatError(
                        "environment close for %s doesn't match" % envname)
                line = line[m.end():]
                continue
            m = _begin_macro_rx.match(line)
            if m:
                # start of macro
                macroname = m.group(1)
                if macroname == "verbatim":
                    # really magic case!  Everything up to the matching
                    # \end is copied through as data without being parsed.
                    terminator = "\\end{verbatim}"
                    pos = line.find(terminator)
                    if pos < 0:
                        raise LaTeXFormatError(
                            "verbatim environment is never closed")
                    text = line[m.end(1):pos]
                    self.write("(verbatim\n")
                    self.write("-%s\n" % encode(text))
                    self.write(")verbatim\n")
                    line = line[pos + len(terminator):]
                    continue
                numbered = 1
                opened = 0
                if macroname[-1] == "*":
                    macroname = macroname[:-1]
                    numbered = 0
                if macroname in self.autoclosing and macroname in stack:
                    # A new element of this kind implicitly closes the open
                    # one, along with everything nested inside it.
                    while stack[-1] != macroname:
                        top = stack.pop()
                        if top and top not in self.discards:
                            self.write(")%s\n-\\n\n" % top)
                        popping(top, "b", len(stack) + depth)
                    if macroname not in self.discards:
                        self.write("-\\n\n)%s\n-\\n\n" % macroname)
                    popping(macroname, "c", len(stack) + depth - 1)
                    del stack[-1]
                #
                # Events for a discarded macro go to a throw-away buffer
                # until pop_output() below.
                if macroname in self.discards:
                    self.push_output(StringIO.StringIO())
                else:
                    self.push_output(self.ofp)
                #
                params, optional, empty, environ = self.start_macro(macroname)
                if not numbered:
                    self.write("Anumbered TOKEN no\n")
                # rip off the macroname
                if params:
                    if optional and len(params) == 1:
                        line = line[m.end():]
                    else:
                        line = line[m.end(1):]
                elif empty:
                    line = line[m.end(1):]
                else:
                    line = line[m.end():]
                #
                # Very ugly special case to deal with \item[].  The catch
                # is that this needs to occur outside the for loop that
                # handles attribute parsing so we can 'continue' the outer
                # loop.
                #
                if optional and params and isinstance(params[0], tuple):
                    # the attribute name isn't used in this special case
                    pushing(macroname, "a", depth + len(stack))
                    stack.append(macroname)
                    self.write("(%s\n" % macroname)
                    m = _start_optional_rx.match(line)
                    if m:
                        self.line = line[m.end():]
                        rest = self.subconvert("]", depth + len(stack))
                        if rest is None:
                            raise LaTeXFormatError(
                                "no closing ']' for optional argument of "
                                + macroname)
                        line = "}" + rest
                    continue
                # handle attribute mappings here:
                for attrname in params:
                    if optional:
                        # Only the first parameter can be optional.
                        optional = 0
                        if isinstance(attrname, str):
                            m = _optional_rx.match(line)
                            if m:
                                line = line[m.end():]
                                self.write("A%s TOKEN %s\n"
                                           % (attrname, encode(m.group(1))))
                    elif isinstance(attrname, tuple):
                        # Sub-element holding the macro's content
                        # (\section-like): open the macro itself now, then
                        # carry on as if the sub-element were the macro so
                        # that the braces delimit the sub-element.
                        pushing(macroname, "b", len(stack) + depth)
                        stack.append(macroname)
                        self.write("(%s\n" % macroname)
                        macroname = attrname[0]
                        m = _start_group_rx.match(line)
                        if m:
                            line = line[m.end():]
                    elif isinstance(attrname, list):
                        # A normal subelement.
                        attrname = attrname[0]
                        if not opened:
                            opened = 1
                            self.write("(%s\n" % macroname)
                            pushing(macroname, "c", len(stack) + depth)
                        self.write("(%s\n" % attrname)
                        pushing(attrname, "sub-elem", len(stack) + depth + 1)
                        # Skip to just past the opening brace and convert
                        # the braced content recursively.
                        self.line = skip_white(line)[1:]
                        rest = self.subconvert("}", depth + len(stack) + 2)
                        if rest is None:
                            raise LaTeXFormatError(
                                "no closing '}' for %s of %s"
                                % (attrname, macroname))
                        # subconvert() leaves the closing brace in place;
                        # drop it so the next parameter starts cleanly.
                        line = rest[1:]
                        popping(attrname, "sub-elem", len(stack) + depth + 1)
                        self.write(")%s\n" % attrname)
                    else:
                        m = _parameter_rx.match(line)
                        if not m:
                            raise LaTeXFormatError(
                                "could not extract parameter %s for %s: %s"
                                % (attrname, macroname, repr(line[:100])))
                        value = m.group(1)
                        if _token_rx.match(value):
                            dtype = "TOKEN"
                        else:
                            dtype = "CDATA"
                        self.write("A%s %s %s\n"
                                   % (attrname, dtype, encode(value)))
                        line = line[m.end():]
                if params and isinstance(params[-1], str) \
                   and (not empty) and not environ:
                    # attempt to strip off next '{'
                    m = _start_group_rx.match(line)
                    if not m:
                        raise LaTeXFormatError(
                            "non-empty element '%s' has no content: %s"
                            % (macroname, line[:12]))
                    line = line[m.end():]
                if not opened:
                    self.write("(%s\n" % macroname)
                    pushing(macroname, "d", len(stack) + depth)
                if empty:
                    # Fake a closing brace so the element is closed on the
                    # next pass through the loop.
                    line = "}" + line
                stack.append(macroname)
                self.pop_output()
                continue
            if line[0] == "}":
                # end of macro or group
                if not stack:
                    raise LaTeXFormatError(
                        "unmatched '}': %s" % repr(line[:100]))
                macroname = stack[-1]
                conversion = self.table.get(macroname)
                if macroname \
                   and macroname not in self.discards \
                   and not isinstance(conversion, str):
                    # otherwise, it was just a bare group
                    self.write(")%s\n" % stack[-1])
                    popping(macroname, "d", len(stack) + depth - 1)
                del stack[-1]
                line = line[1:]
                continue
            if line[0] == "{":
                # A bare group is tracked with an empty name.
                pushing("", "e", len(stack) + depth)
                stack.append("")
                line = line[1:]
                continue
            if line[0] == "\\" and len(line) > 1 \
               and line[1] in ESCAPED_CHARS:
                self.write("-%s\n" % encode(line[1]))
                line = line[2:]
                continue
            if line[:2] == r"\\":
                self.write("(BREAK\n)BREAK\n")
                line = line[2:]
                continue
            m = _text_rx.match(line)
            if m:
                text = encode(m.group())
                self.write("-%s\n" % text)
                line = line[m.end():]
                continue
            # special case because of \item[]
            if line[0] == "]":
                self.write("-]\n")
                line = line[1:]
                continue
            # avoid infinite loops
            extra = ""
            if len(line) > 100:
                extra = "..."
            raise LaTeXFormatError("could not identify markup: %s%s"
                                   % (repr(line[:100]), extra))
        while stack and stack[-1] in self.autoclosing:
            self.write("-\\n\n")
            self.write(")%s\n" % stack[-1])
            popping(stack.pop(), "e", len(stack) + depth - 1)
        if stack:
            raise LaTeXFormatError("elements remain on stack: "
                                   + ", ".join(stack))
        # otherwise we just ran out of input here...

    def convert(self):
        """Convert the whole document."""
        self.subconvert()

    def start_macro(self, name):
        """Look up *name* and return ``(params, optional, empty, environ)``.

        Names missing from the table get an entry with no parameters and all
        flags false.  For an entry flagged empty an ``e`` event is written;
        an entry flagged nocontent is reported as empty without writing it.
        """
        conversion = self.table.get(name, ([], 0, 0, 0, 0))
        params, optional, empty, environ, nocontent = conversion
        if empty:
            self.write("e\n")
        elif nocontent:
            empty = 1
        return params, optional, empty, environ
Fred Drake30a68c71998-11-23 16:59:39 +0000317
318
def convert(ifp, ofp, table=None, discards=(), autoclosing=()):
    """Convert the LaTeX document read from *ifp*, writing events to *ofp*.

    *table*, *discards* and *autoclosing* are passed on to Conversion; a
    *table* of None means an empty table.  An IOError whose errno is EPIPE
    is swallowed; any other IOError propagates.
    """
    if table is None:
        table = {}
    c = Conversion(ifp, ofp, table, discards, autoclosing)
    try:
        c.convert()
    except IOError as err:
        # Read errno from the exception instead of unpacking it, which only
        # works for exceptions carrying exactly two arguments.
        if err.errno != errno.EPIPE:
            raise
326
327
def skip_white(line):
    """Return *line* without its leading whitespace and "%" characters.

    While the string starts with a space, "%", newline or tab, that
    character and any whitespace following it are removed.
    """
    while line and line[0] in " %\n\t":
        line = line[1:].lstrip()
    return line
332
333
def main():
    """Command-line entry point: ``script input-file [output-file]``.

    Converts the input file using the built-in conversion table, writing to
    the output file, or to standard output when none is given.  With any
    other number of arguments a usage message is written to stderr and the
    process exits with status 2.
    """
    if len(sys.argv) == 2:
        ifp = open(sys.argv[1])
        ofp = sys.stdout
    elif len(sys.argv) == 3:
        ifp = open(sys.argv[1])
        ofp = open(sys.argv[2], "w")
    else:
        sys.stderr.write("usage: %s input-file [output-file]\n"
                         % sys.argv[0])
        sys.exit(2)
    table = {
        # entries have the form:
        # name: ([attribute names], is1stOptional, isEmpty, isEnv, nocontent)
        # attribute names can be:
        #   "string" -- normal attribute
        #   ("string",) -- sub-element with content of macro; like for \section
        #   ["string"] -- sub-element
        "appendix": ([], 0, 1, 0, 0),
        "bifuncindex": (["name"], 0, 1, 0, 0),
        "catcode": ([], 0, 1, 0, 0),
        "cfuncdesc": (["type", "name", ("args",)], 0, 0, 1, 0),
        "chapter": ([("title",)], 0, 0, 0, 0),
        "chapter*": ([("title",)], 0, 0, 0, 0),
        "classdesc": (["name", ("args",)], 0, 0, 1, 0),
        "ctypedesc": (["name"], 0, 0, 1, 0),
        "cvardesc": (["type", "name"], 0, 0, 1, 0),
        "datadesc": (["name"], 0, 0, 1, 0),
        "declaremodule": (["id", "type", "name"], 1, 1, 0, 0),
        "deprecated": (["release"], 0, 0, 0, 0),
        "documentclass": (["classname"], 0, 1, 0, 0),
        "excdesc": (["name"], 0, 0, 1, 0),
        "funcdesc": (["name", ("args",)], 0, 0, 1, 0),
        "funcdescni": (["name", ("args",)], 0, 0, 1, 0),
        "funcline": (["name"], 0, 0, 0, 0),
        "funclineni": (["name"], 0, 0, 0, 0),
        "geq": ([], 0, 1, 0, 0),
        "hline": ([], 0, 1, 0, 0),
        "indexii": (["ie1", "ie2"], 0, 1, 0, 0),
        "indexiii": (["ie1", "ie2", "ie3"], 0, 1, 0, 0),
        "indexiv": (["ie1", "ie2", "ie3", "ie4"], 0, 1, 0, 0),
        "indexname": ([], 0, 0, 0, 0),
        "input": (["source"], 0, 1, 0, 0),
        "item": ([("leader",)], 1, 0, 0, 0),
        "label": (["id"], 0, 1, 0, 0),
        "labelwidth": ([], 0, 1, 0, 0),
        "LaTeX": ([], 0, 1, 0, 0),
        "leftmargin": ([], 0, 1, 0, 0),
        "leq": ([], 0, 1, 0, 0),
        "lineii": ([["entry"], ["entry"]], 0, 0, 0, 1),
        "lineiii": ([["entry"], ["entry"], ["entry"]], 0, 0, 0, 1),
        "lineiv": ([["entry"], ["entry"], ["entry"], ["entry"]], 0, 0, 0, 1),
        "localmoduletable": ([], 0, 1, 0, 0),
        "manpage": (["name", "section"], 0, 1, 0, 0),
        "memberdesc": (["class", "name"], 1, 0, 1, 0),
        "methoddesc": (["class", "name", ("args",)], 1, 0, 1, 0),
        "methoddescni": (["class", "name", ("args",)], 1, 0, 1, 0),
        "methodline": (["class", "name"], 1, 0, 0, 0),
        "methodlineni": (["class", "name"], 1, 0, 0, 0),
        "moduleauthor": (["name", "email"], 0, 1, 0, 0),
        "opcodedesc": (["name", "var"], 0, 0, 1, 0),
        "par": ([], 0, 1, 0, 0),
        "paragraph": ([("title",)], 0, 0, 0, 0),
        "renewcommand": (["macro"], 0, 0, 0, 0),
        "rfc": (["num"], 0, 1, 0, 0),
        "section": ([("title",)], 0, 0, 0, 0),
        "sectionauthor": (["name", "email"], 0, 1, 0, 0),
        "seemodule": (["ref", "name"], 1, 0, 0, 0),
        "stindex": (["type"], 0, 1, 0, 0),
        "subparagraph": ([("title",)], 0, 0, 0, 0),
        "subsection": ([("title",)], 0, 0, 0, 0),
        "subsubsection": ([("title",)], 0, 0, 0, 0),
        "list": (["bullet", "init"], 0, 0, 1, 0),
        "tableii": (["colspec", "style",
                     ["entry"], ["entry"]], 0, 0, 1, 0),
        "tableiii": (["colspec", "style",
                      ["entry"], ["entry"], ["entry"]], 0, 0, 1, 0),
        "tableiv": (["colspec", "style",
                     ["entry"], ["entry"], ["entry"], ["entry"]], 0, 0, 1, 0),
        "version": ([], 0, 1, 0, 0),
        "versionadded": (["version"], 0, 1, 0, 0),
        "versionchanged": (["version"], 0, 1, 0, 0),
        "withsubitem": (["text"], 0, 0, 0, 0),
        #
        "ABC": ([], 0, 1, 0, 0),
        "ASCII": ([], 0, 1, 0, 0),
        "C": ([], 0, 1, 0, 0),
        "Cpp": ([], 0, 1, 0, 0),
        "EOF": ([], 0, 1, 0, 0),
        "e": ([], 0, 1, 0, 0),
        "ldots": ([], 0, 1, 0, 0),
        "NULL": ([], 0, 1, 0, 0),
        "POSIX": ([], 0, 1, 0, 0),
        "UNIX": ([], 0, 1, 0, 0),
        #
        # Things that will actually be going away!
        # (makeindex, makemodindex and maketitle are listed only here; they
        # used to appear twice in this table with identical values.)
        #
        "fi": ([], 0, 1, 0, 0),
        "ifhtml": ([], 0, 1, 0, 0),
        "makeindex": ([], 0, 1, 0, 0),
        "makemodindex": ([], 0, 1, 0, 0),
        "maketitle": ([], 0, 1, 0, 0),
        "noindent": ([], 0, 1, 0, 0),
        "protect": ([], 0, 1, 0, 0),
        "tableofcontents": ([], 0, 1, 0, 0),
    }
    try:
        convert(ifp, ofp, table,
                discards=["fi", "ifhtml", "makeindex", "makemodindex",
                          "maketitle", "noindent", "tableofcontents"],
                autoclosing=["chapter", "section", "subsection",
                             "subsubsection", "paragraph", "subparagraph"])
    finally:
        # Close what was opened here, but leave standard output alone.
        ifp.close()
        if ofp is not sys.stdout:
            ofp.close()
Fred Drake30a68c71998-11-23 16:59:39 +0000446
447
# Run the converter only when this file is executed as a script.
if __name__ == "__main__":
    main()