#! /usr/bin/env python

"""Generate ESIS events based on a LaTeX source document and configuration
data.
"""
# Placeholder filled in by the revision-control system's keyword expansion.
__version__ = '$Revision$'
7
import errno
import re
import string
import StringIO
import sys

from esistools import encode
15
Fred Drake30a68c71998-11-23 16:59:39 +000016
# Set to a true value to make subconvert(), pushing() and popping() write
# a trace of the element stack to sys.stderr.
DEBUG = 0
18
19
class Error(Exception):
    """Base class for the exceptions defined by this module."""
22
class LaTeXFormatError(Error):
    """Raised when the LaTeX input cannot be interpreted by the converter."""
25
26
# Patterns used by subconvert(); all are applied with .match(), i.e. anchored
# at the start of the remaining input.
#
# \begin{name} and \end{name}; group 1 is the environment name.
_begin_env_rx = re.compile(r"[\\]begin{([^}]*)}")
_end_env_rx = re.compile(r"[\\]end{([^}]*)}")
# \name or \name*; group 1 is the macro name (including any "*"), group 2 is
# either the opening "{" or the whitespace that follows the name.
_begin_macro_rx = re.compile("[\\\\]([a-zA-Z]+[*]?)({|\\s*\n?)")
# A "%" comment through end of line (plus leading blanks of the next line);
# group 1 is the comment text.
_comment_rx = re.compile("%+ ?(.*)\n *")
# A run of ordinary text: anything except "]", "%", "\", "{" and "}".
_text_rx = re.compile(r"[^]%\\{}]+")
# An optional [...] argument; group 1 is the text between the brackets.
# ("\[" is used rather than "[[]" because a character set starting with a
# literal "[" triggers a "Possible nested set" FutureWarning in newer
# versions of the re module; the two spellings match the same text.)
_optional_rx = re.compile(r"\s*\[([^]]*)[]]")
# _parameter_rx is this complicated to allow {...} inside a parameter;
# this is useful to match tabular layout specifications like {c|p{24pt}}
_parameter_rx = re.compile("[ \n]*{(([^{}]|{[^}]*})*)}")
# A value that can be emitted as a TOKEN attribute rather than CDATA.
_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
# The opening "{" of a group / "[" of an optional argument, with any
# leading spaces or newlines.
_start_group_rx = re.compile("[ \n]*{")
_start_optional_rx = re.compile(r"[ \n]*\[")
39
40
# Characters that subconvert() emits literally when they follow a backslash.
ESCAPED_CHARS = "$%#^ {}&~"
Fred Drake30a68c71998-11-23 16:59:39 +000042
43
def pushing(name, point, depth):
    """Trace the opening of element *name* on stderr when DEBUG is set.

    *point* is a short label identifying the call site and *depth* is the
    number of spaces of indentation to put in front of the trace line.
    """
    if DEBUG:
        sys.stderr.write("%s<%s> at %s\n" % (" "*depth, name, point))
47
def popping(name, point, depth):
    """Trace the closing of element *name* on stderr when DEBUG is set.

    *point* is a short label identifying the call site and *depth* is the
    number of spaces of indentation to put in front of the trace line.
    """
    if DEBUG:
        sys.stderr.write("%s</%s> at %s\n" % (" "*depth, name, point))
51
52
def subconvert(line, ofp, table, discards, autoclosing, endchar=None, depth=0):
    """Convert the LaTeX text in *line* to ESIS event lines written to *ofp*.

    The text is consumed from the front, one construct at a time, while a
    local stack records the elements (and bare "{" groups, stored as "")
    that are currently open.  The lines written are of the forms
    "(name" / ")name" (element open / close), "-text" (character data),
    "Aname TYPE value" (attribute) and "e" (written before an empty
    element is opened).

    Parameters:
      line         the LaTeX source still to be processed (a string)
      ofp          object with a write() method receiving the output
      table        mapping of macro name to a 5-tuple
                   (params, optional, empty, environ, nocontent); macros
                   not in the table are treated as ([], 0, 0, 0, 0)
      discards     names of macros whose own open/attribute events are
                   written to a throw-away buffer and whose close event
                   is suppressed
      autoclosing  names of elements that are closed implicitly when
                   another element of the same name starts or when the
                   input ends
      endchar      when given, the character that ends this (nested)
                   conversion once it is seen with an empty stack
      depth        indentation offset, used only for the DEBUG trace

    Returns the text following *endchar* when *endchar* was found.  When
    the input is exhausted and no *endchar* was requested, returns None.

    Raises LaTeXFormatError for markup that cannot be interpreted:
    mismatched environment ends, unmatched "}", an unterminated verbatim
    environment, missing parameters, elements left open at end of input,
    or input that ends before *endchar* is seen.
    """
    if DEBUG and endchar:
        sys.stderr.write("subconvert(%s, ..., endchar=%s)\n"
                         % (repr(line[:20]), repr(endchar)))
    stack = []
    while line:
        if line[0] == endchar and not stack:
            if DEBUG:
                sys.stderr.write("subconvert() --> %s\n" % repr(line[1:21]))
            return line[1:]
        m = _comment_rx.match(line)
        if m:
            text = m.group(1)
            if text:
                ofp.write("(COMMENT\n- %s \n)COMMENT\n-\\n\n" % encode(text))
            line = line[m.end():]
            continue
        m = _begin_env_rx.match(line)
        if m:
            # re-write \begin{name} as \name so the macro handler below
            # deals with it
            line = r"\%s %s" % (m.group(1), line[m.end():])
            continue
        m = _end_env_rx.match(line)
        if m:
            # end of environment
            envname = m.group(1)
            if envname == "document":
                # special magic: everything above the bottom of the stack
                # must be something that may be closed implicitly
                for n in stack[1:]:
                    if n not in autoclosing:
                        raise LaTeXFormatError(
                            "open element on stack: " + repr(n))
                # should be more careful, but this is easier to code:
                stack = []
                ofp.write(")document\n")
            elif stack and envname == stack[-1]:
                ofp.write(")%s\n" % envname)
                del stack[-1]
                popping(envname, "a", len(stack) + depth)
            else:
                # also reached when the stack is empty, so a stray \end{...}
                # is reported as a format error instead of an IndexError
                sys.stderr.write("stack: %s\n" % repr(stack))
                raise LaTeXFormatError(
                    "environment close for %s doesn't match" % envname)
            line = line[m.end():]
            continue
        m = _begin_macro_rx.match(line)
        if m:
            # start of macro
            macroname = m.group(1)
            if macroname == "verbatim":
                # really magic case!  Everything up to \end{verbatim} is
                # passed through as character data.
                end_marker = "\\end{verbatim}"
                pos = line.find(end_marker)
                if pos < 0:
                    # find() returning -1 would otherwise silently slice
                    # the text at the wrong places
                    raise LaTeXFormatError(
                        "verbatim environment is never closed")
                text = line[m.end(1):pos]
                ofp.write("(verbatim\n")
                ofp.write("-%s\n" % encode(text))
                ofp.write(")verbatim\n")
                line = line[pos + len(end_marker):]
                continue
            numbered = 1
            if macroname[-1] == "*":
                macroname = macroname[:-1]
                numbered = 0
            if macroname in autoclosing and macroname in stack:
                # close everything opened since the previous element of
                # this name, then that element itself
                while stack[-1] != macroname:
                    if stack[-1] and stack[-1] not in discards:
                        ofp.write(")%s\n-\\n\n" % stack[-1])
                    popping(stack[-1], "b", len(stack) + depth - 1)
                    del stack[-1]
                if macroname not in discards:
                    ofp.write("-\\n\n)%s\n-\\n\n" % macroname)
                popping(macroname, "c", len(stack) + depth - 1)
                del stack[-1]
            # events for discarded macros go to a scratch buffer; the real
            # output is restored before leaving this branch
            real_ofp = ofp
            if macroname in discards:
                ofp = StringIO.StringIO()
            #
            conversion = table.get(macroname, ([], 0, 0, 0, 0))
            params, optional, empty, environ, nocontent = conversion
            if empty:
                ofp.write("e\n")
            elif nocontent:
                # no "e" line is written, but from here on the element is
                # handled like an empty one
                empty = 1
            if not numbered:
                ofp.write("Anumbered TOKEN no\n")
            opened = 0
            # rip off the macroname; m.end(1) stops right after the name,
            # m.end() also consumes the "{" or whitespace that followed it
            if params:
                if optional and len(params) == 1:
                    line = line[m.end():]
                else:
                    line = line[m.end(1):]
            elif empty:
                line = line[m.end(1):]
            else:
                line = line[m.end():]
            #
            # Very ugly special case to deal with \item[].  The catch is that
            # this needs to occur outside the for loop that handles attribute
            # parsing so we can 'continue' the outer loop.
            #
            if optional and params and isinstance(params[0], tuple):
                # the attribute name isn't used in this special case
                pushing(macroname, "a", depth + len(stack))
                stack.append(macroname)
                ofp.write("(%s\n" % macroname)
                m = _start_optional_rx.match(line)
                if m:
                    line = line[m.end():]
                    line = subconvert(line, ofp, table, discards,
                                      autoclosing, endchar="]",
                                      depth=depth + len(stack))
                # a synthetic "}" makes the main loop close the element
                line = "}" + line
                ofp = real_ofp
                continue
            # handle attribute mappings here:
            for attrname in params:
                if optional:
                    # only the first parameter can be optional
                    optional = 0
                    if isinstance(attrname, str):
                        m = _optional_rx.match(line)
                        if m:
                            line = line[m.end():]
                            ofp.write("A%s TOKEN %s\n"
                                      % (attrname, encode(m.group(1))))
                elif isinstance(attrname, tuple):
                    # This is a sub-element; but don't place the
                    # element we found on the stack (\section-like)
                    pushing(macroname, "b", len(stack) + depth)
                    stack.append(macroname)
                    ofp.write("(%s\n" % macroname)
                    # the sub-element is what gets opened and pushed below
                    macroname = attrname[0]
                    m = _start_group_rx.match(line)
                    if m:
                        line = line[m.end():]
                elif isinstance(attrname, list):
                    # A normal subelement.
                    attrname = attrname[0]
                    if not opened:
                        opened = 1
                        ofp.write("(%s\n" % macroname)
                        pushing(macroname, "c", len(stack) + depth)
                    ofp.write("(%s\n" % attrname)
                    pushing(attrname, "sub-elem", len(stack) + depth + 1)
                    # [1:] drops the "{" that opens the sub-element's group
                    line = subconvert(skip_white(line)[1:], ofp, table,
                                      discards, autoclosing, endchar="}",
                                      depth=depth + len(stack) + 2)
                    popping(attrname, "sub-elem", len(stack) + depth + 1)
                    ofp.write(")%s\n" % attrname)
                else:
                    m = _parameter_rx.match(line)
                    if not m:
                        raise LaTeXFormatError(
                            "could not extract parameter %s for %s: %s"
                            % (attrname, macroname, repr(line[:100])))
                    value = m.group(1)
                    if _token_rx.match(value):
                        dtype = "TOKEN"
                    else:
                        dtype = "CDATA"
                    ofp.write("A%s %s %s\n"
                              % (attrname, dtype, encode(value)))
                    line = line[m.end():]
            if params and isinstance(params[-1], str) \
               and (not empty) and not environ:
                # attempt to strip off next '{'
                m = _start_group_rx.match(line)
                if not m:
                    raise LaTeXFormatError(
                        "non-empty element '%s' has no content: %s"
                        % (macroname, line[:12]))
                line = line[m.end():]
            if not opened:
                ofp.write("(%s\n" % macroname)
                pushing(macroname, "d", len(stack) + depth)
            if empty:
                # a synthetic "}" makes the main loop close the element
                line = "}" + line
            stack.append(macroname)
            ofp = real_ofp
            continue
        if line[0] == "}":
            # end of macro
            if not stack:
                raise LaTeXFormatError(
                    "unmatched '}': %s" % repr(line[:100]))
            macroname = stack[-1]
            conversion = table.get(macroname)
            if macroname \
               and macroname not in discards \
               and not isinstance(conversion, str):
                # otherwise, it was just a bare group
                ofp.write(")%s\n" % stack[-1])
            popping(macroname, "d", len(stack) + depth - 1)
            del stack[-1]
            line = line[1:]
            continue
        if line[0] == "{":
            # a bare group is tracked on the stack as an empty name
            pushing("", "e", len(stack) + depth)
            stack.append("")
            line = line[1:]
            continue
        # the length test keeps a lone trailing backslash from raising
        # IndexError; it falls through to the "could not identify" error
        if line[0] == "\\" and len(line) > 1 and line[1] in ESCAPED_CHARS:
            ofp.write("-%s\n" % encode(line[1]))
            line = line[2:]
            continue
        if line[:2] == r"\\":
            ofp.write("(BREAK\n)BREAK\n")
            line = line[2:]
            continue
        m = _text_rx.match(line)
        if m:
            text = encode(m.group())
            ofp.write("-%s\n" % text)
            line = line[m.end():]
            continue
        # special case because of \item[]
        if line[0] == "]":
            ofp.write("-]\n")
            line = line[1:]
            continue
        # avoid infinite loops
        extra = ""
        if len(line) > 100:
            extra = "..."
        raise LaTeXFormatError("could not identify markup: %s%s"
                               % (repr(line[:100]), extra))
    while stack and stack[-1] in autoclosing:
        ofp.write("-\\n\n")
        ofp.write(")%s\n" % stack[-1])
        popping(stack[-1], "e", len(stack) + depth - 1)
        del stack[-1]
    if stack:
        raise LaTeXFormatError("elements remain on stack: "
                               + " ".join(stack))
    if endchar is not None:
        # A nested conversion ran off the end of the input.  Returning None
        # here would only surface later as a TypeError in the caller.
        raise LaTeXFormatError(
            "input ended before closing %s was found" % repr(endchar))
    # otherwise we just ran out of input here...
Fred Drake30a68c71998-11-23 16:59:39 +0000286
287
def convert(ifp, ofp, table=None, discards=(), autoclosing=()):
    """Read a LaTeX document from *ifp* and write its conversion to *ofp*.

    The whole input is read at once, trailing whitespace is removed from
    every line, and the result is handed to subconvert() together with
    *table*, *discards* and *autoclosing* (see subconvert() for their
    meaning).  When *table* is omitted an empty table is used.

    An IOError whose errno is EPIPE is swallowed, so that the reader of the
    output going away ends the conversion quietly; any other IOError is
    re-raised.
    """
    if table is None:
        table = {}
    lines = [text.rstrip() for text in ifp.read().split("\n")]
    data = "\n".join(lines)
    try:
        subconvert(data, ofp, table, discards, autoclosing)
    except IOError as exc:
        # Inspect the errno attribute rather than unpacking the exception:
        # unpacking fails for IOErrors that carry more than two arguments.
        if exc.errno != errno.EPIPE:
            raise
298
299
def skip_white(line):
    """Return *line* without its leading whitespace and "%" characters.

    While the first character is a space, "%", newline or tab, that
    character and any whitespace following it are removed.  Only the "%"
    characters themselves are dropped; text after them is kept.
    """
    while line and line[0] in " %\n\t":
        line = line[1:].lstrip()
    return line
304
305
def main():
    """Command-line entry point: convert a LaTeX file to ESIS events.

    Usage: <script> infile [outfile]

    With one argument the output goes to standard output; with two it is
    written to the named file.  Any other number of arguments prints a
    usage message on stderr and exits with status 2.
    """
    argc = len(sys.argv)
    if argc not in (2, 3):
        # report the problem here; no usage() helper exists in this module
        program = (sys.argv[:1] or ["<script>"])[0]
        sys.stderr.write("usage: %s infile [outfile]\n" % program)
        sys.exit(2)
    table = {
        # entries have the form:
        # name: ([attribute names], is1stOptional, isEmpty, isEnv, nocontent)
        # attribute names can be:
        #   "string"    -- normal attribute
        #   ("string",) -- sub-element with content of macro; like for \section
        #   ["string"]  -- sub-element
        "appendix": ([], 0, 1, 0, 0),
        "bifuncindex": (["name"], 0, 1, 0, 0),
        "catcode": ([], 0, 1, 0, 0),
        "cfuncdesc": (["type", "name", ("args",)], 0, 0, 1, 0),
        "chapter": ([("title",)], 0, 0, 0, 0),
        # NOTE(review): subconvert() appears to strip a trailing "*" before
        # looking a macro up, so this key may never be consulted -- confirm.
        "chapter*": ([("title",)], 0, 0, 0, 0),
        "classdesc": (["name", ("args",)], 0, 0, 1, 0),
        "ctypedesc": (["name"], 0, 0, 1, 0),
        "cvardesc": (["type", "name"], 0, 0, 1, 0),
        "datadesc": (["name"], 0, 0, 1, 0),
        "declaremodule": (["id", "type", "name"], 1, 1, 0, 0),
        "deprecated": (["release"], 0, 0, 0, 0),
        "documentclass": (["classname"], 0, 1, 0, 0),
        "excdesc": (["name"], 0, 0, 1, 0),
        "funcdesc": (["name", ("args",)], 0, 0, 1, 0),
        "funcdescni": (["name", ("args",)], 0, 0, 1, 0),
        "geq": ([], 0, 1, 0, 0),
        "hline": ([], 0, 1, 0, 0),
        "indexii": (["ie1", "ie2"], 0, 1, 0, 0),
        "indexiii": (["ie1", "ie2", "ie3"], 0, 1, 0, 0),
        "indexiv": (["ie1", "ie2", "ie3", "ie4"], 0, 1, 0, 0),
        "indexname": ([], 0, 0, 0, 0),
        "input": (["source"], 0, 1, 0, 0),
        "item": ([("leader",)], 1, 0, 0, 0),
        "label": (["id"], 0, 1, 0, 0),
        "labelwidth": ([], 0, 1, 0, 0),
        "LaTeX": ([], 0, 1, 0, 0),
        "leftmargin": ([], 0, 1, 0, 0),
        "leq": ([], 0, 1, 0, 0),
        "lineii": ([["entry"], ["entry"]], 0, 0, 0, 1),
        "lineiii": ([["entry"], ["entry"], ["entry"]], 0, 0, 0, 1),
        "lineiv": ([["entry"], ["entry"], ["entry"], ["entry"]], 0, 0, 0, 1),
        "localmoduletable": ([], 0, 1, 0, 0),
        "manpage": (["name", "section"], 0, 1, 0, 0),
        "memberdesc": (["class", "name"], 1, 0, 1, 0),
        "methoddesc": (["class", "name", ("args",)], 1, 0, 1, 0),
        "methoddescni": (["class", "name", ("args",)], 1, 0, 1, 0),
        "moduleauthor": (["name", "email"], 0, 1, 0, 0),
        "opcodedesc": (["name", "var"], 0, 0, 1, 0),
        "par": ([], 0, 1, 0, 0),
        "paragraph": ([("title",)], 0, 0, 0, 0),
        "renewcommand": (["macro"], 0, 0, 0, 0),
        "rfc": (["num"], 0, 1, 0, 0),
        "section": ([("title",)], 0, 0, 0, 0),
        "sectionauthor": (["name", "email"], 0, 1, 0, 0),
        "seemodule": (["ref", "name"], 1, 0, 0, 0),
        "stindex": (["type"], 0, 1, 0, 0),
        "subparagraph": ([("title",)], 0, 0, 0, 0),
        "subsection": ([("title",)], 0, 0, 0, 0),
        "subsubsection": ([("title",)], 0, 0, 0, 0),
        "list": (["bullet", "init"], 0, 0, 1, 0),
        "tableii": (["colspec", "style",
                     ["entry"], ["entry"]], 0, 0, 1, 0),
        "tableiii": (["colspec", "style",
                      ["entry"], ["entry"], ["entry"]], 0, 0, 1, 0),
        "tableiv": (["colspec", "style",
                     ["entry"], ["entry"], ["entry"], ["entry"]], 0, 0, 1, 0),
        "version": ([], 0, 1, 0, 0),
        "versionadded": (["version"], 0, 1, 0, 0),
        "versionchanged": (["version"], 0, 1, 0, 0),
        "withsubitem": (["text"], 0, 0, 0, 0),
        #
        "ABC": ([], 0, 1, 0, 0),
        "ASCII": ([], 0, 1, 0, 0),
        "C": ([], 0, 1, 0, 0),
        "Cpp": ([], 0, 1, 0, 0),
        "EOF": ([], 0, 1, 0, 0),
        "e": ([], 0, 1, 0, 0),
        "ldots": ([], 0, 1, 0, 0),
        "NULL": ([], 0, 1, 0, 0),
        "POSIX": ([], 0, 1, 0, 0),
        "UNIX": ([], 0, 1, 0, 0),
        #
        # Things that will actually be going away!
        #
        # (makeindex, makemodindex and maketitle used to be listed twice in
        # this literal with identical values; they are kept only here.)
        "fi": ([], 0, 1, 0, 0),
        "ifhtml": ([], 0, 1, 0, 0),
        "makeindex": ([], 0, 1, 0, 0),
        "makemodindex": ([], 0, 1, 0, 0),
        "maketitle": ([], 0, 1, 0, 0),
        "noindent": ([], 0, 1, 0, 0),
        "protect": ([], 0, 1, 0, 0),
        "tableofcontents": ([], 0, 1, 0, 0),
        }
    discards = ["fi", "ifhtml", "makeindex", "makemodindex", "maketitle",
                "noindent", "tableofcontents"]
    autoclosing = ["chapter", "section", "subsection", "subsubsection",
                   "paragraph", "subparagraph"]
    ifp = open(sys.argv[1])
    try:
        if argc == 3:
            ofp = open(sys.argv[2], "w")
        else:
            ofp = sys.stdout
        try:
            convert(ifp, ofp, table,
                    discards=discards, autoclosing=autoclosing)
        finally:
            # close only what was opened here; stdout is left alone
            if ofp is not sys.stdout:
                ofp.close()
    finally:
        ifp.close()
Fred Drake30a68c71998-11-23 16:59:39 +0000414
415
# Run the converter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()