blob: 4a86747600fdbbb44d669c046e83e3cca619dad5 [file] [log] [blame]
Fred Drake30a68c71998-11-23 16:59:39 +00001#! /usr/bin/env python
2
3"""Generate ESIS events based on a LaTeX source document and configuration
4data.
5
6
7"""
8__version__ = '$Revision$'
9
10import errno
11import re
12import string
13import StringIO
14import sys
15
Fred Drakeaeea9811998-12-01 19:04:12 +000016from esistools import encode
17
Fred Drake30a68c71998-11-23 16:59:39 +000018
class Error(Exception):
    """Base class for the exceptions raised by this module."""
21
class LaTeXFormatError(Error):
    """Raised when the LaTeX input cannot be interpreted by the converter."""
24
25
# Lexer patterns used by subconvert().  All of them are applied with
# .match(), i.e. anchored at the start of the remaining input.  Literal
# brackets and braces are backslash-escaped so that the patterns do not
# depend on how the regex engine treats a bare "{" or a "[" at the start
# of a character class.

# \begin{name} / \end{name}; group 1 is the environment name.
_begin_env_rx = re.compile(r"\\begin\{([^}]*)\}")
_end_env_rx = re.compile(r"\\end\{([^}]*)\}")
# \name or \name*; group 1 is the macro name (including any "*"), group 2
# is either the opening "{" or the whitespace following the name.
_begin_macro_rx = re.compile(r"\\([a-zA-Z]+[*]?)(\{|\s*\n?)")
# A "%" comment up to the end of the line, plus the indentation of the
# next line; group 1 is the comment text.
_comment_rx = re.compile(r"%+ ?(.*)\n *")
# A run of ordinary text: anything except "]", "%", backslash and braces.
_text_rx = re.compile(r"[^]%\\{}]+")
# An optional [...] argument; group 1 is the text between the brackets.
_optional_rx = re.compile(r"\s*\[([^]]*)\]")
# _parameter_rx is this complicated to allow {...} inside a parameter;
# this is useful to match tabular layout specifications like {c|p{24pt}}
_parameter_rx = re.compile(r"[ \n]*\{(([^{}]|\{[^}]*\})*)\}")
# Decides whether a parameter value is written as TOKEN (match) or CDATA.
_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
# Opening "{" / "[" of a group or optional argument, with leading blanks.
_start_group_rx = re.compile(r"[ \n]*\{")
_start_optional_rx = re.compile(r"[ \n]*\[")
38
39
# Characters that subconvert() writes out as literal character data when
# they directly follow a backslash in the input (for example "\%" or "\{").
ESCAPED_CHARS = "$%#^ {}&~"
Fred Drake30a68c71998-11-23 16:59:39 +000041
42
def subconvert(line, ofp, table, discards, autoclosing, endchar=None):
    """Convert the LaTeX text in *line* into ESIS events written to *ofp*.

    The input is consumed from the front, one construct at a time.  For
    each construct one or more event lines are written: "(name" / ")name"
    to open and close an element, "Aname TYPE value" for an attribute,
    "e" before an element declared empty, and "-text" for character data.

    Arguments:

    line -- the text still to be converted.
    ofp -- object with a write() method that receives the event lines.
    table -- maps a macro or environment name to a tuple
        (params, optional, empty, environ); names that are not present
        are treated as ([], 0, 0, 0).  Each item of params is a string
        (an attribute read from a {...} parameter, or from [...] when it
        is the first item and optional is true), a 1-tuple naming a
        sub-element, or a 1-item list naming a sub-element that is pushed
        on the element stack together with its parent.
    discards -- names whose own open/close events and attributes are not
        written to *ofp*.
    autoclosing -- names of elements that are closed implicitly when an
        element of the same name starts again or when the input ends.
    endchar -- if given, conversion stops at the first occurrence of this
        character that is seen while no element is open.

    Returns the text following *endchar* when it was found, otherwise
    None once all input has been consumed.

    Raises LaTeXFormatError for markup that cannot be interpreted,
    including unbalanced groups and environments.
    """
    stack = []
    while line:
        if line[0] == endchar and not stack:
            return line[1:]
        m = _comment_rx.match(line)
        if m:
            text = m.group(1)
            if text:
                ofp.write("(COMMENT\n")
                ofp.write("- %s \n" % encode(text))
                ofp.write(")COMMENT\n")
                ofp.write("-\\n\n")
            line = line[m.end():]
            continue
        m = _begin_env_rx.match(line)
        if m:
            # re-write to use the macro handler
            line = r"\%s %s" % (m.group(1), line[m.end():])
            continue
        m = _end_env_rx.match(line)
        if m:
            # end of environment
            envname = m.group(1)
            if envname == "document":
                # special magic: anything still open above the outermost
                # element must be something that closes implicitly
                for n in stack[1:]:
                    if n not in autoclosing:
                        raise LaTeXFormatError(
                            "open element on stack: " + repr(n))
                # should be more careful, but this is easier to code:
                stack = []
                ofp.write(")document\n")
            elif stack and envname == stack[-1]:
                ofp.write(")%s\n" % envname)
                del stack[-1]
            else:
                # Show the open elements on stderr; stdout may be the
                # stream the ESIS events are being written to.
                sys.stderr.write(repr(stack) + "\n")
                raise LaTeXFormatError(
                    "environment close for %s doesn't match" % envname)
            line = line[m.end():]
            continue
        m = _begin_macro_rx.match(line)
        if m:
            # start of macro
            macroname = m.group(1)
            if macroname == "verbatim":
                # really magic case!  Everything up to the matching
                # \end{verbatim} is passed through as character data.
                terminator = "\\end{verbatim}"
                pos = line.find(terminator)
                if pos < 0:
                    # find() returns -1 here; slicing with it would
                    # silently drop and duplicate text.
                    raise LaTeXFormatError(
                        "verbatim environment is not terminated")
                text = line[m.end(1):pos]
                ofp.write("(verbatim\n")
                ofp.write("-%s\n" % encode(text))
                ofp.write(")verbatim\n")
                line = line[pos + len(terminator):]
                continue
            numbered = 1
            if macroname[-1] == "*":
                macroname = macroname[:-1]
                numbered = 0
            if macroname in autoclosing and macroname in stack:
                # Close everything opened since the previous element of
                # this name, then that element itself.
                while stack[-1] != macroname:
                    if stack[-1] and stack[-1] not in discards:
                        ofp.write(")%s\n-\\n\n" % stack[-1])
                    del stack[-1]
                if macroname not in discards:
                    ofp.write("-\\n\n)%s\n-\\n\n" % macroname)
                del stack[-1]
            # Events for a discarded element go to a scratch buffer; the
            # real stream is restored once the element has been opened.
            real_ofp = ofp
            if macroname in discards:
                ofp = StringIO.StringIO()
            #
            conversion = table.get(macroname, ([], 0, 0, 0))
            params, optional, empty, environ = conversion
            if empty:
                ofp.write("e\n")
            if not numbered:
                ofp.write("Anumbered TOKEN no\n")
            # rip off the macroname
            if params:
                if optional and len(params) == 1:
                    line = line[m.end():]
                else:
                    line = line[m.end(1):]
            elif empty:
                line = line[m.end(1):]
            else:
                line = line[m.end():]
            #
            # Very ugly special case to deal with \item[].  The catch is that
            # this needs to occur outside the for loop that handles attribute
            # parsing so we can 'continue' the outer loop.
            #
            if optional and params and isinstance(params[0], tuple):
                # the attribute name isn't used in this special case
                stack.append(macroname)
                ofp.write("(%s\n" % macroname)
                m = _start_optional_rx.match(line)
                if m:
                    line = line[m.end():]
                    line = subconvert(line, ofp, table, discards,
                                      autoclosing, endchar="]")
                # Let the "}" handler below close the element.
                line = "}" + line
                ofp = real_ofp
                continue
            # handle attribute mappings here:
            for attrname in params:
                if optional:
                    # Only the first parameter can be optional.
                    optional = 0
                    if isinstance(attrname, str):
                        m = _optional_rx.match(line)
                        if m:
                            line = line[m.end():]
                            ofp.write("A%s TOKEN %s\n"
                                      % (attrname, encode(m.group(1))))
                elif isinstance(attrname, tuple):
                    # This is a sub-element; but don't place the
                    # element we found on the stack (\section-like)
                    stack.append(macroname)
                    ofp.write("(%s\n" % macroname)
                    macroname = attrname[0]
                    m = _start_group_rx.match(line)
                    if m:
                        line = line[m.end():]
                elif isinstance(attrname, list):
                    # A normal subelement.
                    attrname = attrname[0]
                    stack.append(macroname)
                    stack.append(attrname)
                    ofp.write("(%s\n" % macroname)
                    macroname = attrname
                else:
                    m = _parameter_rx.match(line)
                    if not m:
                        raise LaTeXFormatError(
                            "could not extract parameter %s for %s: %s"
                            % (attrname, macroname, repr(line[:100])))
                    value = m.group(1)
                    if _token_rx.match(value):
                        dtype = "TOKEN"
                    else:
                        dtype = "CDATA"
                    ofp.write("A%s %s %s\n"
                              % (attrname, dtype, encode(value)))
                    line = line[m.end():]
            if params and isinstance(params[-1], str) \
               and (not empty) and not environ:
                # attempt to strip off next '{'
                m = _start_group_rx.match(line)
                if not m:
                    raise LaTeXFormatError(
                        "non-empty element '%s' has no content: %s"
                        % (macroname, line[:12]))
                line = line[m.end():]
            stack.append(macroname)
            ofp.write("(%s\n" % macroname)
            if empty:
                # Let the "}" handler below close the element right away.
                line = "}" + line
            ofp = real_ofp
            continue
        if line[0] == "}":
            # end of macro
            if not stack:
                raise LaTeXFormatError(
                    "unmatched '}': " + repr(line[:100]))
            macroname = stack[-1]
            conversion = table.get(macroname)
            if macroname \
               and macroname not in discards \
               and not isinstance(conversion, str):
                # otherwise, it was just a bare group
                ofp.write(")%s\n" % stack[-1])
            del stack[-1]
            line = line[1:]
            continue
        if line[0] == "{":
            # A bare group is tracked with an empty name.
            stack.append("")
            line = line[1:]
            continue
        # The length check keeps a lone trailing backslash from raising
        # IndexError; it is reported as unidentified markup below.
        if line[0] == "\\" and len(line) > 1 and line[1] in ESCAPED_CHARS:
            ofp.write("-%s\n" % encode(line[1]))
            line = line[2:]
            continue
        if line[:2] == r"\\":
            ofp.write("(BREAK\n)BREAK\n")
            line = line[2:]
            continue
        m = _text_rx.match(line)
        if m:
            text = encode(m.group())
            ofp.write("-%s\n" % text)
            line = line[m.end():]
            continue
        # special case because of \item[]
        if line[0] == "]":
            ofp.write("-]\n")
            line = line[1:]
            continue
        # avoid infinite loops
        extra = ""
        if len(line) > 100:
            extra = "..."
        raise LaTeXFormatError("could not identify markup: %s%s"
                               % (repr(line[:100]), extra))
    # End of input: close whatever is allowed to close implicitly.
    while stack and stack[-1] in autoclosing:
        ofp.write("-\\n\n")
        ofp.write(")%s\n" % stack[-1])
        del stack[-1]
    if stack:
        raise LaTeXFormatError("elements remain on stack: "
                               + " ".join(stack))
Fred Drake30a68c71998-11-23 16:59:39 +0000248
249
def convert(ifp, ofp, table=None, discards=(), autoclosing=()):
    """Read LaTeX from *ifp* and write the converted events to *ofp*.

    Trailing whitespace is removed from every input line before the text
    is handed to subconvert() in one piece.  *table*, *discards* and
    *autoclosing* are passed through to subconvert() unchanged; *table*
    defaults to an empty mapping.

    An IOError whose errno is EPIPE (the reader of *ofp* went away) is
    swallowed; any other IOError propagates.
    """
    if table is None:
        table = {}
    stripped = [text.rstrip() for text in ifp.read().split("\n")]
    data = "\n".join(stripped)
    try:
        subconvert(data, ofp, table, discards, autoclosing)
    except IOError as err:
        # Inspect the errno attribute rather than unpacking the exception,
        # which only works when it carries exactly two arguments.
        if err.errno != errno.EPIPE:
            raise
260
261
def main():
    """Command-line entry point: convert a LaTeX file to ESIS events.

    Usage: script infile [outfile].  Output goes to standard output when
    no output file is named.  With any other number of arguments a usage
    message is written to standard error and the process exits with
    status 2.
    """
    argc = len(sys.argv)
    if argc not in (2, 3):
        sys.stderr.write("usage: %s infile [outfile]\n" % sys.argv[0])
        sys.exit(2)

    table = {
        # entries have the form:
        # name: ([attribute names], first_is_optional, empty, isenv)
        "appendix": ([], 0, 1, 0),
        "bifuncindex": (["name"], 0, 1, 0),
        "catcode": ([], 0, 1, 0),
        "cfuncdesc": (["type", "name", ("args",)], 0, 0, 1),
        "chapter": ([("title",)], 0, 0, 0),
        # NOTE(review): subconvert() strips a trailing "*" from the macro
        # name before consulting the table, so this key looks unreachable.
        "chapter*": ([("title",)], 0, 0, 0),
        "classdesc": (["name", ("constructor-args",)], 0, 0, 1),
        "ctypedesc": (["name"], 0, 0, 1),
        "cvardesc": (["type", "name"], 0, 0, 1),
        "datadesc": (["name"], 0, 0, 1),
        "declaremodule": (["id", "type", "name"], 1, 1, 0),
        "deprecated": (["release"], 0, 0, 0),
        "documentclass": (["classname"], 0, 1, 0),
        "excdesc": (["name"], 0, 0, 1),
        "funcdesc": (["name", ("args",)], 0, 0, 1),
        "funcdescni": (["name", ("args",)], 0, 0, 1),
        "geq": ([], 0, 1, 0),
        "hline": ([], 0, 1, 0),
        "indexii": (["ie1", "ie2"], 0, 1, 0),
        "indexiii": (["ie1", "ie2", "ie3"], 0, 1, 0),
        "indexiv": (["ie1", "ie2", "ie3", "ie4"], 0, 1, 0),
        "indexname": ([], 0, 0, 0),
        "input": (["source"], 0, 1, 0),
        "item": ([("leader",)], 1, 0, 0),
        "label": (["id"], 0, 1, 0),
        "labelwidth": ([], 0, 1, 0),
        "LaTeX": ([], 0, 1, 0),
        "leftmargin": ([], 0, 1, 0),
        "leq": ([], 0, 1, 0),
        "localmoduletable": ([], 0, 1, 0),
        "manpage": (["name", "section"], 0, 1, 0),
        "memberdesc": (["class", "name"], 1, 0, 1),
        "methoddesc": (["class", "name", ("args",)], 1, 0, 1),
        "methoddescni": (["class", "name", ("args",)], 1, 0, 1),
        "moduleauthor": (["name", "email"], 0, 1, 0),
        "opcodedesc": (["name", "var"], 0, 0, 1),
        "par": ([], 0, 1, 0),
        "paragraph": ([("title",)], 0, 0, 0),
        "renewcommand": (["macro"], 0, 0, 0),
        "rfc": (["number"], 0, 1, 0),
        "section": ([("title",)], 0, 0, 0),
        "sectionauthor": (["name", "email"], 0, 1, 0),
        "seemodule": (["ref", "name"], 1, 0, 0),
        "stindex": (["type"], 0, 1, 0),
        "subparagraph": ([("title",)], 0, 0, 0),
        "subsection": ([("title",)], 0, 0, 0),
        "subsubsection": ([("title",)], 0, 0, 0),
        "list": (["bullet", "init"], 0, 0, 1),
        "tableii": (["colspec", "style", "head1", "head2"], 0, 0, 1),
        "tableiii": (["colspec", "style", "head1", "head2", "head3"], 0, 0, 1),
        "tableiv": (["colspec", "style", "head1", "head2", "head3", "head4"],
                    0, 0, 1),
        "version": ([], 0, 1, 0),
        "versionadded": (["version"], 0, 1, 0),
        "versionchanged": (["version"], 0, 1, 0),
        "withsubitem": (["text"], 0, 0, 0),
        #
        "ABC": ([], 0, 1, 0),
        "ASCII": ([], 0, 1, 0),
        "C": ([], 0, 1, 0),
        "Cpp": ([], 0, 1, 0),
        "EOF": ([], 0, 1, 0),
        "e": ([], 0, 1, 0),
        "ldots": ([], 0, 1, 0),
        "NULL": ([], 0, 1, 0),
        "POSIX": ([], 0, 1, 0),
        "UNIX": ([], 0, 1, 0),
        #
        # Things that will actually be going away!
        #
        "fi": ([], 0, 1, 0),
        "ifhtml": ([], 0, 1, 0),
        "makeindex": ([], 0, 1, 0),
        "makemodindex": ([], 0, 1, 0),
        "maketitle": ([], 0, 1, 0),
        "noindent": ([], 0, 1, 0),
        "tableofcontents": ([], 0, 1, 0),
    }
    discards = ["fi", "ifhtml", "makeindex", "makemodindex", "maketitle",
                "noindent", "tableofcontents"]
    autoclosing = ["chapter", "section", "subsection", "subsubsection",
                   "paragraph", "subparagraph", ]

    ifp = open(sys.argv[1])
    try:
        if argc == 3:
            ofp = open(sys.argv[2], "w")
        else:
            ofp = sys.stdout
        try:
            convert(ifp, ofp, table,
                    discards=discards, autoclosing=autoclosing)
        finally:
            # Only close what was opened here; stdout belongs to the caller.
            if ofp is not sys.stdout:
                ofp.close()
    finally:
        ifp.close()
Fred Drake30a68c71998-11-23 16:59:39 +0000360
361
# Run the converter only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()