blob: f3c39f58b2ca0f65a4f92e30f8e9298fccd508e0 [file] [log] [blame]
Fred Drake30a68c71998-11-23 16:59:39 +00001#! /usr/bin/env python
2
3"""Generate ESIS events based on a LaTeX source document and configuration
4data.
5
6
7"""
# NOTE(review): '$Revision$' looks like an unexpanded RCS/CVS keyword that the
# version-control system is expected to fill in on checkout -- confirm.
__version__ = '$Revision$'
9
10import errno
11import re
12import string
13import StringIO
14import sys
15
Fred Drakeaeea9811998-12-01 19:04:12 +000016from esistools import encode
17
Fred Drake30a68c71998-11-23 16:59:39 +000018
class Error(Exception):
    """Base class for the exceptions defined in this module."""
21
class LaTeXFormatError(Error):
    """Raised when the LaTeX input cannot be parsed by the converter."""
24
25
# Patterns used by subconvert().  Each one is applied with .match(), i.e.
# anchored at the start of the text it is given.
#
# "\begin{name}"; group 1 is the environment name.
_begin_env_rx = re.compile(r"[\\]begin{([^}]*)}")
# "\end{name}"; group 1 is the environment name.
_end_env_rx = re.compile(r"[\\]end{([^}]*)}")
# "\name" or "\name*"; group 1 is the name including any trailing "*",
# group 2 is either an opening "{" or the whitespace following the name.
_begin_macro_rx = re.compile("[\\\\]([a-zA-Z]+[*]?)({|\\s*\n?)")
# One or more "%" up to the end of the line, plus the spaces that start the
# next line; group 1 is the comment text.
_comment_rx = re.compile("%+ ?(.*)\n *")
# A run of characters containing none of "]", "%", backslash, "{" or "}".
_text_rx = re.compile(r"[^]%\\{}]+")
# A "[...]" argument, optionally preceded by whitespace; group 1 is the
# text between the brackets.
_optional_rx = re.compile(r"\s*[[]([^]]*)[]]")
# _parameter_rx is this complicated to allow {...} inside a parameter;
# this is useful to match tabular layout specifications like {c|p{24pt}}
# Group 1 is the text between the outer braces.
_parameter_rx = re.compile("[ \n]*{(([^{}}]|{[^}]*})*)}")
# Parameter values matching this are written as TOKEN attributes by
# subconvert(); all others are written as CDATA.
_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
# An opening "{", optionally preceded by spaces/newlines.
_start_group_rx = re.compile("[ \n]*{")
# An opening "[", optionally preceded by spaces/newlines.
_start_optional_rx = re.compile("[ \n]*[[]")


# Characters that subconvert() emits as literal data when they directly
# follow a backslash.
ESCAPED_CHARS = "$%#^ {}&~"
Fred Drake30a68c71998-11-23 16:59:39 +000041
42
def subconvert(line, ofp, table, discards, autoclosing, endchar=None):
    r"""Convert the LaTeX text in *line* into event lines written to *ofp*.

    The text is consumed from the front, one construct at a time: comments,
    ``\begin{...}`` / ``\end{...}``, macros, bare ``{...}`` groups,
    backslash-escaped characters, ``\\`` line breaks and plain text.  For
    each construct one or more lines are written to *ofp*: ``(name`` and
    ``)name`` when an element is opened and closed, ``Aname TYPE value``
    for attributes, ``e`` for elements marked empty, and ``-text`` for
    character data.

    Parameters:
      line        -- string holding the LaTeX source still to be converted.
      ofp         -- output object; only its ``write()`` method is used.
      table       -- mapping of macro/environment name to a 4-tuple
                     ``(params, optional, empty, environ)``.  Each item of
                     *params* is a string (attribute taken from a ``{...}``
                     parameter), a 1-tuple (name of a sub-element opened
                     inside the macro's element) or a 1-item list (handled
                     similarly, see the note in the code).  *optional* is
                     true when the first parameter is an optional ``[...]``
                     argument, *empty* when the element has no content, and
                     *environ* when the name is an environment (no content
                     ``{`` is expected after its parameters).  Unknown names
                     are treated as ``([], 0, 0, 0)``.
      discards    -- names whose open/close events are not written.
      autoclosing -- names of elements that are closed implicitly when an
                     element of the same name starts, at ``\end{document}``
                     and at the end of the input.
      endchar     -- if not None, conversion stops when this character is
                     met while no element is open.

    Returns the text following *endchar* when it was found; returns None
    when *endchar* is None and all input has been consumed.

    Raises LaTeXFormatError for markup that cannot be interpreted,
    including unbalanced groups/environments, an unterminated verbatim
    block, and input that ends before *endchar* is found.
    """
    stack = []
    while line:
        if line[0] == endchar and not stack:
            return line[1:]

        m = _comment_rx.match(line)
        if m:
            text = m.group(1)
            # Comments without any text produce no output at all.
            if text:
                ofp.write("(COMMENT\n")
                ofp.write("- %s \n" % encode(text))
                ofp.write(")COMMENT\n")
                ofp.write("-\\n\n")
            line = line[m.end():]
            continue

        m = _begin_env_rx.match(line)
        if m:
            # re-write to use the macro handler
            line = r"\%s %s" % (m.group(1), line[m.end():])
            continue

        m = _end_env_rx.match(line)
        if m:
            # end of environment
            envname = m.group(1)
            if envname == "document":
                # special magic: everything above the bottom stack entry
                # must be something that may be closed implicitly
                for n in stack[1:]:
                    if n not in autoclosing:
                        raise LaTeXFormatError(
                            "open element on stack: " + repr(n))
                # should be more careful, but this is easier to code:
                stack = []
                ofp.write(")document\n")
            elif stack and envname == stack[-1]:
                ofp.write(")%s\n" % envname)
                del stack[-1]
            else:
                # Report the open elements in the exception rather than
                # printing them: stdout may be the output stream itself.
                raise LaTeXFormatError(
                    "environment close for %s doesn't match"
                    " (open elements: %s)" % (envname, repr(stack)))
            line = line[m.end():]
            continue

        m = _begin_macro_rx.match(line)
        if m:
            # start of macro
            macroname = m.group(1)
            if macroname == "verbatim":
                # really magic case!  Everything up to the terminator is
                # passed through as data without further interpretation.
                terminator = "\\end{verbatim}"
                pos = line.find(terminator, m.end(1))
                if pos < 0:
                    raise LaTeXFormatError(
                        "no \\end{verbatim} found for verbatim environment")
                text = line[m.end(1):pos]
                ofp.write("(verbatim\n")
                ofp.write("-%s\n" % encode(text))
                ofp.write(")verbatim\n")
                line = line[pos + len(terminator):]
                continue

            numbered = 1
            if macroname[-1] == "*":
                macroname = macroname[:-1]
                numbered = 0

            if macroname in autoclosing and macroname in stack:
                # Close everything opened since the previous element of
                # this name, then that element itself.
                while stack[-1] != macroname:
                    if stack[-1] and stack[-1] not in discards:
                        ofp.write(")%s\n-\\n\n" % stack[-1])
                    del stack[-1]
                if macroname not in discards:
                    ofp.write("-\\n\n)%s\n-\\n\n" % macroname)
                del stack[-1]

            # Output produced while opening a discarded macro goes to a
            # throw-away buffer; the real stream is restored afterwards.
            real_ofp = ofp
            if macroname in discards:
                ofp = StringIO.StringIO()

            conversion = table.get(macroname, ([], 0, 0, 0))
            params, optional, empty, environ = conversion
            if empty:
                ofp.write("e\n")
            if not numbered:
                ofp.write("Anumbered TOKEN no\n")

            # rip off the macroname; m.end(1) keeps whatever group 2
            # matched ("{" or whitespace) for the parameter patterns
            if params:
                if optional and len(params) == 1:
                    line = line[m.end():]
                else:
                    line = line[m.end(1):]
            elif empty:
                line = line[m.end(1):]
            else:
                line = line[m.end():]

            #
            # Very ugly special case to deal with \item[].  The catch is that
            # this needs to occur outside the for loop that handles attribute
            # parsing so we can 'continue' the outer loop.
            #
            if optional and params and isinstance(params[0], tuple):
                # the attribute name isn't used in this special case
                stack.append(macroname)
                ofp.write("(%s\n" % macroname)
                m = _start_optional_rx.match(line)
                if m:
                    # Convert the bracketed text recursively; the call
                    # returns what follows the closing "]".
                    line = subconvert(line[m.end():], ofp, table, discards,
                                      autoclosing, endchar="]")
                # The synthetic "}" closes the element on the next pass.
                line = "}" + line
                ofp = real_ofp
                continue

            # handle attribute mappings here:
            for attrname in params:
                if optional:
                    # Only the first parameter may be optional.
                    optional = 0
                    if isinstance(attrname, str):
                        m = _optional_rx.match(line)
                        if m:
                            line = line[m.end():]
                            ofp.write("A%s TOKEN %s\n"
                                      % (attrname, encode(m.group(1))))
                elif isinstance(attrname, tuple):
                    # This is a sub-element; but don't place the
                    # element we found on the stack (\section-like)
                    stack.append(macroname)
                    ofp.write("(%s\n" % macroname)
                    macroname = attrname[0]
                    m = _start_group_rx.match(line)
                    if m:
                        line = line[m.end():]
                elif isinstance(attrname, list):
                    # A normal subelement.
                    # NOTE(review): the sub-element name is pushed here and
                    # again after the loop; kept as in the original --
                    # confirm that this is intended.
                    attrname = attrname[0]
                    stack.append(macroname)
                    stack.append(attrname)
                    ofp.write("(%s\n" % macroname)
                    macroname = attrname
                else:
                    m = _parameter_rx.match(line)
                    if not m:
                        raise LaTeXFormatError(
                            "could not extract parameter %s for %s: %s"
                            % (attrname, macroname, repr(line[:100])))
                    value = m.group(1)
                    if _token_rx.match(value):
                        dtype = "TOKEN"
                    else:
                        dtype = "CDATA"
                    ofp.write("A%s %s %s\n"
                              % (attrname, dtype, encode(value)))
                    line = line[m.end():]

            if params and isinstance(params[-1], str) \
               and (not empty) and not environ:
                # attempt to strip off next '{'
                m = _start_group_rx.match(line)
                if not m:
                    raise LaTeXFormatError(
                        "non-empty element '%s' has no content: %s"
                        % (macroname, line[:12]))
                line = line[m.end():]

            stack.append(macroname)
            ofp.write("(%s\n" % macroname)
            if empty:
                # Close the element again on the next pass.
                line = "}" + line
            ofp = real_ofp
            continue

        if line[0] == "}":
            # end of macro
            if not stack:
                raise LaTeXFormatError(
                    "unmatched '}': %s" % repr(line[:100]))
            macroname = stack[-1]
            conversion = table.get(macroname)
            if macroname \
               and macroname not in discards \
               and not isinstance(conversion, str):
                # otherwise, it was just a bare group
                ofp.write(")%s\n" % macroname)
            del stack[-1]
            line = line[1:]
            continue

        if line[0] == "{":
            # Bare group: remembered as an unnamed stack entry.
            stack.append("")
            line = line[1:]
            continue

        # Slice instead of indexing so that a lone trailing backslash falls
        # through to the "could not identify markup" error below.
        nextch = line[1:2]
        if line[0] == "\\" and nextch and nextch in ESCAPED_CHARS:
            ofp.write("-%s\n" % encode(nextch))
            line = line[2:]
            continue

        if line[:2] == r"\\":
            ofp.write("(BREAK\n)BREAK\n")
            line = line[2:]
            continue

        m = _text_rx.match(line)
        if m:
            text = encode(m.group())
            ofp.write("-%s\n" % text)
            line = line[m.end():]
            continue

        # special case because of \item[]
        if line[0] == "]":
            ofp.write("-]\n")
            line = line[1:]
            continue

        # avoid infinite loops
        extra = ""
        if len(line) > 100:
            extra = "..."
        raise LaTeXFormatError("could not identify markup: %s%s"
                               % (repr(line[:100]), extra))

    # End of input: implicitly close whatever may be closed implicitly.
    while stack and stack[-1] in autoclosing:
        ofp.write("-\\n\n")
        ofp.write(")%s\n" % stack[-1])
        del stack[-1]
    if stack:
        raise LaTeXFormatError("elements remain on stack: "
                               + " ".join(stack))
    if endchar is not None:
        # The caller expects the text after endchar; returning None here
        # would only surface later as an unrelated TypeError.
        raise LaTeXFormatError(
            "end of input reached before closing %s" % repr(endchar))
Fred Drake30a68c71998-11-23 16:59:39 +0000250
251
def convert(ifp, ofp, table=None, discards=(), autoclosing=()):
    """Read all of *ifp* and write its conversion to *ofp*.

    Parameters:
      ifp         -- input object; its ``read()`` method is called once.
      ofp         -- output object handed on to subconvert().
      table       -- macro description table for subconvert(); an empty
                     table is used when omitted.
      discards    -- names whose events are not written.
      autoclosing -- names of elements that may be closed implicitly.

    An IOError whose errno is EPIPE is swallowed (presumably so that a
    reader closing the output pipe early ends the conversion quietly);
    every other exception propagates to the caller.
    """
    if table is None:
        table = {}
    try:
        subconvert(ifp.read(), ofp, table, discards, autoclosing)
    except IOError as err:
        # Look at the errno attribute instead of unpacking the exception:
        # unpacking fails for IOErrors that do not carry exactly two
        # arguments and would hide the original error.
        if err.errno != errno.EPIPE:
            raise
257 raise
258
259
def main():
    """Command-line entry point.

    Usage: ``script infile [outfile]``.  The LaTeX source is read from
    *infile* and the converted output is written to *outfile*, or to
    standard output when no output file is named.  With any other number
    of arguments a usage message is written to standard error and the
    process exits with status 2.
    """
    argc = len(sys.argv)
    if argc not in (2, 3):
        prog = (sys.argv[:1] or ["<script>"])[0]
        sys.stderr.write("usage: %s infile [outfile]\n" % prog)
        sys.exit(2)

    table = {
        # entries have the form:
        # name: ([attribute names], first_is_optional, empty, isenv)
        "appendix": ([], 0, 1, 0),
        "bifuncindex": (["name"], 0, 1, 0),
        "catcode": ([], 0, 1, 0),
        "cfuncdesc": (["type", "name", ("args",)], 0, 0, 1),
        "chapter": ([("title",)], 0, 0, 0),
        "chapter*": ([("title",)], 0, 0, 0),
        "classdesc": (["name", ("constructor-args",)], 0, 0, 1),
        "ctypedesc": (["name"], 0, 0, 1),
        "cvardesc": (["type", "name"], 0, 0, 1),
        "datadesc": (["name"], 0, 0, 1),
        "declaremodule": (["id", "type", "name"], 1, 1, 0),
        "deprecated": (["release"], 0, 0, 0),
        "documentclass": (["classname"], 0, 1, 0),
        "excdesc": (["name"], 0, 0, 1),
        "funcdesc": (["name", ("args",)], 0, 0, 1),
        "funcdescni": (["name", ("args",)], 0, 0, 1),
        "geq": ([], 0, 1, 0),
        "hline": ([], 0, 1, 0),
        "indexii": (["ie1", "ie2"], 0, 1, 0),
        "indexiii": (["ie1", "ie2", "ie3"], 0, 1, 0),
        "indexiv": (["ie1", "ie2", "ie3", "ie4"], 0, 1, 0),
        "indexname": ([], 0, 0, 0),
        "input": (["source"], 0, 1, 0),
        "item": ([("leader",)], 1, 0, 0),
        "label": (["id"], 0, 1, 0),
        "labelwidth": ([], 0, 1, 0),
        "LaTeX": ([], 0, 1, 0),
        "leftmargin": ([], 0, 1, 0),
        "leq": ([], 0, 1, 0),
        "localmoduletable": ([], 0, 1, 0),
        "manpage": (["name", "section"], 0, 1, 0),
        "memberdesc": (["class", "name"], 1, 0, 1),
        "methoddesc": (["class", "name", ("args",)], 1, 0, 1),
        "methoddescni": (["class", "name", ("args",)], 1, 0, 1),
        "moduleauthor": (["name", "email"], 0, 1, 0),
        "opcodedesc": (["name", "var"], 0, 0, 1),
        "par": ([], 0, 1, 0),
        "paragraph": ([("title",)], 0, 0, 0),
        "renewcommand": (["macro"], 0, 0, 0),
        "rfc": (["number"], 0, 1, 0),
        "section": ([("title",)], 0, 0, 0),
        "sectionauthor": (["name", "email"], 0, 1, 0),
        "seemodule": (["ref", "name"], 1, 0, 0),
        "stindex": (["type"], 0, 1, 0),
        "subparagraph": ([("title",)], 0, 0, 0),
        "subsection": ([("title",)], 0, 0, 0),
        "subsubsection": ([("title",)], 0, 0, 0),
        "list": (["bullet", "init"], 0, 0, 1),
        "tableii": (["colspec", "style", "head1", "head2"], 0, 0, 1),
        "tableiii": (["colspec", "style", "head1", "head2", "head3"], 0, 0, 1),
        "tableiv": (["colspec", "style", "head1", "head2", "head3", "head4"],
                    0, 0, 1),
        "version": ([], 0, 1, 0),
        "versionadded": (["version"], 0, 1, 0),
        "versionchanged": (["version"], 0, 1, 0),
        "withsubitem": (["text"], 0, 0, 0),
        #
        "ABC": ([], 0, 1, 0),
        "ASCII": ([], 0, 1, 0),
        "C": ([], 0, 1, 0),
        "Cpp": ([], 0, 1, 0),
        "EOF": ([], 0, 1, 0),
        "e": ([], 0, 1, 0),
        "ldots": ([], 0, 1, 0),
        "NULL": ([], 0, 1, 0),
        "POSIX": ([], 0, 1, 0),
        "UNIX": ([], 0, 1, 0),
        #
        # Things that will actually be going away!
        # (makeindex, makemodindex and maketitle used to be listed twice
        # with identical values; they are kept here only.)
        #
        "fi": ([], 0, 1, 0),
        "ifhtml": ([], 0, 1, 0),
        "makeindex": ([], 0, 1, 0),
        "makemodindex": ([], 0, 1, 0),
        "maketitle": ([], 0, 1, 0),
        "noindent": ([], 0, 1, 0),
        "tableofcontents": ([], 0, 1, 0),
        }

    ifp = open(sys.argv[1])
    try:
        if argc == 3:
            ofp = open(sys.argv[2], "w")
        else:
            ofp = sys.stdout
        try:
            convert(ifp, ofp, table,
                    discards=["fi", "ifhtml", "makeindex", "makemodindex",
                              "maketitle", "noindent", "tableofcontents"],
                    autoclosing=["chapter", "section", "subsection",
                                 "subsubsection", "paragraph",
                                 "subparagraph", ])
        finally:
            # Close (and thereby flush) an output file we opened ourselves;
            # standard output is left alone.
            if ofp is not sys.stdout:
                ofp.close()
    finally:
        ifp.close()
Fred Drake30a68c71998-11-23 16:59:39 +0000358
359
# Run the converter only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()