blob: f922b4000cf18d0f7e1e0e213b4e902230dd969a [file] [log] [blame]
Fred Drake30a68c71998-11-23 16:59:39 +00001#! /usr/bin/env python
2
3"""Generate ESIS events based on a LaTeX source document and configuration
4data.
5
6
7"""
8__version__ = '$Revision$'
9
10import errno
11import re
12import string
13import StringIO
14import sys
15
16
class Error(Exception):
    """Base class for the exceptions defined by this module."""
19
class LaTeXFormatError(Error):
    """Raised when the LaTeX input cannot be converted."""
22
23
# Patterns used by subconvert().  Each one is applied with .match(), i.e.
# anchored at the start of the text being examined.

# \begin{NAME}; group 1 is NAME.
_begin_env_rx = re.compile(r"[\\]begin{([^}]*)}")
# \end{NAME}; group 1 is NAME.
_end_env_rx = re.compile(r"[\\]end{([^}]*)}")
# \NAME or \NAME*, followed either by an opening brace or by optional
# whitespace and at most one newline; group 1 is the name (with any '*').
_begin_macro_rx = re.compile("[\\\\]([a-zA-Z]+[*]?)({|\\s*\n?)")
# One or more '%', an optional space, the comment text (group 1), the
# newline, and any spaces that start the following line.
_comment_rx = re.compile("%+ ?(.*)\n *")
# A run of characters that are none of: ']', '%', backslash, '{', '}'.
_text_rx = re.compile(r"[^]%\\{}]+")
# Optional whitespace followed by [VALUE]; group 1 is VALUE.
_optional_rx = re.compile(r"\s*[[]([^]]*)[]]")
# Optional spaces/newlines followed by {VALUE}; group 1 is VALUE.
_parameter_rx = re.compile("[ \n]*{([^}]*)}")
# A whole value made of a letter followed by letters, digits, '.' or '-';
# subconvert() uses it to choose between the TOKEN and CDATA attribute types.
_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
# Optional spaces/newlines followed by an opening brace.
_start_group_rx = re.compile("[ \n]*{")
# Optional spaces/newlines followed by an opening square bracket.
_start_optional_rx = re.compile("[ \n]*[[]")
34
35
# Translation table used by encode(): each of the 256 single-character
# strings chr(0)..chr(255) maps to itself, except newline and backslash,
# which map to their two-character escaped forms.
_charmap = {}
for c in map(chr, range(256)):
    _charmap[c] = c
_charmap["\n"] = r"\n"
_charmap["\\"] = r"\\"
del c


def encode(s):
    """Return *s* with newlines and backslashes escaped.

    A newline becomes the two characters backslash + 'n' and a backslash
    becomes two backslashes; every other character is copied unchanged.
    Characters that are missing from ``_charmap`` (anything outside the
    256 entries built above) are passed through as-is instead of being
    looked up as ``None``, which used to make the join fail.
    """
    get = _charmap.get
    return "".join([get(ch, ch) for ch in s])
45
46
# Characters which, when they directly follow a backslash in the input,
# are written out by subconvert() as literal character data.
ESCAPED_CHARS = "$%#^ {}&~"
Fred Drake30a68c71998-11-23 16:59:39 +000048
49
class _DiscardWriter:
    """Write-only sink used by subconvert() to drop suppressed output."""

    def write(self, data):
        pass


def subconvert(line, ofp, table, discards, autoclosing, knownempty,
               endchar=None):
    """Convert LaTeX source text to ESIS events written to *ofp*.

    Parameters:
      line        -- the LaTeX text still to be processed (a string).
      ofp         -- output object; only its write() method is used.
      table       -- mapping of macro name to a 4-tuple
                     (params, optional, empty, environ).  Macros that are
                     not in the table are treated as ([], 0, 0, 0).
      discards    -- macro names whose start tag, attributes and end tag
                     are not written to *ofp*.
      autoclosing -- macro names that are closed automatically when the
                     same macro is opened again, at the end of the document
                     environment, and at the end of the input.
      knownempty  -- callable taking a macro name; a true result marks the
                     macro as empty in addition to the table's own flag.
      endchar     -- if given, conversion stops when this character is seen
                     while no element opened by this call is still open.

    Returns the text following *endchar* when it was found, otherwise None
    once all of the input has been consumed.

    Raises LaTeXFormatError when the input cannot be interpreted, including
    unbalanced groups/environments and unterminated constructs.
    """
    stack = []
    while line:
        if line[0] == endchar and not stack:
            return line[1:]
        m = _comment_rx.match(line)
        if m:
            # Comments with no text produce no output at all.
            text = m.group(1)
            if text:
                ofp.write("(COMMENT\n")
                ofp.write("- %s \n" % encode(text))
                ofp.write(")COMMENT\n")
                ofp.write("-\\n\n")
            line = line[m.end():]
            continue
        m = _begin_env_rx.match(line)
        if m:
            # re-write to use the macro handler
            line = r"\%s %s" % (m.group(1), line[m.end():])
            continue
        m = _end_env_rx.match(line)
        if m:
            # end of environment
            envname = m.group(1)
            if envname == "document":
                # special magic: everything above the bottom of the stack
                # must be something we are allowed to close implicitly
                for n in stack[1:]:
                    if n not in autoclosing:
                        raise LaTeXFormatError(
                            "open element on stack: " + repr(n))
                # should be more careful, but this is easier to code:
                stack = []
                ofp.write(")document\n")
            elif stack and envname == stack[-1]:
                ofp.write(")%s\n" % envname)
                del stack[-1]
            else:
                # Report the details in the exception rather than printing
                # them: stdout may be the ESIS output stream.  This branch
                # also covers an \end with nothing open at all.
                raise LaTeXFormatError(
                    "environment close doesn't match: \\end{%s} with open"
                    " elements %s" % (envname, repr(stack)))
            line = line[m.end():]
            continue
        m = _begin_macro_rx.match(line)
        if m:
            # start of macro
            macroname = m.group(1)
            if macroname == "verbatim":
                # really magic case!  Everything up to the matching
                # \end{verbatim} is copied through as character data.
                endmarker = "\\end{verbatim}"
                pos = line.find(endmarker)
                if pos < 0:
                    raise LaTeXFormatError(
                        "unterminated verbatim environment")
                text = line[m.end(1):pos]
                ofp.write("(verbatim\n")
                ofp.write("-%s\n" % encode(text))
                ofp.write(")verbatim\n")
                line = line[pos + len(endmarker):]
                continue
            numbered = 1
            if macroname[-1] == "*":
                macroname = macroname[:-1]
                numbered = 0
            if macroname in autoclosing and macroname in stack:
                # Close everything opened since the previous occurrence of
                # this macro, then that occurrence itself.
                while stack[-1] != macroname:
                    if stack[-1] and stack[-1] not in discards:
                        ofp.write(")%s\n-\\n\n" % stack[-1])
                    del stack[-1]
                if macroname not in discards:
                    ofp.write("-\\n\n)%s\n-\\n\n" % macroname)
                del stack[-1]
            # Output for a discarded macro's start tag and attributes goes
            # to a sink; real_ofp is restored before resuming the main loop.
            real_ofp = ofp
            if macroname in discards:
                ofp = _DiscardWriter()
            #
            conversion = table.get(macroname, ([], 0, 0, 0))
            params, optional, empty, environ = conversion
            empty = empty or knownempty(macroname)
            if empty:
                ofp.write("e\n")
            if not numbered:
                ofp.write("Anumbered TOKEN no\n")
            # rip off the macroname
            if params:
                if optional and len(params) == 1:
                    line = line[m.end():]
                else:
                    line = line[m.end(1):]
            elif empty:
                line = line[m.end(1):]
            else:
                line = line[m.end():]
            #
            # Very ugly special case to deal with \item[].  The catch is that
            # this needs to occur outside the for loop that handles attribute
            # parsing so we can 'continue' the outer loop.
            #
            if optional and params and type(params[0]) is type(()):
                # the attribute name isn't used in this special case
                stack.append(macroname)
                ofp.write("(%s\n" % macroname)
                m = _start_optional_rx.match(line)
                if m:
                    line = line[m.end():]
                    line = subconvert(line, ofp, table, discards,
                                      autoclosing, knownempty, endchar="]")
                    if line is None:
                        # the nested call ran out of input before "]"
                        raise LaTeXFormatError(
                            "unterminated optional argument for \\%s"
                            % macroname)
                # a synthetic "}" makes the main loop close the element
                line = "}" + line
                ofp = real_ofp
                continue
            # handle attribute mappings here:
            for attrname in params:
                if optional:
                    # only the first parameter may be optional
                    optional = 0
                    if type(attrname) is type(""):
                        m = _optional_rx.match(line)
                        if m:
                            line = line[m.end():]
                            ofp.write("A%s TOKEN %s\n"
                                      % (attrname, encode(m.group(1))))
                elif type(attrname) is type(()):
                    # This is a sub-element; but don't place the
                    # element we found on the stack (\section-like)
                    stack.append(macroname)
                    ofp.write("(%s\n" % macroname)
                    macroname = attrname[0]
                    m = _start_group_rx.match(line)
                    if m:
                        line = line[m.end():]
                elif type(attrname) is type([]):
                    # A normal subelement.
                    attrname = attrname[0]
                    stack.append(macroname)
                    stack.append(attrname)
                    ofp.write("(%s\n" % macroname)
                    macroname = attrname
                else:
                    m = _parameter_rx.match(line)
                    if not m:
                        raise LaTeXFormatError(
                            "could not extract parameter %s for %s: %s"
                            % (attrname, macroname, repr(line[:100])))
                    value = m.group(1)
                    if _token_rx.match(value):
                        dtype = "TOKEN"
                    else:
                        dtype = "CDATA"
                    ofp.write("A%s %s %s\n"
                              % (attrname, dtype, encode(value)))
                    line = line[m.end():]
            if params and type(params[-1]) is type('') \
               and (not empty) and not environ:
                # attempt to strip off next '{'
                m = _start_group_rx.match(line)
                if not m:
                    raise LaTeXFormatError(
                        "non-empty element '%s' has no content: %s"
                        % (macroname, line[:12]))
                line = line[m.end():]
            stack.append(macroname)
            ofp.write("(%s\n" % macroname)
            if empty:
                # a synthetic "}" makes the main loop close the element
                line = "}" + line
            ofp = real_ofp
            continue
        if line[0] == "}":
            # end of macro
            if not stack:
                raise LaTeXFormatError(
                    "unbalanced '}': " + repr(line[:100]))
            macroname = stack[-1]
            conversion = table.get(macroname)
            if macroname \
               and macroname not in discards \
               and type(conversion) is not type(""):
                # otherwise, it was just a bare group
                ofp.write(")%s\n" % stack[-1])
            del stack[-1]
            line = line[1:]
            continue
        if line[0] == "{":
            # bare group: remembered as an unnamed stack entry
            stack.append("")
            line = line[1:]
            continue
        if line[0] == "\\" and len(line) > 1 and line[1] in ESCAPED_CHARS:
            ofp.write("-%s\n" % encode(line[1]))
            line = line[2:]
            continue
        if line[:2] == r"\\":
            ofp.write("(BREAK\n)BREAK\n")
            line = line[2:]
            continue
        m = _text_rx.match(line)
        if m:
            text = encode(m.group())
            ofp.write("-%s\n" % text)
            line = line[m.end():]
            continue
        # special case because of \item[]
        if line[0] == "]":
            ofp.write("-]\n")
            line = line[1:]
            continue
        # avoid infinite loops
        extra = ""
        if len(line) > 100:
            extra = "..."
        raise LaTeXFormatError("could not identify markup: %s%s"
                               % (repr(line[:100]), extra))
    # End of input: implicitly close whatever may be closed that way.
    while stack and stack[-1] in autoclosing:
        ofp.write("-\\n\n")
        ofp.write(")%s\n" % stack[-1])
        del stack[-1]
    if stack:
        raise LaTeXFormatError("elements remain on stack: "
                               + " ".join(stack))
Fred Drake30a68c71998-11-23 16:59:39 +0000259
260
def convert(ifp, ofp, table={}, discards=(), autoclosing=(), knownempties=()):
    """Read all of *ifp* and write the corresponding ESIS events to *ofp*.

    Parameters:
      ifp          -- input object; its read() method is called once.
      ofp          -- output object, passed through to subconvert().
      table        -- macro conversion table, passed through to
                      subconvert().  The shared default is only read here.
      discards     -- macro names passed through as subconvert()'s
                      ``discards``.
      autoclosing  -- macro names passed through as subconvert()'s
                      ``autoclosing``.
      knownempties -- macro names that subconvert() should treat as empty.

    An IOError whose errno is EPIPE is swallowed; any other IOError, and
    any other exception raised by subconvert(), propagates to the caller.
    """
    known = {}
    for gi in knownempties:
        known[gi] = gi

    def knownempty(name):
        return name in known

    try:
        subconvert(ifp.read(), ofp, table, discards, autoclosing, knownempty)
    except IOError as err:
        # Use the errno attribute instead of unpacking the exception's
        # args, which fails unless there are exactly two of them.
        if err.errno != errno.EPIPE:
            raise
269 raise
270
271
def main():
    """Command-line entry point.

    With one argument, convert that file and write the result to standard
    output; with two, write the result to the file named by the second
    argument.  With any other number of arguments, print a usage message
    to standard error and exit with status 2.
    """
    if len(sys.argv) == 2:
        ifp = open(sys.argv[1])
        ofp = sys.stdout
    elif len(sys.argv) == 3:
        ifp = open(sys.argv[1])
        ofp = open(sys.argv[2], "w")
    else:
        # There is no usage() helper in this module; report directly.
        progname = "<script>"
        if sys.argv:
            progname = sys.argv[0]
        sys.stderr.write("usage: %s input-file [output-file]\n" % progname)
        sys.exit(2)
    table = {
        # entries are name
        #   -> ([list of attribute names], first_is_optional, empty,
        #       environment)
        "bifuncindex": (["name"], 0, 1, 0),
        "cfuncdesc": (["type", "name", ("args",)], 0, 0, 1),
        "chapter": ([("title",)], 0, 0, 0),
        "chapter*": ([("title",)], 0, 0, 0),
        "classdesc": (["name", ("constructor-args",)], 0, 0, 1),
        "ctypedesc": (["name"], 0, 0, 1),
        "cvardesc": (["type", "name"], 0, 0, 1),
        "datadesc": (["name"], 0, 0, 1),
        "declaremodule": (["id", "type", "name"], 1, 1, 0),
        "deprecated": (["release"], 0, 0, 0),
        "documentclass": (["classname"], 0, 1, 0),
        "excdesc": (["name"], 0, 0, 1),
        "funcdesc": (["name", ("args",)], 0, 0, 1),
        "funcdescni": (["name", ("args",)], 0, 0, 1),
        "geq": ([], 0, 1, 0),
        "hline": ([], 0, 1, 0),
        "indexii": (["ie1", "ie2"], 0, 1, 0),
        "indexiii": (["ie1", "ie2", "ie3"], 0, 1, 0),
        "indexiv": (["ie1", "ie2", "ie3", "ie4"], 0, 1, 0),
        "indexname": ([], 0, 0, 0),
        "input": (["source"], 0, 1, 0),
        "item": ([("leader",)], 1, 0, 0),
        "label": (["id"], 0, 1, 0),
        "leq": ([], 0, 1, 0),
        "manpage": (["name", "section"], 0, 1, 0),
        "memberdesc": (["class", "name"], 1, 0, 1),
        "methoddesc": (["class", "name", ("args",)], 1, 0, 1),
        "methoddescni": (["class", "name", ("args",)], 1, 0, 1),
        "moduleauthor": (["name", "email"], 0, 1, 0),
        "opcodedesc": (["name", "var"], 0, 0, 1),
        "par": ([], 0, 1, 0),
        "paragraph": ([("title",)], 0, 0, 0),
        "renewcommand": (["macro"], 0, 0, 0),
        "rfc": (["number"], 0, 1, 0),
        "section": ([("title",)], 0, 0, 0),
        "sectionauthor": (["name", "email"], 0, 1, 0),
        "seemodule": (["ref", "name"], 1, 0, 0),
        "stindex": (["type"], 0, 1, 0),
        "subparagraph": ([("title",)], 0, 0, 0),
        "subsection": ([("title",)], 0, 0, 0),
        "subsubsection": ([("title",)], 0, 0, 0),
        "tableii": (["colspec", "style", "head1", "head2"], 0, 0, 1),
        "tableiii": (["colspec", "style", "head1", "head2", "head3"], 0, 0, 1),
        "tableiv": (["colspec", "style", "head1", "head2", "head3", "head4"],
                    0, 0, 1),
        "versionadded": (["version"], 0, 1, 0),
        "versionchanged": (["version"], 0, 1, 0),
        "withsubitem": (["text"], 0, 0, 0),
        #
        "ABC": ([], 0, 1, 0),
        "ASCII": ([], 0, 1, 0),
        "C": ([], 0, 1, 0),
        "Cpp": ([], 0, 1, 0),
        "EOF": ([], 0, 1, 0),
        "e": ([], 0, 1, 0),
        "ldots": ([], 0, 1, 0),
        "NULL": ([], 0, 1, 0),
        "POSIX": ([], 0, 1, 0),
        "UNIX": ([], 0, 1, 0),
        #
        # Things that will actually be going away!
        #
        "fi": ([], 0, 1, 0),
        "ifhtml": ([], 0, 1, 0),
        "makeindex": ([], 0, 1, 0),
        "makemodindex": ([], 0, 1, 0),
        "maketitle": ([], 0, 1, 0),
        "noindent": ([], 0, 1, 0),
        "tableofcontents": ([], 0, 1, 0),
        }
    try:
        convert(ifp, ofp, table,
                discards=["fi", "ifhtml", "makeindex", "makemodindex",
                          "maketitle", "noindent", "tableofcontents"],
                autoclosing=["chapter", "section", "subsection",
                             "subsubsection", "paragraph", "subparagraph", ],
                knownempties=["appendix",
                              "maketitle", "makeindex", "makemodindex",
                              "localmoduletable"])
    finally:
        # Release the files we opened; never close the shared stdout.
        ifp.close()
        if ofp is not sys.stdout:
            ofp.close()
362
363
# Run the converter only when this file is executed as a script.
if __name__ == "__main__":
    main()