#! /usr/bin/env python

"""Generate ESIS events based on a LaTeX source document and configuration
data.
"""
# Revision keyword placeholder, expanded by the version-control system.
__version__ = '$Revision$'
9
10import errno
11import re
12import string
13import StringIO
14import sys
15
16
class Error(Exception):
    """Base class for the exceptions defined by this module."""
19
class LaTeXFormatError(Error):
    """Raised when the LaTeX input contains markup that cannot be converted."""
22
23
# Patterns used by subconvert(); each is applied with .match() at the start
# of the text that remains to be processed.  Literal brackets and braces are
# backslash-escaped rather than written as one-character sets ("[[]"), which
# newer versions of the re module warn about as a possible nested set.

# \begin{name} and \end{name}; group 1 is the environment name.
_begin_env_rx = re.compile(r"\\begin\{([^}]*)\}")
_end_env_rx = re.compile(r"\\end\{([^}]*)\}")
# \name or \name*; group 1 is the name (including any "*"), group 2 is the
# opening brace or the whitespace (with at most one newline) that follows.
_begin_macro_rx = re.compile(r"\\([a-zA-Z]+[*]?)(\{|\s*\n?)")
# A "%" comment; group 1 is the comment text.  The comment ends at a newline
# or at the very end of the input, so a final unterminated line still matches.
_comment_rx = re.compile(r"%+[ \t]*(.*)(?:\n|\Z)")
# A run of ordinary text: anything except "]", "%", "\", "{" and "}".
_text_rx = re.compile(r"[^\]%\\{}]+")
# An optional "[...]" argument; group 1 is its content.
_optional_rx = re.compile(r"\s*\[([^\]]*)\]")
# A required "{...}" argument without nested braces; group 1 is its content.
_parameter_rx = re.compile(r"[ \n]*\{([^}]*)\}")
# A value that may be emitted as a TOKEN attribute rather than CDATA.
_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
# The opening delimiter of a group / of an optional argument.
_start_group_rx = re.compile(r"[ \n]*\{")
_start_optional_rx = re.compile(r"[ \n]*\[")
34
35
# Translation table used by encode(): each of the 256 single-byte characters
# maps to itself, except newline and backslash, which map to the
# two-character escape sequences "\n" and "\\".
_charmap = dict((chr(i), chr(i)) for i in range(256))
_charmap["\n"] = r"\n"
_charmap["\\"] = r"\\"


def encode(s):
    """Return *s* with each newline and backslash replaced by its escape.

    A newline becomes the two characters ``\\n`` and a backslash becomes two
    backslashes.  Characters that have no entry in ``_charmap`` are passed
    through unchanged instead of breaking the join.
    """
    return "".join([_charmap.get(c, c) for c in s])
45
46
# Characters that, when preceded by a backslash, are emitted as that literal
# character.  "&" and "_" are included because "\&" and "\_" are the LaTeX
# spellings of those literal characters.
ESCAPED_CHARS = "$%#^ {}&_"
48
49
def subconvert(line, ofp, table, discards, autoclosing, knownempty,
               endchar=None):
    """Translate the LaTeX text in *line* into ESIS event lines on *ofp*.

    Events are written one per line: ``(name`` opens an element, ``)name``
    closes it, ``-text`` is character data (escaped with encode()),
    ``Aname TYPE value`` is an attribute written ahead of the element it
    belongs to, and ``e`` is written ahead of an element that is empty.

    Parameters:
      line        -- the LaTeX source still to be processed (a string).
      ofp         -- object with a write() method that receives the events.
      table       -- maps a macro/environment name to a 3-tuple
                     (params, optional, empty): the parameter descriptions,
                     whether the first parameter is optional, and whether
                     the element is empty.  Names not in the table are
                     treated as ([], 0, 0).
      discards    -- names whose start events go to a scratch buffer and
                     whose close events are not written.
      autoclosing -- names that, when seen again, close the element of the
                     same name that is still open (and everything opened
                     inside it).
      knownempty  -- callable taking a name; a true result makes the
                     element empty even if *table* does not say so.
      endchar     -- if given, conversion stops at the first occurrence of
                     this character while no element is open.

    Each entry of ``params`` is either a string (an attribute name whose
    value is read from the next argument), a tuple (the name of a
    sub-element whose content is the following group) or a list (the name
    of a sub-element that is opened directly).

    Returns the text following *endchar* when it is found; returns None
    when *endchar* is None and the input has been consumed.

    Raises LaTeXFormatError for markup that cannot be interpreted,
    including unbalanced closers, an unterminated verbatim environment and
    a missing *endchar*.
    """
    verbatim_end = "\\end{verbatim}"
    stack = []
    while line:
        if line[0] == endchar and not stack:
            return line[1:]
        m = _comment_rx.match(line)
        if m:
            text = m.group(1)
            if text:
                ofp.write("(COMMENT\n")
                ofp.write("- %s \n" % encode(text))
                ofp.write(")COMMENT\n")
            # the line break that ended the comment is kept as data
            ofp.write("-\\n\n")
            line = line[m.end():]
            continue
        m = _begin_env_rx.match(line)
        if m:
            # re-write to use the macro handler
            line = r"\%s %s" % (m.group(1), line[m.end():])
            continue
        m = _end_env_rx.match(line)
        if m:
            # end of environment
            envname = m.group(1)
            if envname == "document":
                # special magic: anything still open must be autoclosing
                for n in stack[1:]:
                    if n not in autoclosing:
                        raise LaTeXFormatError(
                            "open element on stack: " + repr(n))
                # should be more careful, but this is easier to code:
                stack = []
                ofp.write(")document\n")
            elif stack and envname == stack[-1]:
                ofp.write(")%s\n" % envname)
                del stack[-1]
            else:
                # also reached when nothing is open at all
                raise LaTeXFormatError("environment close doesn't match")
            line = line[m.end():]
            continue
        m = _begin_macro_rx.match(line)
        if m:
            # start of macro
            macroname = m.group(1)
            if macroname == "verbatim":
                # really magic case: copy everything up to the end marker
                # without interpreting it
                pos = line.find(verbatim_end)
                if pos < 0:
                    raise LaTeXFormatError(
                        "unterminated verbatim environment")
                text = line[m.end(1):pos]
                ofp.write("(verbatim\n")
                ofp.write("-%s\n" % encode(text))
                ofp.write(")verbatim\n")
                line = line[pos + len(verbatim_end):]
                continue
            # a trailing "*" selects the unnumbered variant of the macro
            numbered = not macroname.endswith("*")
            if not numbered:
                macroname = macroname[:-1]
            if macroname in autoclosing and macroname in stack:
                # close everything opened inside the previous instance ...
                while stack[-1] != macroname:
                    if stack[-1] and stack[-1] not in discards:
                        ofp.write(")%s\n-\\n\n" % stack[-1])
                    del stack[-1]
                # ... and then the previous instance itself
                if macroname not in discards:
                    ofp.write("-\\n\n)%s\n-\\n\n" % macroname)
                del stack[-1]
            # events for a discarded macro go to a scratch buffer until
            # the macro has been processed
            real_ofp = ofp
            if macroname in discards:
                ofp = StringIO.StringIO()
            #
            conversion = table.get(macroname, ([], 0, 0))
            params, optional, empty = conversion
            empty = empty or knownempty(macroname)
            if empty:
                ofp.write("e\n")
            if not numbered:
                ofp.write("Anumbered TOKEN no\n")
            # rip off the macroname; m.end() also consumes what followed
            # the name (an opening brace or whitespace), m.end(1) does not
            if params:
                if optional and len(params) == 1:
                    line = line[m.end():]
                else:
                    line = line[m.end(1):]
            elif empty:
                line = line[m.end(1):]
            else:
                line = line[m.end():]
            #
            # Very ugly special case to deal with \item[].  The catch is that
            # this needs to occur outside the for loop that handles attribute
            # parsing so we can 'continue' the outer loop.
            #
            if optional and params and isinstance(params[0], tuple):
                # the attribute name isn't used in this special case
                stack.append(macroname)
                ofp.write("(%s\n" % macroname)
                m = _start_optional_rx.match(line)
                if m:
                    line = subconvert(line[m.end():], ofp, table, discards,
                                      autoclosing, knownempty, endchar="]")
                # a synthetic "}" makes the next iteration close the element
                line = "}" + line
                ofp = real_ofp
                continue
            # handle attribute mappings here:
            for attrname in params:
                if optional:
                    # only the first parameter can be optional
                    optional = 0
                    if isinstance(attrname, str):
                        m = _optional_rx.match(line)
                        if m:
                            line = line[m.end():]
                            ofp.write("A%s TOKEN %s\n"
                                      % (attrname, encode(m.group(1))))
                elif isinstance(attrname, tuple):
                    # This is a sub-element; but don't place the
                    # element we found on the stack (\section-like)
                    stack.append(macroname)
                    ofp.write("(%s\n" % macroname)
                    macroname = attrname[0]
                    m = _start_group_rx.match(line)
                    if m:
                        line = line[m.end():]
                elif isinstance(attrname, list):
                    # A normal subelement.
                    attrname = attrname[0]
                    stack.append(macroname)
                    stack.append(attrname)
                    ofp.write("(%s\n" % macroname)
                    macroname = attrname
                else:
                    m = _parameter_rx.match(line)
                    if not m:
                        raise LaTeXFormatError(
                            "could not extract parameter %s for %s: %s"
                            % (attrname, macroname, repr(line[:100])))
                    value = m.group(1)
                    if _token_rx.match(value):
                        dtype = "TOKEN"
                    else:
                        dtype = "CDATA"
                    ofp.write("A%s %s %s\n"
                              % (attrname, dtype, encode(value)))
                    line = line[m.end():]
            stack.append(macroname)
            ofp.write("(%s\n" % macroname)
            if empty:
                # a synthetic "}" makes the next iteration close the element
                line = "}" + line
            ofp = real_ofp
            continue
        if line[0] == "}":
            # end of macro
            if not stack:
                raise LaTeXFormatError(
                    "unbalanced closing brace: %s" % repr(line[:100]))
            macroname = stack[-1]
            conversion = table.get(macroname)
            if macroname \
               and macroname not in discards \
               and not isinstance(conversion, str):
                # otherwise, it was just a bare group
                ofp.write(")%s\n" % stack[-1])
            del stack[-1]
            line = line[1:]
            continue
        if line[0] == "{":
            # a bare group is tracked as an unnamed stack entry
            stack.append("")
            line = line[1:]
            continue
        if line[0] == "\\" and len(line) > 1 and line[1] in ESCAPED_CHARS:
            ofp.write("-%s\n" % encode(line[1]))
            line = line[2:]
            continue
        if line[:2] == r"\\":
            ofp.write("(BREAK\n)BREAK\n")
            line = line[2:]
            continue
        m = _text_rx.match(line)
        if m:
            text = encode(m.group())
            ofp.write("-%s\n" % text)
            line = line[m.end():]
            continue
        # special case because of \item[]
        if line[0] == "]":
            ofp.write("-]\n")
            line = line[1:]
            continue
        # avoid infinite loops
        extra = ""
        if len(line) > 100:
            extra = "..."
        raise LaTeXFormatError("could not identify markup: %s%s"
                               % (repr(line[:100]), extra))
    if endchar is not None:
        # the input ran out before the requested terminator was seen
        raise LaTeXFormatError("missing closing %s" % repr(endchar))
241
242
def convert(ifp, ofp, table=None, discards=(), autoclosing=(),
            knownempties=()):
    """Read all of *ifp* and write the converted events to *ofp*.

    Parameters:
      ifp          -- object with a read() method returning the LaTeX text.
      ofp          -- object with a write() method receiving the output.
      table        -- conversion table passed on to subconvert(); an empty
                      table is used when omitted.
      discards     -- passed on to subconvert() unchanged.
      autoclosing  -- passed on to subconvert() unchanged.
      knownempties -- iterable of names; subconvert() receives a membership
                      test over these names as its ``knownempty`` callable.

    An IOError whose errno is EPIPE is ignored; any other IOError is
    re-raised.
    """
    if table is None:
        table = {}
    empties = frozenset(knownempties)
    try:
        subconvert(ifp.read(), ofp, table, discards, autoclosing,
                   empties.__contains__)
    except IOError as err:
        # Read the errno attribute instead of unpacking the exception, which
        # fails for an IOError that does not carry exactly two arguments.
        if err.errno != errno.EPIPE:
            raise
252
253
def main():
    """Command-line entry point.

    With one argument, the named file is converted and the result written
    to standard output; with two arguments, the result is written to the
    file named by the second.  Any other argument count prints a usage
    message to standard error and exits with status 2.
    """
    if len(sys.argv) not in (2, 3):
        if sys.argv:
            prog = sys.argv[0]
        else:
            prog = "<script>"
        sys.stderr.write("usage: %s infile [outfile]\n" % prog)
        sys.exit(2)
    table = {
        # entries are name
        #   -> ([list of attribute names], first_is_optional, empty)
        "cfuncdesc": (["type", "name", ("args",)], 0, 0),
        "chapter": ([("title",)], 0, 0),
        # NOTE: subconvert() strips a trailing "*" before looking a name up,
        # so this key is never consulted; it is kept unchanged.
        "chapter*": ([("title",)], 0, 0),
        "classdesc": (["name", ("constructor-args",)], 0, 0),
        "ctypedesc": (["name"], 0, 0),
        "cvardesc": (["type", "name"], 0, 0),
        "datadesc": (["name"], 0, 0),
        "declaremodule": (["id", "type", "name"], 1, 1),
        "deprecated": (["release"], 0, 1),
        "documentclass": (["classname"], 0, 1),
        "excdesc": (["name"], 0, 0),
        "funcdesc": (["name", ("args",)], 0, 0),
        "funcdescni": (["name", ("args",)], 0, 0),
        "indexii": (["ie1", "ie2"], 0, 1),
        "indexiii": (["ie1", "ie2", "ie3"], 0, 1),
        "indexiv": (["ie1", "ie2", "ie3", "ie4"], 0, 1),
        "input": (["source"], 0, 1),
        "item": ([("leader",)], 1, 0),
        "label": (["id"], 0, 1),
        "manpage": (["name", "section"], 0, 1),
        "memberdesc": (["class", "name"], 1, 0),
        "methoddesc": (["class", "name", ("args",)], 1, 0),
        "methoddescni": (["class", "name", ("args",)], 1, 0),
        "opcodedesc": (["name", "var"], 0, 0),
        "par": ([], 0, 1),
        "paragraph": ([("title",)], 0, 0),
        "rfc": (["number"], 0, 1),
        "section": ([("title",)], 0, 0),
        "seemodule": (["ref", "name"], 1, 0),
        "subparagraph": ([("title",)], 0, 0),
        "subsection": ([("title",)], 0, 0),
        "subsubsection": ([("title",)], 0, 0),
        "tableii": (["colspec", "style", "head1", "head2"], 0, 0),
        "tableiii": (["colspec", "style", "head1", "head2", "head3"], 0, 0),
        "tableiv": (["colspec", "style", "head1", "head2", "head3", "head4"],
                    0, 0),
        "versionadded": (["version"], 0, 1),
        "versionchanged": (["version"], 0, 1),
        #
        "ABC": ([], 0, 1),
        "ASCII": ([], 0, 1),
        "C": ([], 0, 1),
        "Cpp": ([], 0, 1),
        "EOF": ([], 0, 1),
        "e": ([], 0, 1),
        "ldots": ([], 0, 1),
        "NULL": ([], 0, 1),
        "POSIX": ([], 0, 1),
        "UNIX": ([], 0, 1),
        #
        # Things that will actually be going away!
        #
        "fi": ([], 0, 1),
        "ifhtml": ([], 0, 1),
        "makeindex": ([], 0, 1),
        "makemodindex": ([], 0, 1),
        "maketitle": ([], 0, 1),
        "noindent": ([], 0, 1),
        "tableofcontents": ([], 0, 1),
    }
    ifp = open(sys.argv[1])
    try:
        if len(sys.argv) == 3:
            ofp = open(sys.argv[2], "w")
        else:
            ofp = sys.stdout
        try:
            convert(ifp, ofp, table,
                    discards=["fi", "ifhtml", "makeindex", "makemodindex",
                              "maketitle", "noindent", "tableofcontents"],
                    autoclosing=["chapter", "section", "subsection",
                                 "subsubsection", "paragraph",
                                 "subparagraph"],
                    knownempties=["appendix",
                                  "maketitle", "makeindex", "makemodindex",
                                  "localmoduletable"])
        finally:
            # close (and so flush) an output file we opened ourselves;
            # standard output is left alone
            if ofp is not sys.stdout:
                ofp.close()
    finally:
        ifp.close()
334
335
# Run the converter only when this file is executed as a script.
if __name__ == "__main__":
    main()