#! /usr/bin/env python

"""Generate ESIS events based on a LaTeX source document and configuration
data.
"""
# Module revision string; '$Revision$' looks like a version-control keyword
# placeholder that is expanded on checkout (TODO confirm against the repo).
__version__ = '$Revision$'
9
10
import re
import string
import sys

try:
    import StringIO
except ImportError:
    # Python 3 has no StringIO module; io exposes a StringIO class under the
    # same attribute name, which is the only thing this file uses.
    import io as StringIO
15
16
class Error(Exception):
    """Base class for the exceptions defined by this module."""
19
class LaTeXFormatError(Error):
    """Raised when the LaTeX input cannot be parsed by the converter."""
22
23
# Patterns used by subconvert().  Each one is applied with match(), i.e. it
# is anchored at the start of the text that is still to be processed.
#
# \begin{name} / \end{name}; group 1 is the environment name.
_begin_env_rx = re.compile(r"[\\]begin{([^}]*)}")
_end_env_rx = re.compile(r"[\\]end{([^}]*)}")
# \name or \name*; group 2 swallows one following "{" or space, if present.
_begin_macro_rx = re.compile(r"[\\]([a-zA-Z]+[*]?)({| |)")
# A "%" comment up to and including the terminating newline.
_comment_rx = re.compile("%([^\n]*)\n")
# A run of ordinary characters (anything but "]", "%", "\", "{" and "}").
_text_rx = re.compile(r"[^]%\\{}]+")
# A complete "[...]" optional argument; group 1 is its content.  The
# brackets are backslash-escaped rather than written as "[[]"/"[]]" because
# a "[" directly after the opening "[" of a set raises FutureWarning on
# Python 3.7+; the strings matched are exactly the same.
_optional_rx = re.compile(r"\[([^]]*)\]")
# A complete "{...}" argument, optionally preceded by spaces/newlines.
_parameter_rx = re.compile("[ \n]*{([^}]*)}")
# Attribute values that can be written as TOKEN instead of CDATA.
_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
# Just the opening "{" / "[" of an argument, after optional whitespace.
_start_group_rx = re.compile("[ \n]*{")
_start_optional_rx = re.compile("[ \n]*\\[")
34
35
# Translation table for encode(): every 8-bit character maps to itself,
# except newline and backslash, which are written as the two-character
# escape sequences "\n" and "\\".
_charmap = {}
for c in map(chr, range(256)):
    _charmap[c] = c
_charmap["\n"] = r"\n"
_charmap["\\"] = r"\\"
del c


def encode(s):
    """Return *s* with newlines and backslashes replaced by escape sequences.

    Characters missing from ``_charmap`` (anything outside the 256-entry
    table) are passed through unchanged; a bare ``_charmap.get`` would yield
    ``None`` for them and make the join fail.
    """
    # str.join works on every Python since 2.0; string.join() is gone in 3.x.
    return "".join([_charmap.get(ch, ch) for ch in s])
45
46
# Characters that subconvert() emits as literal data when they appear in the
# input directly after a backslash (e.g. "\$" is written as "$").
ESCAPED_CHARS = "$%#^ {}"
48
49
def subconvert(line, ofp, table, discards, autoclosing, knownempty,
               endchar=None):
    """Translate LaTeX text into ESIS events written to *ofp*.

    The text is consumed from the front, one construct at a time.  Events
    are written one per line: ``(name`` opens an element, ``)name`` closes
    it, ``-text`` is character data, ``Aname TYPE value`` is an attribute of
    the element opened next, and ``e`` marks that element as empty.

    Parameters:
        line -- the LaTeX text still to be processed (a string).
        ofp -- output object; only its ``write()`` method is used.
        table -- mapping from macro/environment name to either a string
            (the macro is emitted as the text ``&string;``) or a tuple
            ``(params, optional, empty)``; unknown names are treated as
            ``([], 0, 0)``.
        discards -- names whose opening and closing events are suppressed.
        autoclosing -- names that are closed implicitly when the same name
            is opened again while still on the element stack.
        knownempty -- callable taking a name and returning true if the
            element is empty regardless of its *table* entry.
        endchar -- when given, processing stops at the first occurrence of
            this character found while no element is open.

    Returns the text following *endchar* when it is found, otherwise None
    once all input has been consumed.

    Raises LaTeXFormatError for input that cannot be interpreted.
    """
    stack = []
    while line:
        if line[0] == endchar and not stack:
            return line[1:]

        m = _comment_rx.match(line)
        if m:
            text = m.group(1)
            if text:
                ofp.write("(COMMENT\n")
                ofp.write("-%s\n" % encode(text))
                ofp.write(")COMMENT\n")
            # The newline that ended the comment is always kept as data.
            ofp.write("-\\n\n")
            line = line[m.end():]
            continue

        m = _begin_env_rx.match(line)
        if m:
            # re-write to use the macro handler
            line = r"\%s%s" % (m.group(1), line[m.end():])
            continue

        m = _end_env_rx.match(line)
        if m:
            # end of environment
            envname = m.group(1)
            if envname == "document":
                # special magic
                for n in stack[1:]:
                    if n not in autoclosing:
                        raise LaTeXFormatError(
                            "open element on stack: " + repr(n))
                # should be more careful, but this is easier to code:
                stack = []
                ofp.write(")document\n")
            elif stack and envname == stack[-1]:
                ofp.write(")%s\n" % envname)
                del stack[-1]
            else:
                # Also reached when nothing is open at all; indexing the
                # empty stack would raise IndexError instead.
                raise LaTeXFormatError("environment close doesn't match")
            line = line[m.end():]
            continue

        m = _begin_macro_rx.match(line)
        if m:
            # start of macro
            macroname = m.group(1)
            if macroname == "verbatim":
                # really magic case!  Everything up to the end marker is
                # passed through as data without being interpreted.
                terminator = "\\end{verbatim}"
                pos = line.find(terminator)
                if pos < 0:
                    # find() returning -1 would otherwise be used as a
                    # slice index and silently mangle the input.
                    raise LaTeXFormatError(
                        "unterminated verbatim environment")
                text = line[m.end(1):pos]
                ofp.write("(verbatim\n")
                ofp.write("-%s\n" % encode(text))
                ofp.write(")verbatim\n")
                line = line[pos + len(terminator):]
                continue

            numbered = 1
            if macroname[-1] == "*":
                macroname = macroname[:-1]
                numbered = 0

            if macroname in autoclosing and macroname in stack:
                # Close everything opened since the previous occurrence,
                # then that occurrence itself.
                while stack[-1] != macroname:
                    if stack[-1] and stack[-1] not in discards:
                        ofp.write(")%s\n-\\n\n" % stack[-1])
                    del stack[-1]
                if macroname not in discards:
                    ofp.write("-\\n\n)%s\n-\\n\n" % macroname)
                del stack[-1]

            conversion = table.get(macroname, ([], 0, 0))
            if isinstance(conversion, str):
                # XXX convert to general entity; ESIS cheats!
                line = "&%s;%s" % (conversion, line[m.end(1):])
                continue

            # Events for a discarded macro go to a throw-away buffer.  The
            # switch is made only after the early exit above, and undone on
            # every path that leaves this branch, so the real output stream
            # is never lost.
            real_ofp = ofp
            if macroname in discards:
                ofp = StringIO.StringIO()

            params, optional, empty = conversion
            empty = empty or knownempty(macroname)
            if empty:
                ofp.write("e\n")
            if not numbered:
                ofp.write("Anumbered TOKEN no\n")

            if params and not (optional and len(params) == 1):
                # Resume directly after the macro name so that a "{" or " "
                # swallowed by the macro pattern is still available to the
                # parameter patterns.  (Backing up one character from the
                # end of the match is wrong when nothing was swallowed,
                # e.g. for "\name[opt]{arg}".)
                line = line[m.end(1):]
            else:
                line = line[m.end():]

            #
            # Very ugly special case to deal with \item[].  The catch is
            # that this needs to occur outside the for loop that handles
            # attribute parsing so we can 'continue' the outer loop.
            #
            if optional and params and isinstance(params[0], tuple):
                # the attribute name isn't used in this special case
                stack.append(macroname)
                ofp.write("(%s\n" % macroname)
                m = _start_optional_rx.match(line)
                if m:
                    line = line[m.end():]
                    line = subconvert(line, ofp, table, discards,
                                      autoclosing, knownempty, endchar="]")
                # Close the element on the next pass through the loop.
                line = "}" + line
                ofp = real_ofp
                continue

            # handle attribute mappings here:
            for attrname in params:
                if optional:
                    # Only the first parameter may be optional.
                    optional = 0
                    if isinstance(attrname, str):
                        m = _optional_rx.match(line)
                        if m:
                            line = line[m.end():]
                            ofp.write("A%s TOKEN %s\n"
                                      % (attrname, encode(m.group(1))))
                elif isinstance(attrname, tuple):
                    # This is a sub-element; but don't place the
                    # element we found on the stack (\section-like)
                    stack.append(macroname)
                    ofp.write("(%s\n" % macroname)
                    macroname = attrname[0]
                    m = _start_group_rx.match(line)
                    if m:
                        line = line[m.end():]
                elif isinstance(attrname, list):
                    # A normal subelement.
                    attrname = attrname[0]
                    stack.append(macroname)
                    stack.append(attrname)
                    ofp.write("(%s\n" % macroname)
                    macroname = attrname
                else:
                    m = _parameter_rx.match(line)
                    if not m:
                        raise LaTeXFormatError(
                            "could not extract parameter group: "
                            + repr(line))
                    value = m.group(1)
                    if _token_rx.match(value):
                        dtype = "TOKEN"
                    else:
                        dtype = "CDATA"
                    ofp.write("A%s %s %s\n"
                              % (attrname, dtype, encode(value)))
                    line = line[m.end():]

            stack.append(macroname)
            ofp.write("(%s\n" % macroname)
            if empty:
                # Close the element on the next pass through the loop.
                line = "}" + line
            ofp = real_ofp
            continue

        if line[0] == "}":
            # end of macro
            if not stack:
                raise LaTeXFormatError("unbalanced '}' in input")
            macroname = stack[-1]
            conversion = table.get(macroname)
            if macroname \
               and macroname not in discards \
               and not isinstance(conversion, str):
                # otherwise, it was just a bare group
                ofp.write(")%s\n" % macroname)
            del stack[-1]
            line = line[1:]
            continue

        if line[0] == "{":
            # A bare group: the empty name keeps the matching "}" from
            # producing a close event.
            stack.append("")
            line = line[1:]
            continue

        # The length check keeps a lone trailing backslash from raising
        # IndexError; it ends up in the "could not identify" error below.
        if line[0] == "\\" and len(line) > 1 and line[1] in ESCAPED_CHARS:
            ofp.write("-%s\n" % encode(line[1]))
            line = line[2:]
            continue

        if line[:2] == r"\\":
            ofp.write("(BREAK\n)BREAK\n")
            line = line[2:]
            continue

        m = _text_rx.match(line)
        if m:
            text = encode(m.group())
            ofp.write("-%s\n" % text)
            line = line[m.end():]
            continue

        # special case because of \item[]
        if line[0] == "]":
            ofp.write("-]\n")
            line = line[1:]
            continue

        # avoid infinite loops
        extra = ""
        if len(line) > 100:
            extra = "..."
        raise LaTeXFormatError("could not identify markup: %s%s"
                               % (repr(line[:100]), extra))

    if endchar is not None:
        # The recursive caller concatenates the return value with a string,
        # so running out of input must be reported, not returned as None.
        raise LaTeXFormatError(
            "input ended before closing %r was found" % endchar)
    return None
246
247
def convert(ifp, ofp, table={}, discards=(), autoclosing=(), knownempties=()):
    """Read all of *ifp* and write the corresponding ESIS events to *ofp*.

    *table*, *discards* and *autoclosing* are handed to subconvert()
    unchanged; *knownempties* is an iterable of element names that are
    always treated as empty.  Returns whatever subconvert() returns.

    The shared default dict for *table* is harmless here: this function
    only passes it on and never modifies it.
    """
    known = {}
    for gi in knownempties:
        known[gi] = gi
    # subconvert() expects a predicate.  dict.has_key() no longer exists in
    # Python 3; __contains__ answers the same question on 2.2+ and 3.x.
    return subconvert(ifp.read(), ofp, table, discards, autoclosing,
                      known.__contains__)
253
254
def main():
    """Command-line entry point: ``script infile [outfile]``.

    Converts *infile* and writes the result to *outfile*, or to standard
    output when only one argument is given.  With any other number of
    arguments a usage message is written to standard error and the process
    exits with status 2.
    """
    args = sys.argv[1:]
    if len(args) not in (1, 2):
        # No usage() helper exists in this file, so report the problem here.
        sys.stderr.write("usage: %s infile [outfile]\n" % sys.argv[0])
        sys.exit(2)

    table = {
        # entries are name
        #   -> ([list of attribute names], first_is_optional, empty)
        "cfuncdesc": (["type", "name", ("args",)], 0, 0),
        "chapter": ([("title",)], 0, 0),
        # NOTE: subconvert() strips a trailing "*" before looking a name
        # up, so this entry is never consulted.
        "chapter*": ([("title",)], 0, 0),
        "classdesc": (["name", ("constructor-args",)], 0, 0),
        "ctypedesc": (["name"], 0, 0),
        "cvardesc": (["type", "name"], 0, 0),
        "datadesc": (["name"], 0, 0),
        "declaremodule": (["id", "type", "name"], 1, 1),
        "deprecated": (["release"], 0, 1),
        "documentclass": (["classname"], 0, 1),
        "excdesc": (["name"], 0, 0),
        "funcdesc": (["name", ("args",)], 0, 0),
        "funcdescni": (["name", ("args",)], 0, 0),
        "indexii": (["ie1", "ie2"], 0, 1),
        "indexiii": (["ie1", "ie2", "ie3"], 0, 1),
        "indexiv": (["ie1", "ie2", "ie3", "ie4"], 0, 1),
        "input": (["source"], 0, 1),
        "item": ([("leader",)], 1, 0),
        "label": (["id"], 0, 1),
        "manpage": (["name", "section"], 0, 1),
        "memberdesc": (["class", "name"], 1, 0),
        "methoddesc": (["class", "name", ("args",)], 1, 0),
        "methoddescni": (["class", "name", ("args",)], 1, 0),
        "opcodedesc": (["name", "var"], 0, 0),
        "par": ([], 0, 1),
        "rfc": (["number"], 0, 1),
        "section": ([("title",)], 0, 0),
        "seemodule": (["ref", "name"], 1, 0),
        "tableii": (["colspec", "style", "head1", "head2"], 0, 0),
        "tableiii": (["colspec", "style", "head1", "head2", "head3"], 0, 0),
        "tableiv": (["colspec", "style", "head1", "head2", "head3", "head4"],
                    0, 0),
        "versionadded": (["version"], 0, 1),
        "versionchanged": (["version"], 0, 1),
        #
        # Macros that are replaced by a general entity reference.
        #
        "ABC": "ABC",
        "ASCII": "ASCII",
        "C": "C",
        "Cpp": "Cpp",
        "EOF": "EOF",
        "e": "backslash",
        "ldots": "ldots",
        "NULL": "NULL",
        "POSIX": "POSIX",
        "UNIX": "Unix",
        #
        # Things that will actually be going away!
        #
        "fi": ([], 0, 1),
        "ifhtml": ([], 0, 1),
        "makeindex": ([], 0, 1),
        "makemodindex": ([], 0, 1),
        "maketitle": ([], 0, 1),
        "noindent": ([], 0, 1),
        "tableofcontents": ([], 0, 1),
        }

    ifp = open(args[0])
    try:
        if len(args) == 2:
            ofp = open(args[1], "w")
        else:
            ofp = sys.stdout
        try:
            convert(ifp, ofp, table,
                    discards=["fi", "ifhtml", "makeindex", "makemodindex",
                              "maketitle", "noindent", "tableofcontents"],
                    autoclosing=["chapter", "section", "subsection",
                                 "subsubsection", "paragraph",
                                 "subparagraph", ],
                    knownempties=["rfc", "declaremodule", "appendix",
                                  "maketitle", "makeindex", "makemodindex",
                                  "localmoduletable", "manpage", "input"])
        finally:
            # Close (and thereby flush) a file we opened ourselves, but
            # leave the interpreter's stdout alone.
            if ofp is not sys.stdout:
                ofp.close()
    finally:
        ifp.close()
331
332
# Run the converter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()