blob: 325b0b138fea2eb6f9ddbdb54dbcd99bc4587830 [file] [log] [blame]
Fred Drake30a68c71998-11-23 16:59:39 +00001#! /usr/bin/env python
2
"""Generate ESIS events based on a LaTeX source document and
configuration data.

The conversion is not strong enough to work with arbitrary LaTeX
documents; it has only been designed to work with the highly stylized
markup used in the standard Python documentation.  A lot of
information about specific markup is encoded in the control table
passed to the convert() function; changing this table can allow this
tool to support additional LaTeX markups.

The format of the table is largely undocumented; main() loads it with
load_table() from the XML file 'conversion.xml' located in sys.path[0]
(normally the directory containing this script).  There is no
command-line option to select an alternate table file.
"""
# NOTE(review): '$Revision$' looks like a version-control keyword
# placeholder that is expected to be expanded on checkout -- confirm.
__version__ = '$Revision$'
18
Fred Drake96e4a061999-07-29 22:22:13 +000019import copy
Fred Drake30a68c71998-11-23 16:59:39 +000020import errno
Fred Drake96e4a061999-07-29 22:22:13 +000021import getopt
22import os
Fred Drake30a68c71998-11-23 16:59:39 +000023import re
24import string
25import StringIO
26import sys
Fred Drake96e4a061999-07-29 22:22:13 +000027import UserList
Fred Drake30a68c71998-11-23 16:59:39 +000028
Fred Drakeaeea9811998-12-01 19:04:12 +000029from esistools import encode
Fred Drake54fb7fb1999-05-10 19:36:03 +000030from types import ListType, StringType, TupleType
Fred Drakeaeea9811998-12-01 19:04:12 +000031
Fred Drake96e4a061999-07-29 22:22:13 +000032try:
33 from xml.parsers.xmllib import XMLParser
34except ImportError:
35 from xmllib import XMLParser
36
Fred Drake30a68c71998-11-23 16:59:39 +000037
# Debugging level.  main() adds one for every -D/--debug option; any true
# value enables the stderr output of dbgmsg() and Conversion.err_write()
# and makes new_stack() return a tracing _Stack instead of a plain list.
DEBUG = 0
39
40
class LaTeXFormatError(Exception):
    """Raised when the LaTeX input cannot be interpreted by the converter."""
43
44
class LaTeXStackError(LaTeXFormatError):
    """Raised when an environment close does not match the open elements.

    Attributes:
    found -- the environment name that was being closed
    stack -- a copy (stack[:]) of the element stack at the time of the error
    """

    def __init__(self, found, stack):
        self.found = found
        # Keep a snapshot so later changes to the live stack do not alter
        # the information carried by the exception.
        self.stack = stack[:]
        LaTeXFormatError.__init__(
            self,
            "environment close for %s doesn't match;\n stack = %s"
            % (found, stack))
52
53
# Patterns below are applied with .match(), i.e. anchored at the start of
# the not-yet-consumed input (or of an attribute value for _token_rx).

# "\begin{name}"; group 1 is the environment name.
_begin_env_rx = re.compile(r"[\\]begin{([^}]*)}")
# "\end{name}"; group 1 is the environment name.
_end_env_rx = re.compile(r"[\\]end{([^}]*)}")
# "\name" or "\name*", an optional space, then either "{" or (possibly
# empty) whitespace; group 1 is the macro name.
_begin_macro_rx = re.compile(r"[\\]([a-zA-Z]+[*]?) ?({|\s*\n?)")
# One or more "%" followed by the rest of the line (group 1) and the
# newline; blanks/tabs starting the next line are consumed too.
# NOTE: the pattern requires a terminating newline, so a comment on the
# very last line of input (which has none) does not match.
_comment_rx = re.compile("%+ ?(.*)\n[ \t]*")
# A run of ordinary text: anything except "]", "%", "\", "{" and "}".
_text_rx = re.compile(r"[^]%\\{}]+")
# A bracketed optional argument "[...]" after optional whitespace;
# group 1 is the text between the brackets.
_optional_rx = re.compile(r"\s*[[]([^]]*)[]]")
# _parameter_rx is this complicated to allow {...} inside a parameter;
# this is useful to match tabular layout specifications like {c|p{24pt}}
_parameter_rx = re.compile("[ \n]*{(([^{}}]|{[^}]*})*)}")
# A letter followed by letters, digits, "." or "-" up to the end of the
# value; dump_attr() uses it to choose between TOKEN and CDATA.
_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
# An opening "{" / "[" preceded by optional spaces or newlines.
_start_group_rx = re.compile("[ \n]*{")
_start_optional_rx = re.compile("[ \n]*[[]")


# Characters which, when preceded by a backslash in the input, are written
# to the output as a literal data character by Conversion.subconvert().
ESCAPED_CHARS = "$%#^ {}&~"
Fred Drake30a68c71998-11-23 16:59:39 +000069
70
def dbgmsg(msg):
    """Write msg followed by a newline to stderr, but only if DEBUG is set."""
    if not DEBUG:
        return
    sys.stderr.write(msg + "\n")
74
def pushing(name, point, depth):
    """Report (via dbgmsg) that element name is being pushed at point.

    The depth argument is accepted but not used.
    """
    report = "pushing <%s> at %s" % (name, point)
    dbgmsg(report)
Fred Draked7acf021999-01-14 17:38:12 +000077
def popping(name, point, depth):
    """Report (via dbgmsg) that element name is being popped at point.

    The depth argument is accepted but not used.
    """
    report = "popping </%s> at %s" % (name, point)
    dbgmsg(report)
Fred Draked7acf021999-01-14 17:38:12 +000080
81
Fred Drake96e4a061999-07-29 22:22:13 +000082class _Stack(UserList.UserList):
Fred Drake96e4a061999-07-29 22:22:13 +000083 def append(self, entry):
Fred Drake4fbdf971999-08-02 14:35:25 +000084 if type(entry) is not StringType:
Fred Drake96e4a061999-07-29 22:22:13 +000085 raise LaTeXFormatError("cannot push non-string on stack: "
86 + `entry`)
87 sys.stderr.write("%s<%s>\n" % (" "*len(self.data), entry))
88 self.data.append(entry)
89
90 def pop(self, index=-1):
91 entry = self.data[index]
92 del self.data[index]
93 sys.stderr.write("%s</%s>\n" % (" "*len(self.data), entry))
94
95 def __delitem__(self, index):
96 entry = self.data[index]
97 del self.data[index]
98 sys.stderr.write("%s</%s>\n" % (" "*len(self.data), entry))
99
100
def new_stack():
    """Return an empty element stack.

    A plain list is used normally; when DEBUG is set a _Stack is returned
    so that pushes and pops are traced on stderr.
    """
    if not DEBUG:
        return []
    return _Stack()
105
106
class Conversion:
    """Convert LaTeX text from an input file into events on an output file.

    The conversion is driven by a table mapping macro and environment
    names to TableEntry objects.  Names that are not in the table get a
    default entry, which is stored in the table the first time the name
    is seen.
    """

    def __init__(self, ifp, ofp, table):
        """Read the whole input and remember the output file and table.

        ifp -- input file object; readlines() is called on it once
        ofp -- output file object; its write() method receives all output
        table -- mapping of name -> TableEntry (may be extended, see above)
        """
        self.write = ofp.write
        self.ofp = ofp
        self.table = table
        # Trailing whitespace is removed from every line and the lines are
        # re-joined with "\n", so the input is handled as a single string
        # without a newline after the last line.
        self.line = string.join(map(string.rstrip, ifp.readlines()), "\n")
        self.preamble = 1

    def err_write(self, msg):
        """Write str(msg) and a newline to stderr when DEBUG is set."""
        if DEBUG:
            sys.stderr.write(str(msg) + "\n")

    def convert(self):
        """Convert the complete input (a top-level subconvert() call)."""
        self.subconvert()

    def subconvert(self, endchar=None, depth=0):
        """Convert input from self.line until endchar or end of input.

        Parses content, including sub-structures, until the character
        'endchar' is found (with no open structures), or until the end
        of the input data if endchar is None.

        When endchar is found, self.line is set to the remaining input,
        which still starts with endchar, and that string is returned; the
        caller removes the end character.  When the input runs out, None
        is returned.

        depth is only passed along to nested calls.

        Raises LaTeXFormatError (or its subclass LaTeXStackError) when the
        markup cannot be interpreted or elements are left open.
        """
        stack = new_stack()
        line = self.line
        while line:
            if line[0] == endchar and not stack:
                self.line = line
                return line
            m = _comment_rx.match(line)
            if m:
                text = m.group(1)
                if text:
                    self.write("(COMMENT\n- %s \n)COMMENT\n-\\n\n"
                               % encode(text))
                line = line[m.end():]
                continue
            m = _begin_env_rx.match(line)
            if m:
                name = m.group(1)
                # Called for its side effects: registers a default entry
                # for an unknown environment, and raises if the name is
                # defined as a macro.
                self.get_env_entry(name)
                # re-write to use the macro handler
                line = r"\%s %s" % (name, line[m.end():])
                continue
            m = _end_env_rx.match(line)
            if m:
                # end of environment
                envname = m.group(1)
                entry = self.get_entry(envname)
                # implicitly close elements this environment end may close
                while (stack and envname != stack[-1]
                       and stack[-1] in entry.endcloses):
                    self.write(")%s\n" % stack.pop())
                if stack and envname == stack[-1]:
                    self.write(")%s\n" % entry.outputname)
                    del stack[-1]
                else:
                    raise LaTeXStackError(envname, stack)
                line = line[m.end():]
                continue
            m = _begin_macro_rx.match(line)
            if m:
                # start of macro
                macroname = m.group(1)
                entry = self.get_entry(macroname)
                if entry.verbatim:
                    # magic case!  Everything up to the matching \end{...}
                    # is copied as data without being interpreted.
                    endmarker = "\\end{%s}" % macroname
                    pos = string.find(line, endmarker)
                    if pos < 0:
                        # Without this check the -1 would be used as a
                        # slice index and the input silently mangled.
                        raise LaTeXFormatError(
                            "missing %s for verbatim environment"
                            % endmarker)
                    text = line[m.end(1):pos]
                    stack.append(entry.name)
                    self.write("(%s\n" % entry.outputname)
                    self.write("-%s\n" % encode(text))
                    self.write(")%s\n" % entry.outputname)
                    stack.pop()
                    line = line[pos + len(endmarker):]
                    continue
                # implicitly close elements that this macro closes
                while stack and stack[-1] in entry.closes:
                    top = stack.pop()
                    topentry = self.get_entry(top)
                    if topentry.outputname:
                        self.write(")%s\n-\\n\n" % topentry.outputname)
                #
                if entry.outputname and entry.empty:
                    self.write("e\n")
                #
                params, optional, empty, environ = self.start_macro(macroname)
                # rip off the macroname
                if params or empty:
                    line = line[m.end(1):]
                else:
                    line = line[m.end():]
                opened = 0
                implied_content = 0

                # handle attribute mappings here:
                for pentry in params:
                    if pentry.type == "attribute":
                        if pentry.optional:
                            m = _optional_rx.match(line)
                            if m and entry.outputname:
                                line = line[m.end():]
                                self.dump_attr(pentry, m.group(1))
                        elif pentry.text and entry.outputname:
                            # value supplied by conversion spec:
                            self.dump_attr(pentry, pentry.text)
                        else:
                            m = _parameter_rx.match(line)
                            if not m:
                                raise LaTeXFormatError(
                                    "could not extract parameter %s for %s: %s"
                                    % (pentry.name, macroname,
                                       repr(line[:100])))
                            if entry.outputname:
                                self.dump_attr(pentry, m.group(1))
                            line = line[m.end():]
                    elif pentry.type == "child":
                        if pentry.optional:
                            m = _optional_rx.match(line)
                            if m:
                                line = line[m.end():]
                                if entry.outputname and not opened:
                                    opened = 1
                                    self.write("(%s\n" % entry.outputname)
                                    stack.append(macroname)
                                stack.append(pentry.name)
                                self.write("(%s\n" % pentry.name)
                                self.write("-%s\n" % encode(m.group(1)))
                                self.write(")%s\n" % pentry.name)
                                stack.pop()
                        else:
                            if entry.outputname and not opened:
                                opened = 1
                                self.write("(%s\n" % entry.outputname)
                                stack.append(entry.name)
                            self.write("(%s\n" % pentry.name)
                            stack.append(pentry.name)
                            # NOTE(review): the first character after any
                            # skipped whitespace is dropped without
                            # checking that it really is "{".
                            self.line = skip_white(line)[1:]
                            line = self.subconvert(
                                "}", len(stack) + depth + 1)
                            if line is None:
                                # input ran out before the closing brace
                                raise LaTeXFormatError(
                                    "unterminated group for %s of %s"
                                    % (pentry.name, macroname))
                            # drop the "}" that ended the nested call
                            line = line[1:]
                            self.write(")%s\n" % stack.pop())
                    elif pentry.type == "content":
                        if pentry.implied:
                            implied_content = 1
                        else:
                            if entry.outputname and not opened:
                                opened = 1
                                self.write("(%s\n" % entry.outputname)
                                stack.append(entry.name)
                            line = skip_white(line)
                            # slice, not index: line may be empty here
                            if line[:1] != "{":
                                raise LaTeXFormatError(
                                    "missing content for " + macroname)
                            self.line = line[1:]
                            line = self.subconvert("}", len(stack) + depth + 1)
                            if line and line[0] == "}":
                                line = line[1:]
                    elif pentry.type == "text" and pentry.text:
                        if entry.outputname and not opened:
                            opened = 1
                            stack.append(entry.name)
                            self.write("(%s\n" % entry.outputname)
                        self.err_write("--- text: %s\n" % repr(pentry.text))
                        self.write("-%s\n" % encode(pentry.text))
                    elif pentry.type == "entityref":
                        self.write("&%s\n" % pentry.name)
                if entry.outputname:
                    if not opened:
                        self.write("(%s\n" % entry.outputname)
                        stack.append(entry.name)
                    if not implied_content:
                        self.write(")%s\n" % entry.outputname)
                        stack.pop()
                continue
            if line[0] == "}":
                # end of macro or group
                if not stack:
                    raise LaTeXFormatError(
                        "unmatched '}': " + repr(line[:100]))
                macroname = stack[-1]
                if macroname:
                    conversion = self.get_entry(macroname)
                    if conversion.outputname:
                        # otherwise, it was just a bare group
                        self.write(")%s\n" % conversion.outputname)
                del stack[-1]
                line = line[1:]
                continue
            if line[0] == "{":
                # bare group; the empty name marks it on the stack
                stack.append("")
                line = line[1:]
                continue
            if (line[0] == "\\" and len(line) > 1
                    and line[1] in ESCAPED_CHARS):
                self.write("-%s\n" % encode(line[1]))
                line = line[2:]
                continue
            if line[:2] == r"\\":
                self.write("(BREAK\n)BREAK\n")
                line = line[2:]
                continue
            m = _text_rx.match(line)
            if m:
                text = encode(m.group())
                self.write("-%s\n" % text)
                line = line[m.end():]
                continue
            # special case because of \item[]
            # XXX can we axe this???
            if line[0] == "]":
                self.write("-]\n")
                line = line[1:]
                continue
            # avoid infinite loops
            extra = ""
            if len(line) > 100:
                extra = "..."
            raise LaTeXFormatError("could not identify markup: %s%s"
                                   % (repr(line[:100]), extra))
        # Out of input: elements whose entry has a non-empty 'closes' list
        # are closed implicitly; anything else left open is an error.
        while stack:
            entry = self.get_entry(stack[-1])
            if entry.closes:
                self.write(")%s\n-%s\n" % (entry.outputname, encode("\n")))
                del stack[-1]
            else:
                break
        if stack:
            raise LaTeXFormatError("elements remain on stack: "
                                   + string.join(stack, ", "))
        # otherwise we just ran out of input here...
        return None

    def start_macro(self, name):
        """Return (parameters, optional, empty, environment) for name.

        'optional' is the optional flag of the first parameter, or the
        (empty) parameter list itself when there are no parameters.
        """
        conversion = self.get_entry(name)
        parameters = conversion.parameters
        optional = parameters and parameters[0].optional
        return parameters, optional, conversion.empty, conversion.environment

    def get_entry(self, name):
        """Return the table entry for name, creating a default if needed.

        The default is a macro entry with a single "content" parameter;
        it is stored in the table.
        """
        entry = self.table.get(name)
        if entry is None:
            self.err_write("get_entry(%s) failing; building default entry!"
                           % repr(name))
            # not defined; build a default entry:
            entry = TableEntry(name)
            entry.has_content = 1
            entry.parameters.append(Parameter("content"))
            self.table[name] = entry
        return entry

    def get_env_entry(self, name):
        """Return the table entry for the environment name.

        An unknown name gets a default environment entry with implied
        content, which is stored in the table.  Raises LaTeXFormatError if
        name is in the table but is not an environment.
        """
        entry = self.table.get(name)
        if entry is None:
            # not defined; build a default entry:
            entry = TableEntry(name, 1)
            entry.has_content = 1
            entry.parameters.append(Parameter("content"))
            entry.parameters[-1].implied = 1
            self.table[name] = entry
        elif not entry.environment:
            raise LaTeXFormatError(
                name + " is defined as a macro; expected environment")
        return entry

    def dump_attr(self, pentry, value):
        """Write an attribute line ("A<name> <type> <value>") for pentry.

        Nothing is written if the parameter has no name or value is empty.
        The type is TOKEN when value matches _token_rx, otherwise CDATA.
        """
        if not (pentry.name and value):
            return
        if _token_rx.match(value):
            dtype = "TOKEN"
        else:
            dtype = "CDATA"
        self.write("A%s %s %s\n" % (pentry.name, dtype, encode(value)))
375
376
def convert(ifp, ofp, table):
    """Convert the LaTeX read from ifp to events written to ofp.

    ifp -- input file object
    ofp -- output file object
    table -- conversion table (mapping of name -> TableEntry)

    An IOError whose errno is EPIPE is ignored; every other exception
    propagates to the caller.
    """
    c = Conversion(ifp, ofp, table)
    try:
        c.convert()
    except IOError:
        # Inspect the exception object instead of unpacking it: an
        # IOError does not always carry exactly (errno, message), and a
        # failed unpack would hide the original error.
        err = sys.exc_info()[1]
        if getattr(err, "errno", None) != errno.EPIPE:
            raise
384
385
def skip_white(line):
    """Return line without its leading whitespace and "%" characters.

    While the first character is a space, "%", newline, tab or carriage
    return, that character and any whitespace following it are removed.
    """
    while 1:
        if not line or line[0] not in " %\n\t\r":
            return line
        line = line[1:].lstrip()
390
391
Fred Drake96e4a061999-07-29 22:22:13 +0000392
class TableEntry:
    """Conversion settings for a single macro or environment.

    name -- name of the macro or environment
    environment -- true if the entry describes an environment

    A new entry uses its own name as output name, has no parameters and
    closes nothing; it is marked empty unless it is an environment.
    """

    def __init__(self, name, environment=0):
        # what the entry describes
        self.name = name
        self.environment = environment
        # name used in the output; the same as the input name by default
        self.outputname = name
        # flags
        self.empty = not environment
        self.has_content = 0
        self.verbatim = 0
        self.auto_close = 0
        # every instance gets fresh lists of its own
        self.parameters = []
        self.closes = []
        self.endcloses = []
405
class Parameter:
    """One parameter of a macro or environment in the conversion table.

    type -- kind of parameter; this file uses "attribute", "child",
            "content", "text" and "entityref"
    name -- name associated with the parameter, or None
    optional -- true if the parameter is optional
    """

    def __init__(self, type, name=None, optional=0):
        self.name = name
        self.type = type
        self.optional = optional
        # filled in after construction by the code that builds the table
        self.implied = 0
        self.text = ''
412 self.implied = 0
413
414
class TableParser(XMLParser):
    """XML parser that builds a conversion table.

    <environment> and <macro> elements each produce a TableEntry; the
    <attribute>, <entityref>, <child>, <content> and <text> elements add
    Parameter objects to the entry that is currently being built.
    """

    def __init__(self, table=None):
        """Set up the parser; entries are added to table (default: new dict)."""
        if table is None:
            table = {}
        self.__table = table
        self.__current = None
        self.__buffer = ''
        XMLParser.__init__(self)

    def get_table(self):
        """Return the table, completing environment entries first.

        Every environment entry that has no content parameter yet receives
        an implied one.
        """
        for entry in self.__table.values():
            if entry.has_content or not entry.environment:
                continue
            implied = Parameter("content")
            implied.implied = 1
            entry.parameters.append(implied)
            entry.has_content = 1
        return self.__table

    def start_environment(self, attrs):
        """Begin an environment entry from the element's attributes."""
        entry = self.__current = TableEntry(attrs["name"], environment=1)
        entry.verbatim = attrs.get("verbatim") == "yes"
        # keep the default output name unless one is given explicitly
        entry.outputname = attrs.get("outputname", entry.outputname)
        entry.endcloses = attrs.get("endcloses", "").split()

    def end_environment(self):
        """Finish an environment entry (same handling as for a macro)."""
        self.end_macro()

    def start_macro(self, attrs):
        """Begin a macro entry from the element's attributes."""
        entry = self.__current = TableEntry(attrs["name"])
        entry.closes = attrs.get("closes", "").split()
        # keep the default output name unless one is given explicitly
        entry.outputname = attrs.get("outputname", entry.outputname)

    def end_macro(self):
        """Store the finished entry in the table under its name."""
        finished = self.__current
        self.__table[finished.name] = finished
        self.__current = None

    def start_attribute(self, attrs):
        """Add an attribute parameter and start collecting its text."""
        is_optional = attrs.get("optional") == "yes"
        # a missing or empty name leaves the parameter unnamed (None)
        param = Parameter("attribute", attrs.get("name") or None,
                          optional=is_optional)
        self.__current.parameters.append(param)
        self.__buffer = ''

    def end_attribute(self):
        """Store the collected text as the attribute parameter's text."""
        self.__current.parameters[-1].text = self.__buffer

    def start_entityref(self, attrs):
        """Add an entity reference parameter with the given name."""
        self.__current.parameters.append(
            Parameter("entityref", attrs["name"]))

    def start_child(self, attrs):
        """Add a child parameter; the entry is no longer empty."""
        entry = self.__current
        is_optional = attrs.get("optional") == "yes"
        entry.parameters.append(
            Parameter("child", attrs["name"], is_optional))
        entry.empty = 0

    def start_content(self, attrs):
        """Add a content parameter; content of environments is implied."""
        entry = self.__current
        content = Parameter("content")
        if entry.environment:
            content.implied = 1
        else:
            content.implied = attrs.get("implied") == "yes"
        entry.parameters.append(content)
        entry.has_content = 1
        entry.empty = 0

    def start_text(self, attrs):
        """Start collecting literal text; the entry is no longer empty."""
        self.__current.empty = 0
        self.__buffer = ''

    def end_text(self):
        """Add a text parameter holding the collected text."""
        literal = Parameter("text")
        literal.text = self.__buffer
        self.__current.parameters.append(literal)

    def handle_data(self, data):
        """Collect character data for the current attribute or text."""
        self.__buffer = self.__buffer + data
495
496
def load_table(fp, table=None):
    """Parse the XML table specification in fp and return the table.

    fp -- file object containing the specification; it is read completely
    table -- optional existing table to add the entries to
    """
    table_parser = TableParser(table=table)
    spec = fp.read()
    table_parser.feed(spec)
    table_parser.close()
    return table_parser.get_table()
502
503
def usage():
    """Write a one-line usage summary to stderr."""
    prog = os.path.basename(sys.argv[0])
    sys.stderr.write("usage: %s [-D|--debug] [infile [outfile]]\n" % prog)


def main():
    """Command-line entry point.

    Options:
    -D, --debug -- increase the debugging level (may be repeated)

    With no arguments the input is read from stdin and the output written
    to stdout; one argument names the input file, a second one the output
    file.  More than two arguments, or an unknown option, print a usage
    message and exit with status 2.

    The conversion table is loaded from 'conversion.xml' in sys.path[0].
    """
    global DEBUG
    #
    try:
        opts, args = getopt.getopt(sys.argv[1:], "D", ["debug"])
    except getopt.error:
        sys.stderr.write("%s\n" % sys.exc_info()[1])
        usage()
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-D", "--debug"):
            DEBUG = DEBUG + 1
    if len(args) > 2:
        usage()
        sys.exit(2)

    ifp = sys.stdin
    ofp = sys.stdout
    try:
        if args:
            # args is a list; the input file name is its first item
            ifp = open(args[0])
        if len(args) == 2:
            ofp = open(args[1], "w")
        table_fp = open(os.path.join(sys.path[0], 'conversion.xml'))
        try:
            table = load_table(table_fp)
        finally:
            table_fp.close()
        convert(ifp, ofp, table)
    finally:
        # close only the files opened here, never the standard streams
        if ifp is not sys.stdin:
            ifp.close()
        if ofp is not sys.stdout:
            ofp.close()
Fred Drake30a68c71998-11-23 16:59:39 +0000526
527
# Run the converter only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()