#! /usr/bin/env python

"""Generate ESIS events based on a LaTeX source document and
configuration data.

The conversion is not strong enough to work with arbitrary LaTeX
documents; it has only been designed to work with the highly stylized
markup used in the standard Python documentation.  A lot of
information about specific markup is encoded in the control table
passed to the convert() function; changing this table can allow this
tool to support additional LaTeX markups.

The format of the table is largely undocumented.  main() loads it from
the XML file 'conversion.xml' located in the directory named by
sys.path[0]; there is no command-line option to load an alternate
table from a different file.
"""
# NOTE(review): '$Revision$' looks like a version-control keyword
# placeholder that is expanded on checkout -- confirm with the repository
# configuration.
__version__ = '$Revision$'
18
Fred Drake96e4a061999-07-29 22:22:13 +000019import copy
Fred Drake30a68c71998-11-23 16:59:39 +000020import errno
Fred Drake96e4a061999-07-29 22:22:13 +000021import getopt
22import os
Fred Drake30a68c71998-11-23 16:59:39 +000023import re
24import string
25import StringIO
26import sys
Fred Drake96e4a061999-07-29 22:22:13 +000027import UserList
Fred Drake30a68c71998-11-23 16:59:39 +000028
Fred Drakeaeea9811998-12-01 19:04:12 +000029from esistools import encode
Fred Drake54fb7fb1999-05-10 19:36:03 +000030from types import ListType, StringType, TupleType
Fred Drakeaeea9811998-12-01 19:04:12 +000031
Fred Drake96e4a061999-07-29 22:22:13 +000032try:
33 from xml.parsers.xmllib import XMLParser
34except ImportError:
35 from xmllib import XMLParser
36
Fred Drake30a68c71998-11-23 16:59:39 +000037
# Debug level.  main() increments it once for every -D/--debug option;
# any non-zero value enables the diagnostic output written to stderr by
# dbgmsg(), BaseConversion.err_write() and the tracing _Stack class.
DEBUG = 0
39
40
class LaTeXFormatError(Exception):
    """Raised when the LaTeX input cannot be handled by the converter."""
43
44
class LaTeXStackError(LaTeXFormatError):
    """Raised when an environment is closed that is not the open one.

    Attributes:
        found -- name of the environment whose close was encountered
        stack -- copy of the element stack at the time of the error
    """

    def __init__(self, found, stack):
        # Build the message first so it shows the stack exactly as it
        # was handed in.
        text = ("environment close for %s doesn't match;\n stack = %s"
                % (found, stack))
        self.found = found
        # Keep a copy so later changes to the caller's stack do not
        # alter the recorded state.
        self.stack = stack[:]
        LaTeXFormatError.__init__(self, text)
52
53
# Patterns used by Conversion to recognize pieces of LaTeX markup at the
# start of the remaining input.

# \begin{NAME} / \end{NAME}; group 1 is NAME (everything up to the first "}")
_begin_env_rx = re.compile(r"[\\]begin{([^}]*)}")
_end_env_rx = re.compile(r"[\\]end{([^}]*)}")
# \NAME or \NAME* (group 1), an optional single space, then either an
# opening brace or optional trailing whitespace (group 2)
_begin_macro_rx = re.compile(r"[\\]([a-zA-Z]+[*]?) ?({|\s*\n?)")
# a "%" comment: group 1 is the comment text; the newline and any
# spaces/tabs that start the following line are consumed as well
_comment_rx = re.compile("%+ ?(.*)\n[ \t]*")
# a run of ordinary text: anything except "]", "%", backslash and braces
_text_rx = re.compile(r"[^]%\\{}]+")
# an optional [...] argument; group 1 is the text between the brackets
_optional_rx = re.compile(r"\s*[[]([^]]*)[]]")
# _parameter_rx is this complicated to allow {...} inside a parameter;
# this is useful to match tabular layout specifications like {c|p{24pt}}
_parameter_rx = re.compile("[ \n]*{(([^{}}]|{[^}]*})*)}")
# attribute values that match this are reported as TOKEN rather than CDATA
# by Conversion.dump_attr()
_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
# opening "{" / "[" after optional spaces and newlines (not referenced by
# the code visible in this file)
_start_group_rx = re.compile("[ \n]*{")
_start_optional_rx = re.compile("[ \n]*[[]")


# Characters that Conversion.subconvert() emits as literal data when they
# appear directly after a backslash.
ESCAPED_CHARS = "$%#^ {}&~"
Fred Drake30a68c71998-11-23 16:59:39 +000069
70
def dbgmsg(msg):
    """Write 'msg' plus a newline to stderr, but only when DEBUG is set."""
    if not DEBUG:
        return
    sys.stderr.write(msg + "\n")
74
def pushing(name, point, depth):
    """Emit a debug message saying element 'name' is pushed at 'point'.

    'depth' is accepted but not used.
    """
    text = "pushing <%s> at %s" % (name, point)
    dbgmsg(text)
Fred Draked7acf021999-01-14 17:38:12 +000077
def popping(name, point, depth):
    """Emit a debug message saying element 'name' is popped at 'point'.

    'depth' is accepted but not used.
    """
    text = "popping </%s> at %s" % (name, point)
    dbgmsg(text)
Fred Draked7acf021999-01-14 17:38:12 +000080
81
Fred Drake96e4a061999-07-29 22:22:13 +000082class _Stack(UserList.UserList):
83 StringType = type('')
84
85 def append(self, entry):
86 if type(entry) is not self.StringType:
87 raise LaTeXFormatError("cannot push non-string on stack: "
88 + `entry`)
89 sys.stderr.write("%s<%s>\n" % (" "*len(self.data), entry))
90 self.data.append(entry)
91
92 def pop(self, index=-1):
93 entry = self.data[index]
94 del self.data[index]
95 sys.stderr.write("%s</%s>\n" % (" "*len(self.data), entry))
96
97 def __delitem__(self, index):
98 entry = self.data[index]
99 del self.data[index]
100 sys.stderr.write("%s</%s>\n" % (" "*len(self.data), entry))
101
102
def new_stack():
    """Return an empty element stack.

    A plain list is used normally; with DEBUG set a tracing _Stack is
    returned instead.
    """
    if not DEBUG:
        return []
    return _Stack()
107
108
class BaseConversion:
    """Shared state and output plumbing for a conversion run.

    Subclasses must provide a subconvert() method; convert() simply
    calls it.
    """

    def __init__(self, ifp, ofp, table=None, discards=(), autoclosing=()):
        """Read all input from 'ifp' and direct output to 'ofp'.

        ifp         -- file object supplying the LaTeX source
        ofp         -- file object receiving the generated events
        table       -- conversion table (dictionary); a new empty
                       dictionary is created when omitted, so that
                       entries added by one instance never leak into
                       another
        discards    -- stored as self.discards
        autoclosing -- stored as self.autoclosing
        """
        if table is None:
            table = {}
        # Seed the output stack with 'ofp' and pop it straight away:
        # this sets self.ofp and self.write through the same code that
        # handles every later switch of output stream.
        self.ofp_stack = [ofp]
        self.pop_output()
        self.table = table
        self.discards = discards
        self.autoclosing = autoclosing
        # The whole input is kept as one string, with trailing
        # whitespace removed from every line.
        self.line = "\n".join([text.rstrip() for text in ifp.readlines()])
        self.preamble = 1
        self.stack = new_stack()

    def push_output(self, ofp):
        """Make 'ofp' the current output, remembering the previous one."""
        self.ofp_stack.append(self.ofp)
        self.ofp = ofp
        self.write = ofp.write

    def pop_output(self):
        """Return to the output stream saved by the last push_output()."""
        self.ofp = self.ofp_stack.pop()
        self.write = self.ofp.write

    def err_write(self, msg):
        """Write str(msg) and a newline to stderr when DEBUG is set."""
        if DEBUG:
            sys.stderr.write(str(msg) + "\n")

    def convert(self):
        """Run the conversion by calling the subclass's subconvert()."""
        self.subconvert()
135
136
class Conversion(BaseConversion):
    """Converter that turns the stylized LaTeX input into ESIS events.

    Input is consumed from the front of self.line; events are emitted
    through self.write.  The handling of each macro and environment is
    taken from self.table, which maps names to TableEntry objects.
    """

    def __init__(self, ifp, ofp, table=None):
        """Set up the conversion of 'ifp' to 'ofp' using 'table'.

        'table' defaults to a new empty dictionary; it is never shared
        between instances because get_entry() and get_env_entry() add
        entries to it.
        """
        if table is None:
            table = {}
        BaseConversion.__init__(self, ifp, ofp, table)
        self.discards = []

    def subconvert(self, endchar=None, depth=0):
        """Convert the input in self.line, recursing for nested groups.

        Parses content, including sub-structures, until the character
        'endchar' is found (with no open structures), or until the end
        of the input data if endchar is None.

        When 'endchar' is found, self.line is set to the remaining
        input, which still starts with 'endchar', and that string is
        returned.  When the input runs out, None is returned.  'depth'
        is only passed on (increased) to nested calls.

        Raises LaTeXFormatError, or its subclass LaTeXStackError, when
        the markup cannot be handled.
        """
        stack = new_stack()
        line = self.line
        while line:
            if line[0] == endchar and not stack:
                self.line = line
                return line
            m = _comment_rx.match(line)
            if m:
                text = m.group(1)
                if text:
                    self.write("(COMMENT\n- %s \n)COMMENT\n-\\n\n"
                               % encode(text))
                line = line[m.end():]
                continue
            m = _begin_env_rx.match(line)
            if m:
                name = m.group(1)
                # Called for its checks and side effects only: it
                # rejects names defined as macros and registers a
                # default entry for unknown environments.
                self.get_env_entry(name)
                # re-write to use the macro handler
                line = r"\%s %s" % (name, line[m.end():])
                continue
            m = _end_env_rx.match(line)
            if m:
                # end of environment
                envname = m.group(1)
                entry = self.get_entry(envname)
                # close the elements this environment's end implies
                while stack and envname != stack[-1] \
                      and stack[-1] in entry.endcloses:
                    self.write(")%s\n" % stack.pop())
                if stack and envname == stack[-1]:
                    self.write(")%s\n" % entry.outputname)
                    del stack[-1]
                else:
                    raise LaTeXStackError(envname, stack)
                line = line[m.end():]
                continue
            m = _begin_macro_rx.match(line)
            if m:
                # start of macro
                macroname = m.group(1)
                entry = self.get_entry(macroname)
                if entry.verbatim:
                    # magic case!  Everything up to the matching \end
                    # is emitted as data without further parsing.
                    endmarker = "\\end{%s}" % macroname
                    pos = line.find(endmarker)
                    if pos < 0:
                        # Without this check the -1 would be used as a
                        # slice index and silently mangle the input.
                        raise LaTeXFormatError(
                            "missing %s for verbatim environment"
                            % endmarker)
                    text = line[m.end(1):pos]
                    stack.append(entry.name)
                    self.write("(%s\n" % entry.outputname)
                    self.write("-%s\n" % encode(text))
                    self.write(")%s\n" % entry.outputname)
                    stack.pop()
                    line = line[pos + len(endmarker):]
                    continue
                # close the open elements this macro implicitly ends
                while stack and stack[-1] in entry.closes:
                    top = stack.pop()
                    topentry = self.get_entry(top)
                    if topentry.outputname:
                        self.write(")%s\n-\\n\n" % topentry.outputname)
                #
                # Entries without an output name have their output
                # collected in a scratch buffer that is thrown away by
                # the pop_output() call below.
                if entry.outputname:
                    if entry.empty:
                        self.write("e\n")
                    self.push_output(self.ofp)
                else:
                    self.push_output(StringIO.StringIO())
                #
                params, optional, empty, environ = self.start_macro(macroname)
                # rip off the macroname
                if params or empty:
                    line = line[m.end(1):]
                else:
                    line = line[m.end():]
                opened = 0
                implied_content = 0

                # handle attribute mappings here:
                for pentry in params:
                    if pentry.type == "attribute":
                        if pentry.optional:
                            m = _optional_rx.match(line)
                            if m:
                                line = line[m.end():]
                                self.dump_attr(pentry, m.group(1))
                        elif pentry.text:
                            # value supplied by conversion spec:
                            self.dump_attr(pentry, pentry.text)
                        else:
                            m = _parameter_rx.match(line)
                            if not m:
                                raise LaTeXFormatError(
                                    "could not extract parameter %s for %s: %s"
                                    % (pentry.name, macroname,
                                       repr(line[:100])))
                            self.dump_attr(pentry, m.group(1))
                            line = line[m.end():]
                    elif pentry.type == "child":
                        if pentry.optional:
                            m = _optional_rx.match(line)
                            if m:
                                line = line[m.end():]
                                if entry.outputname and not opened:
                                    opened = 1
                                    self.write("(%s\n" % entry.outputname)
                                    stack.append(macroname)
                                stack.append(pentry.name)
                                self.write("(%s\n" % pentry.name)
                                self.write("-%s\n" % encode(m.group(1)))
                                self.write(")%s\n" % pentry.name)
                                stack.pop()
                        else:
                            if entry.outputname and not opened:
                                opened = 1
                                self.write("(%s\n" % entry.outputname)
                                stack.append(entry.name)
                            self.write("(%s\n" % pentry.name)
                            stack.append(pentry.name)
                            # drop the character that opens the group
                            self.line = skip_white(line)[1:]
                            rest = self.subconvert(
                                "}", len(stack) + depth + 1)
                            if rest is None:
                                # input ended before the closing brace
                                raise LaTeXFormatError(
                                    "missing '}' for %s child of %s"
                                    % (pentry.name, macroname))
                            line = rest[1:]
                            self.write(")%s\n" % stack.pop())
                    elif pentry.type == "content":
                        if pentry.implied:
                            implied_content = 1
                        else:
                            if entry.outputname and not opened:
                                opened = 1
                                self.write("(%s\n" % entry.outputname)
                                stack.append(entry.name)
                            line = skip_white(line)
                            # slice so that exhausted input is reported
                            # as missing content, not as an IndexError
                            if line[:1] != "{":
                                raise LaTeXFormatError(
                                    "missing content for " + macroname)
                            self.line = line[1:]
                            # None (input exhausted) becomes "" so the
                            # remaining parameters can still be matched
                            line = self.subconvert(
                                "}", len(stack) + depth + 1) or ""
                            if line and line[0] == "}":
                                line = line[1:]
                    elif pentry.type == "text":
                        if pentry.text:
                            if entry.outputname and not opened:
                                opened = 1
                                stack.append(entry.name)
                                self.write("(%s\n" % entry.outputname)
                            self.write("-%s\n" % encode(pentry.text))
                if entry.outputname:
                    if not opened:
                        self.write("(%s\n" % entry.outputname)
                        stack.append(entry.name)
                    if not implied_content:
                        self.write(")%s\n" % entry.outputname)
                        stack.pop()
                self.pop_output()
                continue
            if line[0] == "}":
                # end of macro or group
                if not stack:
                    raise LaTeXFormatError(
                        "unbalanced '}' found: " + repr(line[:100]))
                macroname = stack[-1]
                if macroname:
                    conversion = self.table.get(macroname)
                    if conversion.outputname:
                        # otherwise, it was just a bare group
                        self.write(")%s\n" % conversion.outputname)
                del stack[-1]
                line = line[1:]
                continue
            if line[0] == "{":
                # a bare group is recorded with an empty name
                stack.append("")
                line = line[1:]
                continue
            if line[0] == "\\" and len(line) > 1 \
               and line[1] in ESCAPED_CHARS:
                self.write("-%s\n" % encode(line[1]))
                line = line[2:]
                continue
            if line[:2] == r"\\":
                self.write("(BREAK\n)BREAK\n")
                line = line[2:]
                continue
            m = _text_rx.match(line)
            if m:
                text = encode(m.group())
                self.write("-%s\n" % text)
                line = line[m.end():]
                continue
            # special case because of \item[]
            # XXX can we axe this???
            if line[0] == "]":
                self.write("-]\n")
                line = line[1:]
                continue
            # avoid infinite loops
            extra = ""
            if len(line) > 100:
                extra = "..."
            raise LaTeXFormatError("could not identify markup: %s%s"
                                   % (repr(line[:100]), extra))
        # Out of input: close whatever may be closed implicitly.
        while stack:
            entry = self.get_entry(stack[-1])
            if entry.closes:
                self.write(")%s\n-%s\n" % (entry.outputname, encode("\n")))
                del stack[-1]
            else:
                break
        if stack:
            raise LaTeXFormatError("elements remain on stack: "
                                   + ", ".join(stack))
        # otherwise we just ran out of input here...

    def start_macro(self, name):
        """Return (parameters, optional, empty, environment) for 'name'.

        'optional' reflects the 'optional' flag of the first parameter,
        or is the (empty) parameter list itself when there is none.
        """
        conversion = self.get_entry(name)
        parameters = conversion.parameters
        optional = parameters and parameters[0].optional
        return parameters, optional, conversion.empty, conversion.environment

    def get_entry(self, name):
        """Return the table entry for 'name', creating one if needed.

        An unknown name gets a default entry with a single "content"
        parameter; the new entry is stored in the table.
        """
        entry = self.table.get(name)
        if entry is None:
            self.err_write("get_entry(%s) failing; building default entry!"
                           % repr(name))
            # not defined; build a default entry:
            entry = TableEntry(name)
            entry.has_content = 1
            entry.parameters.append(Parameter("content"))
            self.table[name] = entry
        return entry

    def get_env_entry(self, name):
        """Return the table entry for the environment 'name'.

        An unknown name gets a default environment entry with implied
        content; the new entry is stored in the table.  Raises
        LaTeXFormatError if 'name' is defined as a macro.
        """
        entry = self.table.get(name)
        if entry is None:
            # not defined; build a default entry:
            entry = TableEntry(name, 1)
            entry.has_content = 1
            entry.parameters.append(Parameter("content"))
            entry.parameters[-1].implied = 1
            self.table[name] = entry
        elif not entry.environment:
            raise LaTeXFormatError(
                name + " is defined as a macro; expected environment")
        return entry

    def dump_attr(self, pentry, value):
        """Write an attribute event for parameter 'pentry' and 'value'.

        Nothing is written when the parameter has no name or the value
        is empty.  The value is typed TOKEN when it matches _token_rx
        and CDATA otherwise.
        """
        if not (pentry.name and value):
            return
        if _token_rx.match(value):
            dtype = "TOKEN"
        else:
            dtype = "CDATA"
        self.write("A%s %s %s\n" % (pentry.name, dtype, encode(value)))
403
404
def convert(ifp, ofp, table):
    """Convert the LaTeX document on 'ifp' to ESIS events on 'ofp'.

    'table' is the conversion table handed to Conversion.  An IOError
    whose errno is EPIPE is ignored; every other exception propagates.
    """
    c = Conversion(ifp, ofp, table)
    try:
        c.convert()
    except IOError:
        # Inspect the errno attribute instead of unpacking the exception
        # arguments: unpacking fails for an IOError that does not carry
        # exactly two arguments and would hide the real error.
        err = sys.exc_info()[1]
        if getattr(err, "errno", None) != errno.EPIPE:
            raise
412
413
def skip_white(line):
    """Return 'line' without its leading whitespace and "%" characters.

    While the string starts with a space, "%", newline, tab or carriage
    return, that character is dropped together with any whitespace that
    follows it.  Only the "%" itself is skipped, not the rest of a
    comment.
    """
    while line and line[0] in " %\n\t\r":
        line = line[1:].lstrip()
    return line
418
419
Fred Drake96e4a061999-07-29 22:22:13 +0000420
class TableEntry:
    """Conversion-table record describing one macro or environment."""

    def __init__(self, name, environment=0):
        # The output name starts out equal to the LaTeX name.
        self.name = self.outputname = name
        self.environment = environment
        # Entries that are not environments start out as empty elements.
        self.empty = not environment
        # Flags, all initially off.
        self.has_content = self.verbatim = self.auto_close = 0
        # Each list must be a distinct object for every entry.
        self.parameters, self.closes, self.endcloses = [], [], []
433
class Parameter:
    """Description of one parameter of a macro or environment.

    The code in this file uses the types "attribute", "child",
    "content" and "text".
    """

    def __init__(self, type, name=None, optional=0):
        self.type, self.name, self.optional = type, name, optional
        # text supplied by the conversion specification, if any
        self.text = ''
        # set for content that is not delimited by braces
        self.implied = 0
441
442
class TableParser(XMLParser):
    """Parser that builds the conversion table from its XML description.

    The start_*/end_* methods handle the <environment>, <macro>,
    <attribute>, <child>, <content> and <text> elements.
    NOTE(review): they rely on XMLParser calling start_NAME/end_NAME for
    each element -- confirm against the xmllib version in use.
    """

    def __init__(self):
        self.__table = {}
        self.__current = None
        self.__buffer = ''
        XMLParser.__init__(self)

    def get_table(self):
        """Return the table, mapping names to TableEntry objects.

        Environments for which no content was declared are given an
        implied "content" parameter first.
        """
        for entry in self.__table.values():
            if entry.environment and not entry.has_content:
                p = Parameter("content")
                p.implied = 1
                entry.parameters.append(p)
                entry.has_content = 1
        return self.__table

    def start_environment(self, attrs):
        """Begin an environment entry from the element's attributes."""
        name = attrs["name"]
        self.__current = TableEntry(name, environment=1)
        self.__current.verbatim = attrs.get("verbatim") == "yes"
        if "outputname" in attrs:
            self.__current.outputname = attrs.get("outputname")
        self.__current.endcloses = attrs.get("endcloses", "").split()

    def end_environment(self):
        """Store the finished environment entry in the table."""
        self.end_macro()

    def start_macro(self, attrs):
        """Begin a macro entry from the element's attributes."""
        name = attrs["name"]
        self.__current = TableEntry(name)
        self.__current.closes = attrs.get("closes", "").split()
        if "outputname" in attrs:
            self.__current.outputname = attrs.get("outputname")

    def end_macro(self):
        """Store the finished entry in the table under its name."""
        self.__table[self.__current.name] = self.__current
        self.__current = None

    def start_attribute(self, attrs):
        """Add an "attribute" parameter to the current entry."""
        name = attrs.get("name")
        optional = attrs.get("optional") == "yes"
        if name:
            p = Parameter("attribute", name, optional=optional)
        else:
            p = Parameter("attribute", optional=optional)
        self.__current.parameters.append(p)
        # start collecting the element's character data
        self.__buffer = ''

    def end_attribute(self):
        """Use the collected character data as the attribute's text."""
        self.__current.parameters[-1].text = self.__buffer

    def start_child(self, attrs):
        """Add a "child" parameter; the entry is no longer empty."""
        name = attrs["name"]
        p = Parameter("child", name, attrs.get("optional") == "yes")
        self.__current.parameters.append(p)
        self.__current.empty = 0

    def start_content(self, attrs):
        """Add a "content" parameter; the entry is no longer empty.

        The content of an environment is always implied.
        """
        p = Parameter("content")
        p.implied = attrs.get("implied") == "yes"
        if self.__current.environment:
            p.implied = 1
        self.__current.parameters.append(p)
        self.__current.has_content = 1
        self.__current.empty = 0

    def start_text(self, attrs):
        """Start collecting the character data of a <text> element."""
        self.__buffer = ''

    def end_text(self):
        """Add a "text" parameter holding the collected character data."""
        p = Parameter("text")
        p.text = self.__buffer
        self.__current.parameters.append(p)

    def handle_data(self, data):
        """Append character data to the collection buffer."""
        self.__buffer = self.__buffer + data
517
518
def load_table(fp):
    """Parse the table description read from 'fp' and return the table.

    The complete contents of 'fp' are read and fed to a TableParser.
    """
    description = fp.read()
    builder = TableParser()
    builder.feed(description)
    builder.close()
    table = builder.get_table()
    return table
524
525
def main():
    """Command-line entry point.

    Usage: [-D | --debug] [infile [outfile]]

    Input defaults to stdin and output to stdout.  Each -D/--debug
    option increases the module-level DEBUG level.  The conversion table
    is loaded from 'conversion.xml' in the directory named by
    sys.path[0].  Exits with status 2 on a command-line error.
    """
    global DEBUG
    #
    usage = "usage: %s [-D|--debug] [infile [outfile]]\n" \
            % os.path.basename(sys.argv[0])
    try:
        opts, args = getopt.getopt(sys.argv[1:], "D", ["debug"])
    except getopt.error:
        # report the bad option instead of dying with a traceback
        sys.stderr.write("%s\n" % sys.exc_info()[1])
        sys.stderr.write(usage)
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-D", "--debug"):
            DEBUG = DEBUG + 1
    if len(args) == 0:
        ifp = sys.stdin
        ofp = sys.stdout
    elif len(args) == 1:
        # open() needs the file name itself, not the argument list
        ifp = open(args[0])
        ofp = sys.stdout
    elif len(args) == 2:
        ifp = open(args[0])
        ofp = open(args[1], "w")
    else:
        sys.stderr.write(usage)
        sys.exit(2)

    tablefp = open(os.path.join(sys.path[0], 'conversion.xml'))
    try:
        table = load_table(tablefp)
    finally:
        tablefp.close()
    convert(ifp, ofp, table)
Fred Drake30a68c71998-11-23 16:59:39 +0000548
549
# Run the converter only when the file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()