blob: 051c374a2bbc2aca54f3713defa34bb2ec79cecc [file] [log] [blame]
Fred Drake30a68c71998-11-23 16:59:39 +00001#! /usr/bin/env python
2
"""Generate ESIS events based on a LaTeX source document and
configuration data.

The conversion is not strong enough to work with arbitrary LaTeX
documents; it has only been designed to work with the highly stylized
markup used in the standard Python documentation.  A lot of
information about specific markup is encoded in the control table
passed to the old_convert() and new_convert() functions; changing this
table can allow this tool to support additional LaTeX markups.

The format of the old-style table is largely undocumented; see the
commented headers where the table is specified in main().  The
new-style table is read from the file 'conversion.xml' found through
sys.path[0]; there is no provision to load an alternate table from a
different external file.
"""
# Version string.  '$Revision$' looks like an RCS/CVS keyword placeholder
# that the version-control system would expand -- TODO confirm.
__version__ = '$Revision$'
18
Fred Drake96e4a061999-07-29 22:22:13 +000019import copy
Fred Drake30a68c71998-11-23 16:59:39 +000020import errno
Fred Drake96e4a061999-07-29 22:22:13 +000021import getopt
22import os
Fred Drake30a68c71998-11-23 16:59:39 +000023import re
24import string
25import StringIO
26import sys
Fred Drake96e4a061999-07-29 22:22:13 +000027import UserList
Fred Drake30a68c71998-11-23 16:59:39 +000028
Fred Drakeaeea9811998-12-01 19:04:12 +000029from esistools import encode
Fred Drake54fb7fb1999-05-10 19:36:03 +000030from types import ListType, StringType, TupleType
Fred Drakeaeea9811998-12-01 19:04:12 +000031
Fred Drake96e4a061999-07-29 22:22:13 +000032try:
33 from xml.parsers.xmllib import XMLParser
34except ImportError:
35 from xmllib import XMLParser
36
Fred Drake30a68c71998-11-23 16:59:39 +000037
# Debug level.  0 disables diagnostics; main() adds one for every
# -D/--debug option.  When true, dbgmsg() and err_write() report to
# stderr and new_stack() hands out a tracing _Stack instead of a list.
DEBUG = 0
39
40
class LaTeXFormatError(Exception):
    """Raised when the LaTeX input cannot be converted.

    The converters raise this for markup they cannot identify, for
    parameters they cannot extract, and for elements left open at the
    end of the input.
    """
43
44
class LaTeXStackError(LaTeXFormatError):
    """Raised when an environment close does not match the open element.

    Attributes:
        found -- the environment name that was being closed
        stack -- a copy of the element stack at the time of the error
    """

    def __init__(self, found, stack):
        # Keep a private copy so later changes to the live stack do not
        # alter what the exception reports.
        self.stack = stack[:]
        self.found = found
        LaTeXFormatError.__init__(
            self,
            "environment close for %s doesn't match;\n stack = %s"
            % (found, stack))
52
53
# All patterns below are applied with .match(), i.e. anchored at the
# start of the text that remains to be converted.

# "\begin{name}"; group 1 is the environment name.
_begin_env_rx = re.compile(r"[\\]begin{([^}]*)}")
# "\end{name}"; group 1 is the environment name.
_end_env_rx = re.compile(r"[\\]end{([^}]*)}")
# A macro: backslash, letters and an optional "*" (group 1), an optional
# single space, then either "{" or a run of whitespace (group 2).
_begin_macro_rx = re.compile(r"[\\]([a-zA-Z]+[*]?) ?({|\s*\n?)")
# A comment: one or more "%", an optional space, the text up to the end
# of the line (group 1), the newline, and any blanks/tabs that follow it.
_comment_rx = re.compile("%+ ?(.*)\n[ \t]*")
# A run of plain text: anything except "]", "%", backslash, "{" and "}".
_text_rx = re.compile(r"[^]%\\{}]+")
# An optional "[...]" argument after optional whitespace; group 1 is the
# bracketed text.
_optional_rx = re.compile(r"\s*[[]([^]]*)[]]")
# _parameter_rx is this complicated to allow {...} inside a parameter;
# this is useful to match tabular layout specifications like {c|p{24pt}}
_parameter_rx = re.compile("[ \n]*{(([^{}}]|{[^}]*})*)}")
# A value made of a letter followed by letters, digits, "." or "-" only;
# such values are written as TOKEN attributes, everything else as CDATA.
_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
# Optional blanks/newlines followed by the "{" that opens a group.
_start_group_rx = re.compile("[ \n]*{")
# Optional blanks/newlines followed by the "[" that opens an optional
# argument.
_start_optional_rx = re.compile("[ \n]*[[]")


# Characters that are emitted as literal text when they directly follow
# a backslash in the input.
ESCAPED_CHARS = "$%#^ {}&~"
Fred Drake30a68c71998-11-23 16:59:39 +000069
70
def dbgmsg(msg):
    """Write *msg* and a newline to stderr, but only when DEBUG is set."""
    if not DEBUG:
        return
    sys.stderr.write(msg + "\n")
74
def pushing(name, point, depth):
    """Trace that element *name* is being opened at code location *point*.

    *depth* is accepted from the callers but is not used in the message.
    """
    report = "pushing <%s> at %s" % (name, point)
    dbgmsg(report)
Fred Draked7acf021999-01-14 17:38:12 +000077
def popping(name, point, depth):
    """Trace that element *name* is being closed at code location *point*.

    *depth* is accepted from the callers but is not used in the message.
    """
    report = "popping </%s> at %s" % (name, point)
    dbgmsg(report)
Fred Draked7acf021999-01-14 17:38:12 +000080
81
Fred Drake96e4a061999-07-29 22:22:13 +000082class _Stack(UserList.UserList):
83 StringType = type('')
84
85 def append(self, entry):
86 if type(entry) is not self.StringType:
87 raise LaTeXFormatError("cannot push non-string on stack: "
88 + `entry`)
89 sys.stderr.write("%s<%s>\n" % (" "*len(self.data), entry))
90 self.data.append(entry)
91
92 def pop(self, index=-1):
93 entry = self.data[index]
94 del self.data[index]
95 sys.stderr.write("%s</%s>\n" % (" "*len(self.data), entry))
96
97 def __delitem__(self, index):
98 entry = self.data[index]
99 del self.data[index]
100 sys.stderr.write("%s</%s>\n" % (" "*len(self.data), entry))
101
102
def new_stack():
    """Return an empty element stack.

    A plain list is used normally; with DEBUG set, a _Stack is returned
    so that pushes and pops are traced.
    """
    if not DEBUG:
        return []
    return _Stack()
107
108
class BaseConversion:
    """State and output plumbing shared by the LaTeX-to-ESIS converters.

    The whole input is read up front into self.line (trailing whitespace
    stripped from every line).  Output goes through self.write, which
    always refers to the write method of the current output file; the
    push_output()/pop_output() pair temporarily redirects it.

    Subclasses must provide subconvert(), which convert() calls.
    """

    def __init__(self, ifp, ofp, table=None, discards=(), autoclosing=()):
        """Prepare a conversion from *ifp* to *ofp*.

        ifp         -- input file; read completely by readlines()
        ofp         -- output file; only its write method is used
        table       -- conversion table; a fresh empty dictionary is used
                       when omitted, so instances never share one
        discards    -- stored as self.discards for the subclass
        autoclosing -- stored as self.autoclosing for the subclass
        """
        if table is None:
            table = {}
        # Seed the stack with ofp and pop it right away: this installs
        # self.ofp and self.write and leaves the saved-output stack empty.
        self.ofp_stack = [ofp]
        self.pop_output()
        self.table = table
        self.discards = discards
        self.autoclosing = autoclosing
        self.line = string.join(map(string.rstrip, ifp.readlines()), "\n")
        self.preamble = 1
        self.stack = new_stack()

    def push_output(self, ofp):
        """Save the current output file and send output to *ofp* instead."""
        self.ofp_stack.append(self.ofp)
        self.ofp = ofp
        self.write = ofp.write

    def pop_output(self):
        """Return to the output file saved by the last push_output()."""
        self.ofp = self.ofp_stack.pop()
        self.write = self.ofp.write

    def err_write(self, msg):
        """Write str(msg) and a newline to stderr when DEBUG is set."""
        if DEBUG:
            sys.stderr.write(str(msg) + "\n")

    def convert(self):
        """Convert the whole input by calling the subclass's subconvert()."""
        self.subconvert()
135
136
class Conversion(BaseConversion):
    """Old-style converter, driven by a table of 5-tuples.

    Each table entry has the form
        name: ([attribute names], is1stOptional, isEmpty, isEnv, nocontent)
    where an attribute name is a plain string (normal attribute), a
    1-tuple (sub-element that receives the content of the macro, as for
    \\section) or a 1-element list (ordinary sub-element).
    """

    def subconvert(self, endchar=None, depth=0):
        """Convert self.line, writing ESIS events through self.write.

        Conversion stops when *endchar* is the next character while the
        element stack is empty; the remaining text (still starting with
        *endchar*) is stored in self.line and returned.  When the input
        runs out instead, elements listed in self.autoclosing are closed
        and None is returned.  *depth* is only passed on to the debug
        tracing helpers.

        Raises LaTeXFormatError (or its subclass LaTeXStackError) for
        markup that cannot be identified, an environment close that does
        not match, an unterminated verbatim environment, an unmatched
        "}", or elements left open at the end of the input.
        """
        stack = self.stack
        line = self.line
        while line:
            if line[0] == endchar and not stack:
                self.line = line
                return line
            m = _comment_rx.match(line)
            if m:
                text = m.group(1)
                if text:
                    self.write("(COMMENT\n- %s \n)COMMENT\n-\\n\n"
                               % encode(text))
                line = line[m.end():]
                continue
            m = _begin_env_rx.match(line)
            if m:
                # re-write to use the macro handler
                line = r"\%s %s" % (m.group(1), line[m.end():])
                continue
            m = _end_env_rx.match(line)
            if m:
                # end of environment
                envname = m.group(1)
                if envname == "document":
                    # special magic: everything still open above the
                    # bottom stack entry must be auto-closing
                    for n in stack[1:]:
                        if n not in self.autoclosing:
                            self.err_write(stack)
                            raise LaTeXFormatError(
                                "open element on stack: " + repr(n))
                    self.write(")document\n")
                elif stack and envname == stack[-1]:
                    self.write(")%s\n" % envname)
                    del stack[-1]
                    popping(envname, "a", len(stack) + depth)
                else:
                    raise LaTeXStackError(envname, stack)
                line = line[m.end():]
                continue
            m = _begin_macro_rx.match(line)
            if m:
                # start of macro
                macroname = m.group(1)
                if macroname == "verbatim":
                    # really magic case!  The text up to \end{verbatim}
                    # is copied without any interpretation.
                    endtag = "\\end{verbatim}"
                    pos = string.find(line, endtag)
                    if pos < 0:
                        # find() returns -1; slicing with it would
                        # silently drop and duplicate input.
                        raise LaTeXFormatError(
                            "verbatim environment is never closed")
                    text = line[m.end(1):pos]
                    self.write("(verbatim\n")
                    self.write("-%s\n" % encode(text))
                    self.write(")verbatim\n")
                    line = line[pos + len(endtag):]
                    continue
                numbered = 1
                opened = 0
                if macroname[-1] == "*":
                    macroname = macroname[:-1]
                    numbered = 0
                if macroname in self.autoclosing and macroname in stack:
                    # A new \section (etc.) closes everything up to and
                    # including the previous one.
                    while stack[-1] != macroname:
                        top = stack.pop()
                        if top and top not in self.discards:
                            self.write(")%s\n-\\n\n" % top)
                        popping(top, "b", len(stack) + depth)
                    if macroname not in self.discards:
                        self.write("-\\n\n)%s\n-\\n\n" % macroname)
                    popping(macroname, "c", len(stack) + depth - 1)
                    del stack[-1]
                #
                # Output produced while handling a discarded macro goes
                # to a throw-away buffer.
                if macroname in self.discards:
                    self.push_output(StringIO.StringIO())
                else:
                    self.push_output(self.ofp)
                #
                params, optional, empty, environ = self.start_macro(macroname)
                if not numbered:
                    self.write("Anumbered TOKEN no\n")
                # rip off the macroname
                if params:
                    line = line[m.end(1):]
                elif empty:
                    line = line[m.end(1):]
                else:
                    line = line[m.end():]
                #
                # Very ugly special case to deal with \item[]. The catch
                # is that this needs to occur outside the for loop that
                # handles attribute parsing so we can 'continue' the outer
                # loop.
                #
                if optional and type(params[0]) is TupleType:
                    # the attribute name isn't used in this special case
                    stack.append(macroname)
                    self.write("(%s\n" % macroname)
                    m = _start_optional_rx.match(line)
                    if m:
                        self.line = line[m.end():]
                        line = self.subconvert("]", depth + len(stack))
                    # the synthetic "}" closes the element on the next pass
                    line = "}" + line
                    continue
                # handle attribute mappings here:
                for attrname in params:
                    if optional:
                        # only the first parameter can be optional
                        optional = 0
                        if type(attrname) is StringType:
                            m = _optional_rx.match(line)
                            if m:
                                line = line[m.end():]
                                self.write("A%s TOKEN %s\n"
                                           % (attrname, encode(m.group(1))))
                    elif type(attrname) is TupleType:
                        # This is a sub-element; but place the and attribute
                        # we found on the stack (\section-like); the
                        # content of the macro will become the content
                        # of the attribute element, and the macro will
                        # have to be closed some other way (such as
                        # auto-closing).
                        stack.append(macroname)
                        self.write("(%s\n" % macroname)
                        macroname = attrname[0]
                        m = _start_group_rx.match(line)
                        if m:
                            line = line[m.end():]
                    elif type(attrname) is ListType:
                        # A normal subelement: <macroname><attrname>...</>...
                        attrname = attrname[0]
                        if not opened:
                            opened = 1
                            self.write("(%s\n" % macroname)
                            pushing(macroname, "c", len(stack) + depth)
                        self.write("(%s\n" % attrname)
                        pushing(attrname, "sub-elem", len(stack) + depth + 1)
                        self.line = skip_white(line)[1:]
                        line = self.subconvert("}", len(stack) + depth + 1)[1:]
                        popping(attrname, "sub-elem", len(stack) + depth + 1)
                        self.write(")%s\n" % attrname)
                    else:
                        m = _parameter_rx.match(line)
                        if not m:
                            raise LaTeXFormatError(
                                "could not extract parameter %s for %s: %s"
                                % (attrname, macroname, repr(line[:100])))
                        value = m.group(1)
                        if _token_rx.match(value):
                            dtype = "TOKEN"
                        else:
                            dtype = "CDATA"
                        self.write("A%s %s %s\n"
                                   % (attrname, dtype, encode(value)))
                        line = line[m.end():]
                if params and type(params[-1]) is StringType \
                   and (not empty) and not environ:
                    # attempt to strip off next '{'
                    m = _start_group_rx.match(line)
                    if not m:
                        raise LaTeXFormatError(
                            "non-empty element '%s' has no content: %s"
                            % (macroname, line[:12]))
                    line = line[m.end():]
                if not opened:
                    self.write("(%s\n" % macroname)
                    pushing(macroname, "d", len(stack) + depth)
                if empty:
                    # the synthetic "}" closes the element on the next pass
                    line = "}" + line
                stack.append(macroname)
                self.pop_output()
                continue
            # NOTE(review): this repeats the test at the top of the loop;
            # no path seems to reach it with line or stack changed.
            if line[0] == endchar and not stack:
                self.line = line[1:]
                return self.line
            if line[0] == "}":
                # end of macro or group
                if not stack:
                    raise LaTeXFormatError(
                        "unmatched '}': " + repr(line[:100]))
                macroname = stack[-1]
                conversion = self.table.get(macroname)
                if macroname \
                   and macroname not in self.discards \
                   and type(conversion) is not StringType:
                    # otherwise, it was just a bare group
                    self.write(")%s\n" % stack[-1])
                    popping(macroname, "d", len(stack) + depth - 1)
                del stack[-1]
                line = line[1:]
                continue
            if line[0] == "{":
                # a bare group is tracked as an unnamed stack entry
                pushing("", "e", len(stack) + depth)
                stack.append("")
                line = line[1:]
                continue
            # The length test keeps a lone trailing backslash from raising
            # IndexError; it is reported as unidentified markup below.
            if line[0] == "\\" and len(line) > 1 \
               and line[1] in ESCAPED_CHARS:
                self.write("-%s\n" % encode(line[1]))
                line = line[2:]
                continue
            if line[:2] == r"\\":
                self.write("(BREAK\n)BREAK\n")
                line = line[2:]
                continue
            m = _text_rx.match(line)
            if m:
                text = encode(m.group())
                self.write("-%s\n" % text)
                line = line[m.end():]
                continue
            # special case because of \item[]
            if line[0] == "]":
                self.write("-]\n")
                line = line[1:]
                continue
            # avoid infinite loops
            extra = ""
            if len(line) > 100:
                extra = "..."
            raise LaTeXFormatError("could not identify markup: %s%s"
                                   % (repr(line[:100]), extra))
        while stack and stack[-1] in self.autoclosing:
            self.write("-\\n\n")
            self.write(")%s\n" % stack[-1])
            popping(stack.pop(), "e", len(stack) + depth - 1)
        if stack:
            raise LaTeXFormatError("elements remain on stack: "
                                   + string.join(stack, ", "))
        # otherwise we just ran out of input here...

    def start_macro(self, name):
        """Look up *name* and return (params, optional, empty, environ).

        Names missing from the table get the default entry
        ([], 0, 0, 0, 0).  An "e" event is written for entries marked
        empty; entries marked nocontent are reported as empty to the
        caller without writing that event.
        """
        conversion = self.table.get(name, ([], 0, 0, 0, 0))
        params, optional, empty, environ, nocontent = conversion
        if empty:
            self.write("e\n")
        elif nocontent:
            empty = 1
        return params, optional, empty, environ
Fred Drake30a68c71998-11-23 16:59:39 +0000368
369
class NewConversion(BaseConversion):
    """New-style converter, driven by a table of TableEntry objects.

    The table maps macro and environment names to TableEntry instances
    (normally built by load_table()).  Names that are not in the table
    get a default entry the first time they are seen; that entry is
    stored in the table.
    """

    def __init__(self, ifp, ofp, table=None):
        """Prepare a conversion from *ifp* to *ofp* using *table*.

        When *table* is omitted a fresh dictionary is used: get_entry()
        and get_env_entry() add entries to the table, so a shared
        default would leak entries from one conversion into the next.
        """
        if table is None:
            table = {}
        BaseConversion.__init__(self, ifp, ofp, table)
        self.discards = []

    def subconvert(self, endchar=None, depth=0):
        """Convert self.line, writing ESIS events through self.write.

        Parses content, including sub-structures, until the character
        *endchar* is found (with no open structures), or until the end
        of the input data if *endchar* is None.  When *endchar* is
        found, the remaining text (still starting with *endchar*) is
        stored in self.line and returned; when the input runs out, None
        is returned.  Every call works on its own element stack.

        Raises LaTeXFormatError (or its subclass LaTeXStackError) for
        markup that cannot be identified, a parameter or content that is
        missing, an environment close that does not match, an
        unterminated verbatim environment, an unmatched "}", or elements
        left open at the end of the input.
        """
        stack = new_stack()
        line = self.line
        while line:
            if line[0] == endchar and not stack:
                self.line = line
                return line
            m = _comment_rx.match(line)
            if m:
                text = m.group(1)
                if text:
                    self.write("(COMMENT\n- %s \n)COMMENT\n-\\n\n"
                               % encode(text))
                line = line[m.end():]
                continue
            m = _begin_env_rx.match(line)
            if m:
                name = m.group(1)
                # Called for its side effects only: it registers a
                # default environment entry for unknown names and
                # rejects names that are defined as macros.
                self.get_env_entry(name)
                # re-write to use the macro handler
                line = r"\%s %s" % (name, line[m.end():])
                continue
            m = _end_env_rx.match(line)
            if m:
                # end of environment
                envname = m.group(1)
                entry = self.get_entry(envname)
                # first close whatever this environment's end may close
                while stack and envname != stack[-1] \
                      and stack[-1] in entry.endcloses:
                    self.write(")%s\n" % stack.pop())
                if stack and envname == stack[-1]:
                    self.write(")%s\n" % entry.outputname)
                    del stack[-1]
                else:
                    raise LaTeXStackError(envname, stack)
                line = line[m.end():]
                continue
            m = _begin_macro_rx.match(line)
            if m:
                # start of macro
                macroname = m.group(1)
                entry = self.get_entry(macroname)
                if entry.verbatim:
                    # magic case!  The text up to the matching \end{...}
                    # is copied without any interpretation.
                    endtag = "\\end{%s}" % macroname
                    pos = string.find(line, endtag)
                    if pos < 0:
                        # find() returns -1; slicing with it would
                        # silently drop and duplicate input.
                        raise LaTeXFormatError(
                            "%s environment is never closed" % macroname)
                    text = line[m.end(1):pos]
                    stack.append(entry.name)
                    self.write("(%s\n" % entry.outputname)
                    self.write("-%s\n" % encode(text))
                    self.write(")%s\n" % entry.outputname)
                    stack.pop()
                    line = line[pos + len(endtag):]
                    continue
                # close the open elements this macro is declared to close
                while stack and stack[-1] in entry.closes:
                    top = stack.pop()
                    topentry = self.get_entry(top)
                    if topentry.outputname:
                        self.write(")%s\n-\\n\n" % topentry.outputname)
                #
                # Entries without an output name produce no output: what
                # is written while handling them goes to a throw-away
                # buffer.
                if entry.outputname:
                    if entry.empty:
                        self.write("e\n")
                    self.push_output(self.ofp)
                else:
                    self.push_output(StringIO.StringIO())
                #
                params, optional, empty, environ = self.start_macro(macroname)
                # rip off the macroname
                if params:
                    line = line[m.end(1):]
                elif empty:
                    line = line[m.end(1):]
                else:
                    line = line[m.end():]
                opened = 0
                implied_content = 0

                # handle attribute mappings here:
                for pentry in params:
                    if pentry.type == "attribute":
                        if pentry.optional:
                            m = _optional_rx.match(line)
                            if m:
                                line = line[m.end():]
                                self.dump_attr(pentry, m.group(1))
                        elif pentry.text:
                            # value supplied by conversion spec:
                            self.dump_attr(pentry, pentry.text)
                        else:
                            m = _parameter_rx.match(line)
                            if not m:
                                raise LaTeXFormatError(
                                    "could not extract parameter %s for %s: %s"
                                    % (pentry.name, macroname,
                                       repr(line[:100])))
                            self.dump_attr(pentry, m.group(1))
                            line = line[m.end():]
                    elif pentry.type == "child":
                        if pentry.optional:
                            m = _optional_rx.match(line)
                            if m:
                                line = line[m.end():]
                                if entry.outputname and not opened:
                                    opened = 1
                                    self.write("(%s\n" % entry.outputname)
                                    stack.append(macroname)
                                stack.append(pentry.name)
                                self.write("(%s\n" % pentry.name)
                                self.write("-%s\n" % encode(m.group(1)))
                                self.write(")%s\n" % pentry.name)
                                stack.pop()
                        else:
                            if entry.outputname and not opened:
                                opened = 1
                                self.write("(%s\n" % entry.outputname)
                                stack.append(entry.name)
                            self.write("(%s\n" % pentry.name)
                            stack.append(pentry.name)
                            # skip the opening "{", convert up to the
                            # matching "}", then drop that "}"
                            self.line = skip_white(line)[1:]
                            line = self.subconvert(
                                "}", len(stack) + depth + 1)[1:]
                            self.write(")%s\n" % stack.pop())
                    elif pentry.type == "content":
                        if pentry.implied:
                            implied_content = 1
                        else:
                            if entry.outputname and not opened:
                                opened = 1
                                self.write("(%s\n" % entry.outputname)
                                stack.append(entry.name)
                            line = skip_white(line)
                            # line[:1] (not line[0]) so that running out
                            # of input is reported as missing content
                            # rather than as an IndexError
                            if line[:1] != "{":
                                raise LaTeXFormatError(
                                    "missing content for " + macroname)
                            self.line = line[1:]
                            line = self.subconvert("}", len(stack) + depth + 1)
                            if line and line[0] == "}":
                                line = line[1:]
                    elif pentry.type == "text":
                        if pentry.text:
                            if entry.outputname and not opened:
                                opened = 1
                                stack.append(entry.name)
                                self.write("(%s\n" % entry.outputname)
                            self.write("-%s\n" % encode(pentry.text))
                if entry.outputname:
                    if not opened:
                        self.write("(%s\n" % entry.outputname)
                        stack.append(entry.name)
                    if not implied_content:
                        # with implied content the element stays open
                        # until it is closed from elsewhere
                        self.write(")%s\n" % entry.outputname)
                        stack.pop()
                self.pop_output()
                continue
            # NOTE(review): this repeats the test at the top of the loop;
            # no path seems to reach it with line or stack changed.
            if line[0] == endchar and not stack:
                self.line = line[1:]
                return self.line
            if line[0] == "}":
                # end of macro or group
                if not stack:
                    raise LaTeXFormatError(
                        "unmatched '}': " + repr(line[:100]))
                macroname = stack[-1]
                if macroname:
                    conversion = self.table.get(macroname)
                    if conversion.outputname:
                        # otherwise, it was just a bare group
                        self.write(")%s\n" % conversion.outputname)
                del stack[-1]
                line = line[1:]
                continue
            if line[0] == "{":
                # a bare group is tracked as an unnamed stack entry
                stack.append("")
                line = line[1:]
                continue
            # The length test keeps a lone trailing backslash from raising
            # IndexError; it is reported as unidentified markup below.
            if line[0] == "\\" and len(line) > 1 \
               and line[1] in ESCAPED_CHARS:
                self.write("-%s\n" % encode(line[1]))
                line = line[2:]
                continue
            if line[:2] == r"\\":
                self.write("(BREAK\n)BREAK\n")
                line = line[2:]
                continue
            m = _text_rx.match(line)
            if m:
                text = encode(m.group())
                self.write("-%s\n" % text)
                line = line[m.end():]
                continue
            # special case because of \item[]
            # XXX can we axe this???
            if line[0] == "]":
                self.write("-]\n")
                line = line[1:]
                continue
            # avoid infinite loops
            extra = ""
            if len(line) > 100:
                extra = "..."
            raise LaTeXFormatError("could not identify markup: %s%s"
                                   % (repr(line[:100]), extra))
        # Out of input: close the open elements whose entries declare
        # that they close other elements, stopping at the first that
        # does not.
        while stack:
            entry = self.get_entry(stack[-1])
            if entry.closes:
                self.write(")%s\n-%s\n" % (entry.outputname, encode("\n")))
                del stack[-1]
            else:
                break
        if stack:
            raise LaTeXFormatError("elements remain on stack: "
                                   + string.join(stack, ", "))
        # otherwise we just ran out of input here...

    def start_macro(self, name):
        """Return (parameters, optional, empty, environment) for *name*.

        *optional* reflects the 'optional' flag of the first parameter;
        it is the (false) parameter list itself when there are no
        parameters.
        """
        conversion = self.get_entry(name)
        parameters = conversion.parameters
        optional = parameters and parameters[0].optional
        return parameters, optional, conversion.empty, conversion.environment

    def get_entry(self, name):
        """Return the table entry for *name*, creating a default if needed.

        The default is a macro entry with one explicit content
        parameter; it is stored in the table for later lookups.
        """
        entry = self.table.get(name)
        if entry is None:
            self.err_write("get_entry(%s) failing; building default entry!"
                           % repr(name))
            # not defined; build a default entry:
            entry = TableEntry(name)
            entry.has_content = 1
            entry.parameters.append(Parameter("content"))
            self.table[name] = entry
        return entry

    def get_env_entry(self, name):
        """Return the table entry for environment *name*.

        Unknown names get a default environment entry with one implied
        content parameter, which is stored in the table.  Raises
        LaTeXFormatError when *name* is defined, but not as an
        environment.
        """
        entry = self.table.get(name)
        if entry is None:
            # not defined; build a default entry:
            entry = TableEntry(name, 1)
            entry.has_content = 1
            entry.parameters.append(Parameter("content"))
            entry.parameters[-1].implied = 1
            self.table[name] = entry
        elif not entry.environment:
            raise LaTeXFormatError(
                name + " is defined as a macro; expected environment")
        return entry

    def dump_attr(self, pentry, value):
        """Write an attribute event for parameter *pentry* with *value*.

        Nothing is written when the parameter has no name or the value
        is empty.  Values that look like a single token are written as
        TOKEN, everything else as CDATA.
        """
        if not (pentry.name and value):
            return
        if _token_rx.match(value):
            dtype = "TOKEN"
        else:
            dtype = "CDATA"
        self.write("A%s %s %s\n" % (pentry.name, dtype, encode(value)))
636
637
def old_convert(ifp, ofp, table=None, discards=(), autoclosing=()):
    """Convert LaTeX from *ifp* to ESIS on *ofp* with the old-style table.

    *table*, *discards* and *autoclosing* are handed to Conversion; an
    omitted *table* means a fresh empty dictionary.  An IOError whose
    errno is EPIPE (the reader of our output went away) is ignored;
    every other error propagates.
    """
    if table is None:
        table = {}
    c = Conversion(ifp, ofp, table, discards, autoclosing)
    try:
        c.convert()
    except IOError:
        # Look at the errno attribute instead of unpacking the exception:
        # unpacking fails for an IOError that does not carry exactly two
        # arguments and would hide the real error.
        if getattr(sys.exc_info()[1], "errno", None) != errno.EPIPE:
            raise
645
646
def new_convert(ifp, ofp, table=None, discards=(), autoclosing=()):
    """Convert LaTeX from *ifp* to ESIS on *ofp* with the new-style table.

    *table* maps names to TableEntry objects (see load_table()); an
    omitted *table* means a fresh empty dictionary, because the
    converter adds default entries to the table it is given.
    *discards* and *autoclosing* are accepted so that the signature
    matches old_convert(), but they are not used.  An IOError whose
    errno is EPIPE (the reader of our output went away) is ignored;
    every other error propagates.
    """
    if table is None:
        table = {}
    c = NewConversion(ifp, ofp, table)
    try:
        c.convert()
    except IOError:
        # Look at the errno attribute instead of unpacking the exception:
        # unpacking fails for an IOError that does not carry exactly two
        # arguments and would hide the real error.
        if getattr(sys.exc_info()[1], "errno", None) != errno.EPIPE:
            raise
654
655
def skip_white(line):
    """Return *line* without its leading blanks, newlines and "%" marks.

    While the first character is a space, "%", newline, tab or carriage
    return, that character is dropped together with any whitespace that
    follows it.  Only the "%" character itself is skipped, not the rest
    of a comment.
    """
    skippable = " %\n\t\r"
    while line and line[0] in skippable:
        line = line[1:].lstrip()
    return line
660
661
Fred Drake96e4a061999-07-29 22:22:13 +0000662
class TableEntry:
    """Conversion-table record for one macro or environment.

    Attributes:
        name        -- the LaTeX name
        outputname  -- element name to write; starts out equal to name
        environment -- true for environments, false for macros
        empty       -- starts out true for macros, false for environments
        has_content -- true once a content parameter has been added
        verbatim    -- true if the body is copied without interpretation
        auto_close  -- initialised to 0
        parameters  -- list of Parameter objects, in source order
        closes      -- names of open elements closed when this one starts
        endcloses   -- names of open elements closed by this environment's
                       end
    """

    def __init__(self, name, environment=0):
        self.name = self.outputname = name
        self.environment = environment
        self.empty = not environment
        self.has_content = self.verbatim = self.auto_close = 0
        # every entry needs list objects of its own
        self.parameters = []
        self.closes = []
        self.endcloses = []
674 self.endcloses = []
675
class Parameter:
    """One parameter of a macro or environment in the conversion table.

    Attributes:
        type     -- parameter kind; the converter handles "attribute",
                    "child", "content" and "text"
        name     -- attribute or child element name, or None
        optional -- true if the parameter is written as [...] in LaTeX
        text     -- fixed text supplied by the conversion table
        implied  -- true for content that is not wrapped in {...}
    """

    def __init__(self, type, name=None, optional=0):
        self.type, self.name = type, name
        self.optional = optional
        # filled in afterwards by whoever builds the table
        self.implied = 0
        self.text = ''
683
684
class TableParser(XMLParser):
    """XML parser that builds the new-style conversion table.

    The start_*/end_* methods handle the elements 'environment',
    'macro', 'attribute', 'child', 'content' and 'text'.  Entries are
    collected in a dictionary keyed by name; get_table() returns it.
    """

    def __init__(self):
        self.__table = {}
        self.__current = None
        self.__buffer = ''
        XMLParser.__init__(self)

    def get_table(self):
        """Return the table, giving content-less environments implied content."""
        for entry in self.__table.values():
            if not entry.environment or entry.has_content:
                continue
            implied = Parameter("content")
            implied.implied = 1
            entry.parameters.append(implied)
            entry.has_content = 1
        return self.__table

    def start_environment(self, attrs):
        """Begin an environment entry from the element's attributes."""
        entry = TableEntry(attrs["name"], environment=1)
        entry.verbatim = attrs.get("verbatim") == "yes"
        if attrs.has_key("outputname"):
            entry.outputname = attrs.get("outputname")
        entry.endcloses = string.split(attrs.get("endcloses", ""))
        self.__current = entry

    def end_environment(self):
        """Finish an environment entry; identical to finishing a macro."""
        self.end_macro()

    def start_macro(self, attrs):
        """Begin a macro entry from the element's attributes."""
        entry = TableEntry(attrs["name"])
        entry.closes = string.split(attrs.get("closes", ""))
        if attrs.has_key("outputname"):
            entry.outputname = attrs.get("outputname")
        self.__current = entry

    def end_macro(self):
        """Store the finished entry in the table under its name."""
        # NOTE: a check that markup with parameters has an output name
        # used to be made here (raising ValueError); it is disabled.
        finished = self.__current
        self.__table[finished.name] = finished
        self.__current = None

    def start_attribute(self, attrs):
        """Add an attribute parameter and start collecting its text."""
        is_optional = attrs.get("optional") == "yes"
        # a missing or empty name means an unnamed attribute
        param = Parameter("attribute", attrs.get("name") or None,
                          optional=is_optional)
        self.__current.parameters.append(param)
        self.__buffer = ''

    def end_attribute(self):
        """Store the collected text as the attribute's fixed value."""
        self.__current.parameters[-1].text = self.__buffer

    def start_child(self, attrs):
        """Add a child-element parameter; the entry is no longer empty."""
        is_optional = attrs.get("optional") == "yes"
        self.__current.parameters.append(
            Parameter("child", attrs["name"], is_optional))
        self.__current.empty = 0

    def start_content(self, attrs):
        """Add a content parameter; always implied for environments."""
        param = Parameter("content")
        if self.__current.environment:
            param.implied = 1
        else:
            param.implied = attrs.get("implied") == "yes"
        self.__current.parameters.append(param)
        self.__current.has_content = 1
        self.__current.empty = 0

    def start_text(self, attrs):
        """Start collecting the character data of a text parameter."""
        self.__buffer = ''

    def end_text(self):
        """Add a text parameter holding the collected character data."""
        param = Parameter("text")
        param.text = self.__buffer
        self.__current.parameters.append(param)

    def handle_data(self, data):
        """Append character data to the collection buffer."""
        self.__buffer = self.__buffer + data
759
760
def load_table(fp):
    """Read a conversion table in XML form from *fp* and return it.

    The whole file is read and fed to a TableParser; the result is the
    dictionary returned by its get_table() method.
    """
    builder = TableParser()
    xml_source = fp.read()
    builder.feed(xml_source)
    builder.close()
    return builder.get_table()
766
767
def _usage():
    """Write a one-line usage summary to stderr."""
    sys.stderr.write("usage: %s [-D|--debug] [-n|--new] [-o|--old] "
                     "[infile [outfile]]\n" % os.path.basename(sys.argv[0]))


def main():
    """Command-line entry point.

    Options:
        -D, --debug   increase the debug level (may be repeated)
        -n, --new     use the new-style converter (the default)
        -o, --old     use the old-style converter and the built-in table

    With no arguments, input is read from stdin and output written to
    stdout; one argument names the input file; two arguments name the
    input and output files.  Bad options or too many arguments print a
    usage message and exit with status 2.
    """
    global DEBUG
    #
    convert = new_convert
    newstyle = 1
    try:
        # "o"/"old" must be declared here or the -o/--old branch below
        # can never be reached.
        opts, args = getopt.getopt(sys.argv[1:], "Dno",
                                   ["debug", "new", "old"])
    except getopt.error:
        sys.stderr.write("%s\n" % sys.exc_info()[1])
        _usage()
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-n", "--new"):
            convert = new_convert
            newstyle = 1
        elif opt in ("-o", "--old"):
            convert = old_convert
            newstyle = 0
        elif opt in ("-D", "--debug"):
            DEBUG = DEBUG + 1
    if len(args) == 0:
        ifp = sys.stdin
        ofp = sys.stdout
    elif len(args) == 1:
        ifp = open(args[0])
        ofp = sys.stdout
    elif len(args) == 2:
        ifp = open(args[0])
        ofp = open(args[1], "w")
    else:
        _usage()
        sys.exit(2)
    table = {
        # entries have the form:
        # name: ([attribute names], is1stOptional, isEmpty, isEnv, nocontent)
        # attribute names can be:
        #   "string"    -- normal attribute
        #   ("string",) -- sub-element with content of macro; like for \section
        #   ["string"]  -- sub-element
        "bifuncindex": (["name"], 0, 1, 0, 0),
        "cfuncdesc": (["type", "name", ("args",)], 0, 0, 1, 0),
        "chapter": ([("title",)], 0, 0, 0, 0),
        "chapter*": ([("title",)], 0, 0, 0, 0),
        "classdesc": (["name", ("args",)], 0, 0, 1, 0),
        "ctypedesc": (["name"], 0, 0, 1, 0),
        "cvardesc": (["type", "name"], 0, 0, 1, 0),
        "datadesc": (["name"], 0, 0, 1, 0),
        "declaremodule": (["id", "type", "name"], 1, 1, 0, 0),
        "deprecated": (["release"], 0, 0, 0, 0),
        "documentclass": (["classname"], 0, 1, 0, 0),
        "excdesc": (["name"], 0, 0, 1, 0),
        "funcdesc": (["name", ("args",)], 0, 0, 1, 0),
        "funcdescni": (["name", ("args",)], 0, 0, 1, 0),
        "funcline": (["name"], 0, 0, 0, 0),
        "funclineni": (["name"], 0, 0, 0, 0),
        "geq": ([], 0, 1, 0, 0),
        "hline": ([], 0, 1, 0, 0),
        "include": (["source"], 0, 1, 0, 0),
        "indexii": (["ie1", "ie2"], 0, 1, 0, 0),
        "indexiii": (["ie1", "ie2", "ie3"], 0, 1, 0, 0),
        "indexiv": (["ie1", "ie2", "ie3", "ie4"], 0, 1, 0, 0),
        "indexname": ([], 0, 0, 0, 0),
        "input": (["source"], 0, 1, 0, 0),
        "item": ([("leader",)], 1, 0, 0, 0),
        "label": (["id"], 0, 1, 0, 0),
        "labelwidth": ([], 0, 1, 0, 0),
        "large": ([], 0, 1, 0, 0),
        "LaTeX": ([], 0, 1, 0, 0),
        "leftmargin": ([], 0, 1, 0, 0),
        "leq": ([], 0, 1, 0, 0),
        "lineii": ([["entry"], ["entry"]], 0, 0, 0, 1),
        "lineiii": ([["entry"], ["entry"], ["entry"]], 0, 0, 0, 1),
        "lineiv": ([["entry"], ["entry"], ["entry"], ["entry"]], 0, 0, 0, 1),
        "localmoduletable": ([], 0, 1, 0, 0),
        "manpage": (["name", "section"], 0, 1, 0, 0),
        "memberdesc": (["class", "name"], 1, 0, 1, 0),
        "memberdescni": (["class", "name"], 1, 0, 1, 0),
        "methoddesc": (["class", "name", ("args",)], 1, 0, 1, 0),
        "methoddescni": (["class", "name", ("args",)], 1, 0, 1, 0),
        "methodline": (["class", "name"], 1, 0, 0, 0),
        "methodlineni": (["class", "name"], 1, 0, 0, 0),
        "moduleauthor": (["name", "email"], 0, 1, 0, 0),
        "opcodedesc": (["name", "var"], 0, 0, 1, 0),
        "par": ([], 0, 1, 0, 0),
        "paragraph": ([("title",)], 0, 0, 0, 0),
        "refbimodindex": (["name"], 0, 1, 0, 0),
        "refexmodindex": (["name"], 0, 1, 0, 0),
        "refmodindex": (["name"], 0, 1, 0, 0),
        "refstmodindex": (["name"], 0, 1, 0, 0),
        "refmodule": (["ref"], 1, 0, 0, 0),
        "renewcommand": (["macro"], 0, 0, 0, 0),
        "rfc": (["num"], 0, 1, 0, 0),
        "section": ([("title",)], 0, 0, 0, 0),
        "sectionauthor": (["name", "email"], 0, 1, 0, 0),
        "seemodule": (["ref", "name"], 1, 0, 0, 0),
        "stindex": (["type"], 0, 1, 0, 0),
        "subparagraph": ([("title",)], 0, 0, 0, 0),
        "subsection": ([("title",)], 0, 0, 0, 0),
        "subsubsection": ([("title",)], 0, 0, 0, 0),
        "list": (["bullet", "init"], 0, 0, 1, 0),
        "tableii": (["colspec", "style",
                     ["entry"], ["entry"]], 0, 0, 1, 0),
        "tableiii": (["colspec", "style",
                      ["entry"], ["entry"], ["entry"]], 0, 0, 1, 0),
        "tableiv": (["colspec", "style",
                     ["entry"], ["entry"], ["entry"], ["entry"]], 0, 0, 1, 0),
        "version": ([], 0, 1, 0, 0),
        "versionadded": (["version"], 0, 1, 0, 0),
        "versionchanged": (["version"], 0, 1, 0, 0),
        "withsubitem": (["text"], 0, 0, 0, 0),
        #
        "ABC": ([], 0, 1, 0, 0),
        "ASCII": ([], 0, 1, 0, 0),
        "C": ([], 0, 1, 0, 0),
        "Cpp": ([], 0, 1, 0, 0),
        "EOF": ([], 0, 1, 0, 0),
        "e": ([], 0, 1, 0, 0),
        "ldots": ([], 0, 1, 0, 0),
        "NULL": ([], 0, 1, 0, 0),
        "POSIX": ([], 0, 1, 0, 0),
        "UNIX": ([], 0, 1, 0, 0),
        #
        # Things that will actually be going away!
        #
        "appendix": ([], 0, 1, 0, 0),
        "catcode": ([], 0, 1, 0, 0),
        "fi": ([], 0, 1, 0, 0),
        "ifhtml": ([], 0, 1, 0, 0),
        "makeindex": ([], 0, 1, 0, 0),
        "makemodindex": ([], 0, 1, 0, 0),
        "maketitle": ([], 0, 1, 0, 0),
        "noindent": ([], 0, 1, 0, 0),
        "protect": ([], 0, 1, 0, 0),
        "tableofcontents": ([], 0, 1, 0, 0),
        }
    if newstyle:
        # The new-style converter ignores the table above and reads its
        # table from conversion.xml, located through sys.path[0].
        fp = open(os.path.join(sys.path[0], 'conversion.xml'))
        try:
            table = load_table(fp)
        finally:
            fp.close()
    try:
        convert(ifp, ofp, table,
                discards=["fi", "ifhtml", "makeindex", "makemodindex",
                          "maketitle", "noindent", "tableofcontents"],
                autoclosing=["chapter", "section", "subsection",
                             "subsubsection", "paragraph", "subparagraph", ])
    finally:
        # close only the files opened here, never the standard streams
        if ifp is not sys.stdin:
            ifp.close()
        if ofp is not sys.stdout:
            ofp.close()
Fred Drake30a68c71998-11-23 16:59:39 +0000908
909
# Run the converter only when this file is executed as a script, not
# when it is imported as a module.
if __name__ == "__main__":
    main()