blob: 11c487d8d4a34f882abf9fd60ad1e89f6d26fc2e [file] [log] [blame]
Fred Drake03204731998-11-23 17:02:03 +00001#! /usr/bin/env python
2
Fred Drake7dab6af1999-01-28 23:59:58 +00003"""Perform massive transformations on a document tree created from the LaTeX
4of the Python documentation, and dump the ESIS data for the transformed tree.
Fred Drake03204731998-11-23 17:02:03 +00005"""
6__version__ = '$Revision$'
7
8
9import errno
Fred Drake4db5b461998-12-01 19:03:01 +000010import esistools
11import re
Fred Drake03204731998-11-23 17:02:03 +000012import string
13import sys
14import xml.dom.core
15import xml.dom.esis_builder
16
17
class ConversionError(Exception):
    """Raised by the fixup routines when the tree cannot be converted."""
20
21
# Debugging switch for the paragraph fixer.  It selects which para_msg()
# gets defined below and is also consulted by fixup_paras_helper().
DEBUG_PARA_FIXER = 0

if not DEBUG_PARA_FIXER:
    def para_msg(s):
        """Discard the debugging message (debugging is switched off)."""
        pass
else:
    def para_msg(s):
        """Write the debugging message to stderr, flagged with '***'."""
        sys.stderr.write("*** %s\n" % s)
30
Fred Drakefcc59101999-01-06 22:50:52 +000031
# Workaround to deal with invalid documents (multiple root elements).  This
# does not indicate a bug in the DOM implementation.
#
def get_documentElement(self):
    """Return an Element wrapper for the last element child, or None.

    Every element-type child is wrapped in turn; the wrapper built for the
    last one is what gets returned.
    """
    wrapped = None
    for raw in self._node.children:
        if raw.type != xml.dom.core.ELEMENT:
            continue
        wrapped = xml.dom.core.Element(raw, self, self)
    return wrapped

# Install the replacement on the DOM's Document class.
xml.dom.core.Document.get_documentElement = get_documentElement
43
44
# Replace get_childNodes for the Document class; without this, children
# accessed from the Document object via .childNodes (no matter how many
# levels of access are used) will be given an ownerDocument of None.
#
def get_childNodes(self):
    """Return a NodeList over the document's children, owned by the document."""
    raw_children = self._node.children
    return xml.dom.core.NodeList(raw_children, self, self)

# Install the replacement on the DOM's Document class.
xml.dom.core.Document.get_childNodes = get_childNodes
53
54
def get_first_element(doc, gi):
    """Return the first direct child element of doc whose tag name is gi.

    Returns None when no direct child matches.
    """
    for child in doc.childNodes:
        if child.nodeType != xml.dom.core.ELEMENT:
            continue
        if child.tagName == gi:
            return child
    return None
59
def extract_first_element(doc, gi):
    """Detach and return the first direct child element named gi.

    Returns None, leaving doc untouched, when there is no such child.
    """
    found = get_first_element(doc, gi)
    if found is None:
        return None
    doc.removeChild(found)
    return found
65
66
def find_all_elements(doc, gi):
    """Return a list of the elements named gi at or below doc.

    doc itself is included when it is an element named gi.  Each element
    child contributes itself (if it matches) followed by whatever its
    getElementsByTagName(gi) reports.
    """
    ELEMENT = xml.dom.core.ELEMENT
    matches = []
    if doc.nodeType == ELEMENT and doc.tagName == gi:
        matches.append(doc)
    for child in doc.childNodes:
        if child.nodeType != ELEMENT:
            continue
        if child.tagName == gi:
            matches.append(child)
        for descendant in child.getElementsByTagName(gi):
            matches.append(descendant)
    return matches
78
79
def simplify(doc):
    """Rationalize the top level of the document tree in place.

    The input is not a valid SGML/XML document as it stands: the
    <documentclass>, <title> and <input> elements sit next to the
    <document> element.  The document class becomes the name of the root
    element, and the title and inputs are moved to the front of the
    document element.
    """
    documentclass = "document"
    pending = []
    declared = extract_first_element(doc, "documentclass")
    if declared is not None:
        documentclass = declared.getAttribute("classname")
    title = extract_first_element(doc, "title")
    if title is not None:
        pending.append(title)
    # update the name of the root element
    root = get_first_element(doc, "document")
    if root is not None:
        root._node.name = documentclass
    # pull out every top-level <input>, in document order
    node = extract_first_element(doc, "input")
    while node is not None:
        pending.append(node)
        node = extract_first_element(doc, "input")
    if pending:
        docelem = doc.documentElement
        # Each node is inserted at the front, so walk the list backwards to
        # end up with the original order.
        pending.reverse()
        for node in pending:
            newline = doc.createTextNode("\n")
            docelem.insertBefore(newline, docelem.firstChild)
            docelem.insertBefore(node, newline)
        docelem.insertBefore(doc.createTextNode("\n"), docelem.firstChild)
    # drop any text left in front of the first non-text child
    while doc.firstChild.nodeType == xml.dom.core.TEXT:
        doc.removeChild(doc.firstChild)
110
111
def cleanup_root_text(doc):
    """Remove text nodes that are direct children of doc.

    A text node that immediately follows a <COMMENT> element is kept.
    """
    doomed = []
    after_comment = 0
    for child in doc.childNodes:
        follows_comment = after_comment
        after_comment = 0
        kind = child.nodeType
        if kind == xml.dom.core.TEXT:
            if not follows_comment:
                doomed.append(child)
        elif kind == xml.dom.core.ELEMENT and child.tagName == "COMMENT":
            after_comment = 1
    # removal is deferred so the child list is not changed while walking it
    for child in doomed:
        doc.removeChild(child)
124
125
# Tag names that find_and_fix_descriptors() hands to rewrite_descriptor().
DESCRIPTOR_ELEMENTS = (
    "cfuncdesc",
    "cvardesc",
    "ctypedesc",
    "classdesc",
    "memberdesc",
    "memberdescni",
    "methoddesc",
    "methoddescni",
    "excdesc",
    "funcdesc",
    "funcdescni",
    "opcodedesc",
    "datadesc",
    "datadescni",
    )
132
def fixup_descriptors(doc):
    """Rewrite the descriptor elements found in every <section> of doc."""
    for section in find_all_elements(doc, "section"):
        find_and_fix_descriptors(doc, section)
137
138
def find_and_fix_descriptors(doc, container):
    """Rewrite descriptor children of container, descending into subsections.

    Only direct children are examined; <subsection> children are handled
    recursively.
    """
    for child in container.childNodes:
        if child.nodeType != xml.dom.core.ELEMENT:
            continue
        name = child.tagName
        if name == "subsection":
            find_and_fix_descriptors(doc, child)
        elif name in DESCRIPTOR_ELEMENTS:
            rewrite_descriptor(doc, child)
148
Fred Drakecb657811999-01-29 20:55:07 +0000149
def rewrite_descriptor(doc, descriptor):
    """Restructure one descriptor element in place.

    doc is used as the factory for new elements and text nodes;
    descriptor is the element being rewritten.  Afterwards the descriptor
    contains only whitespace text, one or more <signature> elements and a
    single <description> element.
    """
    #
    # Do these things:
    #   1. Add an "index=noindex" attribute to the element if the tagName
    #      ends in 'ni', removing the 'ni' from the name.
    #   2. Create a <signature> from the name attribute and <args>.
    #   3. Create additional <signature>s from <*line{,ni}> elements,
    #      if found.
    #   4. If a <versionadded> is found, move it to an attribute on the
    #      descriptor.
    #   5. Move remaining child nodes to a <description> element.
    #   6. Put it back together.
    #
    descname = descriptor.tagName
    index = 1
    if descname[-2:] == "ni":
        descname = descname[:-2]
        descriptor.setAttribute("index", "noindex")
        descriptor._node.name = descname
        index = 0
    desctype = descname[:-4] # remove 'desc'
    # name of the supplemental-signature element that goes with this
    # descriptor; it carries the same 'ni' suffix the descriptor had
    linename = desctype + "line"
    if not index:
        linename = linename + "ni"
    # 2.
    signature = doc.createElement("signature")
    name = doc.createElement("name")
    signature.appendChild(doc.createTextNode("\n "))
    signature.appendChild(name)
    name.appendChild(doc.createTextNode(descriptor.getAttribute("name")))
    descriptor.removeAttribute("name")
    if descriptor.attributes.has_key("var"):
        # a non-empty "var" attribute becomes the <args> of the signature
        variable = descriptor.getAttribute("var")
        if variable:
            args = doc.createElement("args")
            args.appendChild(doc.createTextNode(variable))
            signature.appendChild(doc.createTextNode("\n "))
            signature.appendChild(args)
        descriptor.removeAttribute("var")
    newchildren = [signature]
    children = descriptor.childNodes
    pos = skip_leading_nodes(children, 0)
    if pos < len(children):
        child = children[pos]
        if child.nodeType == xml.dom.core.ELEMENT and child.tagName == "args":
            # create an <args> in <signature>:
            args = doc.createElement("args")
            # copy the child list first; the children are removed from
            # the old <args> while being moved
            argchildren = []
            map(argchildren.append, child.childNodes)
            for n in argchildren:
                child.removeChild(n)
                args.appendChild(n)
            signature.appendChild(doc.createTextNode("\n "))
            signature.appendChild(args)
    signature.appendChild(doc.createTextNode("\n "))
    # 3, 4.
    pos = skip_leading_nodes(children, pos + 1)
    while pos < len(children) \
          and children[pos].nodeType == xml.dom.core.ELEMENT \
          and children[pos].tagName in (linename, "versionadded"):
        if children[pos].tagName == linename:
            # this is really a supplemental signature, create <signature>
            sig = methodline_to_signature(doc, children[pos])
            newchildren.append(sig)
        else:
            # <versionadded added=...>
            descriptor.setAttribute(
                "added", children[pos].getAttribute("version"))
        pos = skip_leading_nodes(children, pos + 1)
    # 5.
    description = doc.createElement("description")
    description.appendChild(doc.createTextNode("\n"))
    newchildren.append(description)
    move_children(descriptor, description, pos)
    last = description.childNodes[-1]
    if last.nodeType == xml.dom.core.TEXT:
        # normalize the whitespace that ends the description
        last.data = string.rstrip(last.data) + "\n "
    # 6.
    # should have nothing but whitespace and signature lines in <descriptor>;
    # discard them
    while descriptor.childNodes:
        descriptor.removeChild(descriptor.childNodes[0])
    for node in newchildren:
        descriptor.appendChild(doc.createTextNode("\n "))
        descriptor.appendChild(node)
    descriptor.appendChild(doc.createTextNode("\n"))
Fred Drake03204731998-11-23 17:02:03 +0000236
Fred Drake7dab6af1999-01-28 23:59:58 +0000237
def methodline_to_signature(doc, methodline):
    """Build and return a <signature> element from a method-line element.

    The "name" attribute of methodline becomes the text of a <name>
    child and is removed from methodline; any children of methodline are
    moved into an <args> child.
    """
    signature = doc.createElement("signature")
    signature.appendChild(doc.createTextNode("\n "))
    name_elem = doc.createElement("name")
    name_text = doc.createTextNode(methodline.getAttribute("name"))
    name_elem.appendChild(name_text)
    methodline.removeAttribute("name")
    signature.appendChild(name_elem)
    if len(methodline.childNodes) > 0:
        args_elem = doc.createElement("args")
        signature.appendChild(doc.createTextNode("\n "))
        signature.appendChild(args_elem)
        move_children(methodline, args_elem)
    signature.appendChild(doc.createTextNode("\n "))
    return signature
Fred Drake03204731998-11-23 17:02:03 +0000252
253
def move_children(origin, dest, start=0):
    """Move the children of origin from index start onward to the end of dest.

    Children before start stay where they are.  The loop relies on
    origin.childNodes shrinking as each node is removed.
    """
    remaining = origin.childNodes
    while len(remaining) > start:
        moving = remaining[start]
        origin.removeChild(moving)
        dest.appendChild(moving)
260
261
def handle_appendix(doc):
    """Move everything after the <appendix> marker into a <back-matter>.

    The first <appendix> element found below a child of the document
    element is removed (and its parent normalized).  Every later child of
    the document element is then moved into a new <back-matter> element
    appended to the document element; leading whitespace-only text nodes
    are dropped instead of being moved.  Nothing is added when there are no
    such later children.
    """
    # must be called after simplify() if document is multi-rooted to begin with
    docelem = doc.documentElement
    seen_appendix = 0
    tail = []
    for node in docelem.childNodes:
        if seen_appendix:
            tail.append(node)
        elif node.nodeType == xml.dom.core.ELEMENT:
            appnodes = node.getElementsByTagName("appendix")
            if appnodes:
                seen_appendix = 1
                marker = appnodes[0]
                parent = marker.parentNode
                parent.removeChild(marker)
                parent.normalize()
    if tail:
        # Explicit loops rather than map(): the calls are made only for
        # their side effects.
        for node in tail:
            docelem.removeChild(node)
        docelem.appendChild(doc.createTextNode("\n\n\n"))
        back = doc.createElement("back-matter")
        docelem.appendChild(back)
        back.appendChild(doc.createTextNode("\n"))
        # skip whitespace-only text at the start of the back matter
        while tail and tail[0].nodeType == xml.dom.core.TEXT \
              and not string.strip(tail[0].data):
            del tail[0]
        for node in tail:
            back.appendChild(node)
        docelem.appendChild(doc.createTextNode("\n"))
Fred Drake03204731998-11-23 17:02:03 +0000289
290
def handle_labels(doc):
    """Turn <label id="..."/> elements into "id" attributes.

    The id goes on the label's parent, or on the parent's parent when the
    label sits inside a <title>.  The label is then removed.  Labels with
    an empty id are left alone.
    """
    for label in find_all_elements(doc, "label"):
        label_id = label.getAttribute("id")
        if label_id:
            owner = label.parentNode
            target = owner
            if owner.tagName == "title":
                target = owner.parentNode
            target.setAttribute("id", label_id)
            # now, remove <label id="..."/> from parent:
            owner.removeChild(label)
Fred Drake03204731998-11-23 17:02:03 +0000303
304
def fixup_trailing_whitespace(doc, wsmap):
    """Normalize the whitespace that ends selected elements.

    doc is walked breadth-first.  For each element whose tag name is a key
    of wsmap, a trailing text child has its trailing whitespace replaced by
    the mapped string.  Elements without children are left untouched.
    """
    queue = [doc]
    while queue:
        node = queue[0]
        del queue[0]
        if node.nodeType == xml.dom.core.ELEMENT \
           and wsmap.has_key(node.tagName):
            ws = wsmap[node.tagName]
            children = node.childNodes
            # Look at the last child directly; an empty element has no
            # last child to adjust.
            if len(children) > 0:
                last = children[-1]
                if last.nodeType == xml.dom.core.TEXT:
                    last.data = string.rstrip(last.data) + ws
            # hack to get the title in place:
            if node.tagName == "title" \
               and node.parentNode.firstChild.nodeType == xml.dom.core.ELEMENT:
                node.parentNode.insertBefore(doc.createTextNode("\n "),
                                             node.parentNode.firstChild)
        for child in node.childNodes:
            if child.nodeType == xml.dom.core.ELEMENT:
                queue.append(child)
327
328
def normalize(doc):
    """Call normalize() on every element that is a direct child of doc."""
    for child in doc.childNodes:
        if child.nodeType != xml.dom.core.ELEMENT:
            continue
        child.normalize()
333
334
def cleanup_trailing_parens(doc, element_names):
    """Strip a trailing "()" from the text of the named elements.

    The tree below doc is walked breadth-first.  An element named in
    element_names is edited only when its sole child is a text node, and
    its children are not searched further; other elements are descended
    into.
    """
    targets = {}
    for gi in element_names:
        targets[gi] = gi
    pending = []
    for child in doc.childNodes:
        if child.nodeType == xml.dom.core.ELEMENT:
            pending.append(child)
    while pending:
        node = pending.pop(0)
        if not targets.has_key(node.tagName):
            for child in node.childNodes:
                if child.nodeType == xml.dom.core.ELEMENT:
                    pending.append(child)
            continue
        kids = node.childNodes
        if len(kids) != 1:
            continue
        only = kids[0]
        if only.nodeType != xml.dom.core.TEXT:
            continue
        text = only.data
        if text[-2:] == "()":
            only.data = text[:-2]
358
359
def contents_match(left, right):
    """Return 1 if the children of left and right match, else 0.

    Children are compared pairwise: elements by tag name and, recursively,
    by content (attributes are not compared); text nodes by their data.
    Any other node type counts as a mismatch.
    """
    left_children = left.childNodes
    right_children = right.childNodes
    count = len(left_children)
    if count != len(right_children):
        return 0
    for i in range(count):
        l = left_children[i]
        r = right_children[i]
        kind = l.nodeType
        if kind != r.nodeType:
            return 0
        if kind == xml.dom.core.ELEMENT:
            # should check attributes, but that's not a problem here
            if l.tagName != r.tagName or not contents_match(l, r):
                return 0
        elif kind == xml.dom.core.TEXT:
            if l.data != r.data:
                return 0
        else:
            # not quite right, but good enough
            return 0
    return 1
382
383
def create_module_info(doc, section):
    """Gather a section's module metadata into a <moduleinfo> element.

    Does nothing unless section has a <modulesynopsis> child.  That child
    is renamed to <synopsis>; the <moduleauthor>, <platform>,
    <declaremodule> and <versionadded> children are pulled out of the
    section and, for a <section>, reassembled inside a new <moduleinfo>
    that is inserted back into the section.
    """
    # Heavy.
    node = extract_first_element(section, "modulesynopsis")
    if node is None:
        return
    node._node.name = "synopsis"
    # drop a final "." from the synopsis text
    lastchild = node.childNodes[-1]
    if lastchild.nodeType == xml.dom.core.TEXT \
       and lastchild.data[-1:] == ".":
        lastchild.data = lastchild.data[:-1]
    modauthor = extract_first_element(section, "moduleauthor")
    if modauthor:
        # <moduleauthor name="..."> becomes <author>...</author>
        modauthor._node.name = "author"
        modauthor.appendChild(doc.createTextNode(
            modauthor.getAttribute("name")))
        modauthor.removeAttribute("name")
    platform = extract_first_element(section, "platform")
    if section.tagName == "section":
        # index of the section child before which <moduleinfo> is inserted
        modinfo_pos = 2
        modinfo = doc.createElement("moduleinfo")
        moddecl = extract_first_element(section, "declaremodule")
        name = None
        if moddecl:
            modinfo.appendChild(doc.createTextNode("\n "))
            name = moddecl.attributes["name"].value
            namenode = doc.createElement("name")
            namenode.appendChild(doc.createTextNode(name))
            modinfo.appendChild(namenode)
            type = moddecl.attributes.get("type")
            if type:
                type = type.value
                modinfo.appendChild(doc.createTextNode("\n "))
                typenode = doc.createElement("type")
                typenode.appendChild(doc.createTextNode(type))
                modinfo.appendChild(typenode)
        versionadded = extract_first_element(section, "versionadded")
        if versionadded:
            modinfo.setAttribute("added", versionadded.getAttribute("version"))
        title = get_first_element(section, "title")
        if title:
            children = title.childNodes
            if len(children) >= 2 \
               and children[0].nodeType == xml.dom.core.ELEMENT \
               and children[0].tagName == "module" \
               and children[0].childNodes[0].data == name:
                # this is it; morph the <title> into <short-synopsis>
                first_data = children[1]
                if first_data.data[:4] == " ---":
                    first_data.data = string.lstrip(first_data.data[4:])
                title._node.name = "short-synopsis"
                if children[-1].nodeType == xml.dom.core.TEXT \
                   and children[-1].data[-1:] == ".":
                    children[-1].data = children[-1].data[:-1]
                section.removeChild(title)
                section.removeChild(section.childNodes[0])
                title.removeChild(children[0])
                # the title is gone, so <moduleinfo> goes at the front
                modinfo_pos = 0
            else:
                sys.stderr.write(
                    "module name in title doesn't match"
                    " <declaremodule>; no <short-synopsis>\n")
        else:
            sys.stderr.write(
                "Unexpected condition: <section> without <title>\n")
        modinfo.appendChild(doc.createTextNode("\n "))
        modinfo.appendChild(node)
        if title and not contents_match(title, node):
            # The short synopsis is actually different,
            # and needs to be stored:
            modinfo.appendChild(doc.createTextNode("\n "))
            modinfo.appendChild(title)
        if modauthor:
            modinfo.appendChild(doc.createTextNode("\n "))
            modinfo.appendChild(modauthor)
        if platform:
            modinfo.appendChild(doc.createTextNode("\n "))
            modinfo.appendChild(platform)
        modinfo.appendChild(doc.createTextNode("\n "))
        section.insertBefore(modinfo, section.childNodes[modinfo_pos])
        section.insertBefore(doc.createTextNode("\n "), modinfo)
        #
        # The rest of this removes extra newlines from where we cut out
        # a lot of elements.  A lot of code for minimal value, but
        # keeps the generated SGML from being too funny looking.
        #
        section.normalize()
        children = section.childNodes
        for i in range(len(children)):
            node = children[i]
            if node.nodeType == xml.dom.core.ELEMENT \
               and node.tagName == "moduleinfo":
                # NOTE(review): assumes <moduleinfo> is never the last
                # child of the section; children[i+1] would fail otherwise.
                nextnode = children[i+1]
                if nextnode.nodeType == xml.dom.core.TEXT:
                    data = nextnode.data
                    # more than four leading whitespace characters are
                    # collapsed to three newlines
                    if len(string.lstrip(data)) < (len(data) - 4):
                        nextnode.data = "\n\n\n" + string.lstrip(data)
Fred Drakeaaed9711998-12-10 20:25:30 +0000480
481
def cleanup_synopses(doc):
    """Run create_module_info() on every <section> element of doc."""
    sections = find_all_elements(doc, "section")
    for section in sections:
        create_module_info(doc, section)
Fred Drakeaaed9711998-12-10 20:25:30 +0000485
486
def remap_element_names(root, name_map):
    """Rename elements below root according to name_map.

    name_map maps an old tag name to a (new name, attributes) pair; the
    attributes dictionary is applied to each renamed element.  root itself
    is not renamed.
    """
    stack = []
    for child in root.childNodes:
        if child.nodeType == xml.dom.core.ELEMENT:
            stack.append(child)
    while stack:
        node = stack.pop()
        old_name = node.tagName
        if name_map.has_key(old_name):
            new_name, extra_attrs = name_map[old_name]
            node._node.name = new_name
            for attr, value in extra_attrs.items():
                node.setAttribute(attr, value)
        # keep walking, whether or not this element was renamed
        for child in node.childNodes:
            if child.nodeType != xml.dom.core.ELEMENT:
                continue
            stack.append(child)
503
504
def fixup_table_structures(doc):
    """Restructure every <table> element of doc with fixup_table()."""
    # must be done after remap_element_names(), or the tables won't be found
    tables = find_all_elements(doc, "table")
    for table in tables:
        fixup_table(doc, table)
509
Fred Drakef8ebb551999-01-14 19:45:38 +0000510
def fixup_table(doc, table):
    """Rebuild table as <tgroup> containing a <thead> and a <tbody>.

    The <entry> children of table become a single <row> in the head and
    its <row> children move to the body.  A row directly followed (text
    aside) by an <hline> gets rowsep="1".  Whitespace text and <hline>
    elements are then discarded.

    Raises ConversionError if anything else is left in the table:
    non-whitespace text, an unexpected element, or a node of another type.
    """
    ELEMENT = xml.dom.core.ELEMENT
    TEXT = xml.dom.core.TEXT
    # create the table head
    thead = doc.createElement("thead")
    row = doc.createElement("row")
    move_elements_by_name(doc, table, row, "entry")
    thead.appendChild(doc.createTextNode("\n "))
    thead.appendChild(row)
    thead.appendChild(doc.createTextNode("\n "))
    # create the table body
    tbody = doc.createElement("tbody")
    prev_row = None
    children = table.childNodes
    for child in children:
        if child.nodeType == ELEMENT:
            tagName = child.tagName
            if tagName == "hline" and prev_row is not None:
                prev_row.setAttribute("rowsep", "1")
            elif tagName == "row":
                prev_row = child
    # save the rows:
    tbody.appendChild(doc.createTextNode("\n "))
    move_elements_by_name(doc, table, tbody, "row", sep="\n ")
    # and toss the rest:
    while children:
        child = children[0]
        nodeType = child.nodeType
        if nodeType == TEXT:
            if string.strip(child.data):
                raise ConversionError("unexpected free data in table")
        elif nodeType == ELEMENT:
            if child.tagName != "hline":
                raise ConversionError(
                    "unexpected <%s> in table" % child.tagName)
        else:
            raise ConversionError(
                "unexpected %s node in table" % child.__class__.__name__)
        table.removeChild(child)
    # nothing left in the <table>; add the <thead> and <tbody>
    tgroup = doc.createElement("tgroup")
    tgroup.appendChild(doc.createTextNode("\n "))
    tgroup.appendChild(thead)
    tgroup.appendChild(doc.createTextNode("\n "))
    tgroup.appendChild(tbody)
    tgroup.appendChild(doc.createTextNode("\n "))
    table.appendChild(tgroup)
    # now make the <entry>s look nice:
    for row in table.getElementsByTagName("row"):
        fixup_row(doc, row)
562
563
def fixup_row(doc, row):
    """Insert a newline text node before every child of row but the first."""
    # Take a snapshot first: inserting nodes changes row.childNodes.
    trailing = []
    for entry in row.childNodes[1:]:
        trailing.append(entry)
    for entry in trailing:
        separator = doc.createTextNode("\n ")
        row.insertBefore(separator, entry)
    # Nothing is added after the last entry.
570
571
def move_elements_by_name(doc, source, dest, name, sep=None):
    """Move the direct child elements of source named name to the end of dest.

    When sep is given (and non-empty), a text node holding sep is appended
    to dest after each moved element.
    """
    movers = []
    for child in source.childNodes:
        if child.nodeType != xml.dom.core.ELEMENT:
            continue
        if child.tagName == name:
            movers.append(child)
    for element in movers:
        source.removeChild(element)
        dest.appendChild(element)
        if sep:
            dest.appendChild(doc.createTextNode(sep))
582
583
# Elements whose children are themselves searched for paragraph material.
RECURSE_INTO_PARA_CONTAINERS = (
    "chapter",
    "abstract",
    "enumerate",
    "section",
    "subsection",
    "subsubsection",
    "paragraph",
    "subparagraph",
    "howto",
    "manual",
    )
Fred Drakefcc59101999-01-06 22:50:52 +0000590
# Elements that sit at paragraph level: they end a paragraph being built
# and are skipped when looking for the start of the next one.
PARA_LEVEL_ELEMENTS = (
    "moduleinfo",
    "title",
    "verbatim",
    "enumerate",
    "item",
    "interpreter-session",
    "opcodedesc",
    "classdesc",
    "datadesc",
    "funcdesc",
    "methoddesc",
    "excdesc",
    "funcdescni",
    "methoddescni",
    "excdescni",
    "tableii",
    "tableiii",
    "tableiv",
    "localmoduletable",
    "sectionauthor",
    "seealso",
    # include <para>, so we can just do it again to get subsequent paras:
    "para",
    )
602
# Elements that skip_leading_nodes() steps over when looking for the start
# of paragraph material.
PARA_LEVEL_PRECEEDERS = (
    "index",
    "indexii",
    "indexiii",
    "indexiv",
    "setindexsubitem",
    "stindex",
    "obindex",
    "COMMENT",
    "label",
    "input",
    "title",
    )
607
Fred Drake7dab6af1999-01-28 23:59:58 +0000608
def fixup_paras(doc):
    """Wrap loose paragraph material in <para> elements.

    The paragraph-container children of doc are processed first, then
    every <description> element in the document.
    """
    for child in doc.childNodes:
        if child.nodeType != xml.dom.core.ELEMENT:
            continue
        if child.tagName in RECURSE_INTO_PARA_CONTAINERS:
            fixup_paras_helper(doc, child)
    for description in find_all_elements(doc, "description"):
        fixup_paras_helper(doc, description)
Fred Drakefcc59101999-01-06 22:50:52 +0000618
619
def fixup_paras_helper(doc, container, depth=0):
    """Build <para> elements from the children of container.

    Nested paragraph containers are handled recursively.  The document is
    expected to be normalized already.  When DEBUG_PARA_FIXER is set and
    depth is 10, the process exits after building one paragraph.
    """
    children = container.childNodes
    start = 0
    while start < len(children):
        start = skip_leading_nodes(children, start)
        if start >= len(children):
            break
        node = children[start]
        if node.nodeType == xml.dom.core.ELEMENT \
           and node.tagName in RECURSE_INTO_PARA_CONTAINERS:
            # something to recurse into
            fixup_paras_helper(doc, node)
            start = skip_leading_nodes(children, start + 1)
        else:
            # paragraph material
            build_para(doc, container, start, len(children))
            if DEBUG_PARA_FIXER and depth == 10:
                sys.exit(1)
            start = start + 1
Fred Drakefcc59101999-01-06 22:50:52 +0000643
644
def build_para(doc, parent, start, i):
    """Wrap a run of parent's children in a new <para> element.

    The run begins at child index start; i is the exclusive upper bound
    of the child indexes examined.  Returns len(parent.childNodes) when
    the paragraph was appended at the end of parent, otherwise start + 1.

    Raises ConversionError when no child can be put into the paragraph.
    """
    children = parent.childNodes
    # index one past the last child that goes into the paragraph
    after = start + 1
    # true when the paragraph reaches to the end of the examined children
    have_last = 0
    BREAK_ELEMENTS = PARA_LEVEL_ELEMENTS + RECURSE_INTO_PARA_CONTAINERS
    # Collect all children until \n\n+ is found in a text node or a
    # member of BREAK_ELEMENTS is found.
    for j in range(start, i):
        after = j + 1
        child = children[j]
        nodeType = child.nodeType
        if nodeType == xml.dom.core.ELEMENT:
            if child.tagName in BREAK_ELEMENTS:
                after = j
                break
        elif nodeType == xml.dom.core.TEXT:
            pos = string.find(child.data, "\n\n")
            if pos == 0:
                # the text starts with a blank line; stop before it
                after = j
                break
            if pos >= 1:
                # keep the text before the blank line, split off the rest
                child.splitText(pos)
                break
    else:
        # the loop ran to the end without finding a break point
        have_last = 1
    if (start + 1) > after:
        raise ConversionError(
            "build_para() could not identify content to turn into a paragraph")
    if children[after - 1].nodeType == xml.dom.core.TEXT:
        # we may need to split off trailing white space:
        child = children[after - 1]
        data = child.data
        if string.rstrip(data) != data:
            have_last = 0
            child.splitText(len(string.rstrip(data)))
    para = doc.createElement("para")
    prev = None
    # Move the nodes last-to-first, inserting each before the one moved
    # previously, which keeps them in document order inside <para>.
    indexes = range(start, after)
    indexes.reverse()
    for j in indexes:
        node = parent.childNodes[j]
        parent.removeChild(node)
        para.insertBefore(node, prev)
        prev = node
    if have_last:
        parent.appendChild(para)
        return len(parent.childNodes)
    else:
        parent.insertBefore(para, parent.childNodes[start])
        return start + 1
Fred Drakefcc59101999-01-06 22:50:52 +0000695
696
def skip_leading_nodes(children, start):
    """Return index into children of a node at which paragraph building should
    begin or a recursive call to fixup_paras_helper() should be made (for
    subsections, etc.).

    When the return value >= len(children), we've built all the paras we can
    from this list of children.
    """
    end = len(children)
    skippable = PARA_LEVEL_ELEMENTS + PARA_LEVEL_PRECEEDERS
    while start < end:
        # skip over leading comments and whitespace:
        child = children[start]
        kind = child.nodeType
        if kind == xml.dom.core.TEXT:
            text = child.data
            stripped = string.lstrip(text)
            if stripped:
                if text == stripped:
                    return start
                # break into two nodes: whitespace and non-whitespace
                child.splitText(len(text) - len(stripped))
                return start + 1
            # all whitespace, just skip
        elif kind == xml.dom.core.ELEMENT:
            name = child.tagName
            if name in RECURSE_INTO_PARA_CONTAINERS or name not in skippable:
                return start
        start = start + 1
    return start
Fred Drakefba0ba21998-12-10 05:07:09 +0000728
729
def fixup_rfc_references(doc):
    """Give every <rfc> element the text "RFC " plus its "num" attribute."""
    for rfcnode in find_all_elements(doc, "rfc"):
        label = "RFC " + rfcnode.getAttribute("num")
        rfcnode.appendChild(doc.createTextNode(label))
Fred Draked24167b1999-01-14 21:18:03 +0000734
735
def fixup_signatures(doc):
    """Flatten <optional> markup in <args> and <constructor-args> elements.

    Each such element below an element child of doc is passed to
    fixup_args() and then normalized.
    """
    for child in doc.childNodes:
        if child.nodeType != xml.dom.core.ELEMENT:
            continue
        # all <args> are handled before <constructor-args> is looked up
        for tag in ("args", "constructor-args"):
            for arglist in child.getElementsByTagName(tag):
                fixup_args(doc, arglist)
                arglist.normalize()
747
748
def fixup_args(doc, arglist):
    """Replace each <optional> child of arglist by its bracketed content.

    An <optional> element becomes the text "[", its former children, and
    the text "]".  The child list is rescanned after every replacement
    until no <optional> child is left.
    """
    while 1:
        optional = None
        for child in arglist.childNodes:
            if child.nodeType == xml.dom.core.ELEMENT \
               and child.tagName == "optional":
                optional = child
                break
        if optional is None:
            return
        # found one; unwrap it and scan again
        arglist.insertBefore(doc.createTextNode("["), optional)
        optkids = optional.childNodes
        while optkids:
            kid = optkids[0]
            optional.removeChild(kid)
            arglist.insertBefore(kid, optional)
        arglist.insertBefore(doc.createTextNode("]"), optional)
        arglist.removeChild(optional)
763
764
def fixup_sectionauthors(doc):
    """Turn each <sectionauthor name="..."/> into <author>...</author>.

    The element is renamed, its "name" attribute becomes its text, and it
    is re-inserted near the start of its section.
    """
    for author in find_all_elements(doc, "sectionauthor"):
        section = author.parentNode
        section.removeChild(author)
        author._node.name = "author"
        author_name = author.getAttribute("name")
        author.appendChild(doc.createTextNode(author_name))
        author.removeAttribute("name")
        # Insert before the third child, unless the second child is an
        # element other than <title>; then insert at the very front.
        anchor = section.childNodes[2]
        second = section.childNodes[1]
        if second.nodeType == xml.dom.core.ELEMENT \
           and second.tagName != "title":
            anchor = section.childNodes[0]
        section.insertBefore(doc.createTextNode("\n "), anchor)
        section.insertBefore(author, anchor)
779
780
def fixup_verbatims(doc):
    """Rename <verbatim> elements that start with ">>>".

    A <verbatim> whose first child is text beginning (after leading
    whitespace) with ">>>" is renamed to <interpreter-session>.
    """
    for verbatim in find_all_elements(doc, "verbatim"):
        first = verbatim.childNodes[0]
        if first.nodeType != xml.dom.core.TEXT:
            continue
        if string.lstrip(first.data)[:3] == ">>>":
            verbatim._node.name = "interpreter-session"
788
789
# Attribute values matching this pattern are written as TOKEN, not CDATA.
_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")

def write_esis(doc, ofp, knownempty):
    """Write the children of doc to ofp as ESIS events, recursively.

    knownempty is called with a tag name and returns true for elements
    declared empty; these get an "e" line before their attributes.

    Raises ValueError if a declared-empty element has children and
    RuntimeError for a node that is neither an element nor text.
    """
    for node in doc.childNodes:
        nodeType = node.nodeType
        if nodeType == xml.dom.core.TEXT:
            ofp.write("-%s\n" % esistools.encode(node.data))
        elif nodeType == xml.dom.core.ELEMENT:
            gi = node.tagName
            if knownempty(gi):
                if node.hasChildNodes():
                    raise ValueError("declared-empty node has children")
                ofp.write("e\n")
            for attrname, attr in node.attributes.items():
                value = attr.value
                dtype = "CDATA"
                if _token_rx.match(value):
                    dtype = "TOKEN"
                ofp.write("A%s %s %s\n"
                          % (attrname, dtype, esistools.encode(value)))
            ofp.write("(%s\n" % gi)
            write_esis(node, ofp, knownempty)
            ofp.write(")%s\n" % gi)
        else:
            raise RuntimeError("unsupported node type: %s" % nodeType)
815
816
def convert(ifp, ofp):
    """Read ESIS data from ifp, transform the tree and write ESIS to ofp.

    The fixup passes run in a fixed order; some depend on earlier ones
    (handle_appendix() on simplify(), fixup_table_structures() on
    remap_element_names()).

    An IOError whose errno is EPIPE while writing is swallowed; any other
    IOError propagates to the caller.
    """
    p = esistools.ExtendedEsisBuilder()
    p.feed(ifp.read())
    doc = p.document
    normalize(doc)
    simplify(doc)
    handle_labels(doc)
    handle_appendix(doc)
    fixup_trailing_whitespace(doc, {
        "abstract": "\n",
        "title": "",
        "chapter": "\n\n",
        "section": "\n\n",
        "subsection": "\n\n",
        "subsubsection": "\n\n",
        "paragraph": "\n\n",
        "subparagraph": "\n\n",
        })
    cleanup_root_text(doc)
    cleanup_trailing_parens(doc, ["function", "method", "cfunction"])
    cleanup_synopses(doc)
    fixup_descriptors(doc)
    fixup_verbatims(doc)
    normalize(doc)
    fixup_paras(doc)
    fixup_sectionauthors(doc)
    remap_element_names(doc, {
        "tableii": ("table", {"cols": "2"}),
        "tableiii": ("table", {"cols": "3"}),
        "tableiv": ("table", {"cols": "4"}),
        "lineii": ("row", {}),
        "lineiii": ("row", {}),
        "lineiv": ("row", {}),
        "refmodule": ("module", {"link": "link"}),
        })
    fixup_table_structures(doc)
    fixup_rfc_references(doc)
    fixup_signatures(doc)
    #
    # Elements the builder reports as empty, except <rfc>, which was given
    # text content by fixup_rfc_references().
    d = {}
    for gi in p.get_empties():
        d[gi] = gi
    if d.has_key("rfc"):
        del d["rfc"]
    knownempty = d.has_key
    #
    try:
        write_esis(doc, ofp, knownempty)
    except IOError:
        # Ignore EPIPE; it just means that whoever we're writing to stopped
        # reading.  The rest of the output would be ignored.  All other errors
        # should still be reported.
        #
        # The exception is inspected through its errno attribute instead of
        # being unpacked as an (errno, message) pair, so an IOError with a
        # different number of arguments cannot break the handler.
        err = sys.exc_info()[1]
        if getattr(err, "errno", None) != errno.EPIPE:
            raise
871
872
def main():
    """Command-line entry point: [input-file [output-file]].

    With no arguments, input is read from stdin and output goes to stdout.
    With any other number of arguments than one or two, a usage message is
    written to stderr and the process exits with status 2.  Files opened
    here are closed again once the conversion has finished or failed.
    """
    argc = len(sys.argv)
    if argc < 1 or argc > 3:
        if sys.argv:
            prog = sys.argv[0]
        else:
            prog = "<script>"
        sys.stderr.write("usage: %s [input-file [output-file]]\n" % prog)
        sys.exit(2)
    ifp = sys.stdin
    ofp = sys.stdout
    try:
        if argc >= 2:
            ifp = open(sys.argv[1])
        if argc == 3:
            ofp = open(sys.argv[2], "w")
        convert(ifp, ofp)
    finally:
        # only close what was opened here; leave the standard streams alone
        if ofp is not sys.stdout:
            ofp.close()
        if ifp is not sys.stdin:
            ifp.close()


if __name__ == "__main__":
    main()