blob: f700134a1bd3a46ebd01c2777a13b92519260f4f [file] [log] [blame]
Fred Drake03204731998-11-23 17:02:03 +00001#! /usr/bin/env python
2
Fred Drake7dab6af1999-01-28 23:59:58 +00003"""Perform massive transformations on a document tree created from the LaTeX
4of the Python documentation, and dump the ESIS data for the transformed tree.
Fred Drake03204731998-11-23 17:02:03 +00005"""
6__version__ = '$Revision$'
7
8
9import errno
Fred Drake4db5b461998-12-01 19:03:01 +000010import esistools
11import re
Fred Drake03204731998-11-23 17:02:03 +000012import string
13import sys
14import xml.dom.core
Fred Drakee779d4f1999-05-10 19:36:52 +000015
16from xml.dom.core import \
17 ELEMENT, \
18 TEXT
Fred Drake03204731998-11-23 17:02:03 +000019
20
class ConversionError(Exception):
    """Raised by the fixup routines when the tree has unexpected content."""
23
24
# Set to a true value before import to make para_msg() write to stderr.
DEBUG_PARA_FIXER = 0


def para_msg(s):
    """Debugging hook; does nothing unless DEBUG_PARA_FIXER is set."""
    pass

if DEBUG_PARA_FIXER:
    def para_msg(s):
        """Write the debugging message `s' to standard error."""
        sys.stderr.write("*** %s\n" % s)
33
Fred Drakefcc59101999-01-06 22:50:52 +000034
# Workaround to deal with invalid documents (multiple root elements).  This
# does not indicate a bug in the DOM implementation.
#
def get_documentElement(doc):
    """Return the last element child of `doc', or None if it has none.

    When several root-level elements are present, the last one wins.
    """
    kids = doc.childNodes
    pos = len(kids) - 1
    # Scan backwards: the first element met is the last one in the list.
    while pos >= 0:
        candidate = kids[pos]
        if candidate.nodeType == ELEMENT:
            return candidate
        pos = pos - 1
    return None

# Install the workaround on the Document class itself.
xml.dom.core.Document.get_documentElement = get_documentElement
46
47
# Replace get_childNodes for the Document class; without this, children
# accessed from the Document object via .childNodes (no matter how many
# levels of access are used) will be given an ownerDocument of None.
#
def get_childNodes(doc):
    """Return a NodeList over the children of `doc', built from its
    underlying `_node' object."""
    raw = doc._node
    return xml.dom.core.NodeList(raw.children, raw)

# Install the replacement accessor on the Document class.
xml.dom.core.Document.get_childNodes = get_childNodes
56
57
def get_first_element(doc, gi):
    """Return the first direct child element of `doc' whose tag name is
    `gi', or None when there is no such child."""
    for candidate in doc.childNodes:
        if candidate.nodeType != ELEMENT:
            continue
        if candidate.tagName == gi:
            return candidate
    return None
62
def extract_first_element(doc, gi):
    """Detach and return the first direct child element of `doc' named
    `gi'; return None (leaving `doc' untouched) if there is none."""
    found = get_first_element(doc, gi)
    if found is None:
        return None
    doc.removeChild(found)
    return found
68
69
def find_all_elements(doc, gi):
    """Return a list of the elements named `gi' at or below `doc'.

    `doc' itself is included when it is an element with that name; each
    element child is then checked, followed by whatever its
    getElementsByTagName() reports.
    """
    matches = []
    collect = matches.append
    if doc.nodeType == ELEMENT and doc.tagName == gi:
        collect(doc)
    for kid in doc.childNodes:
        if kid.nodeType != ELEMENT:
            continue
        if kid.tagName == gi:
            collect(kid)
        for descendant in kid.getElementsByTagName(gi):
            collect(descendant)
    return matches
81
82
def simplify(doc, fragment):
    """Rationalize the top level of `fragment' in place.

    - A root-level <documentclass> is removed; its "classname" attribute
      becomes the new name of the root-level <document> element.
    - A root-level <title> and all root-level <input> elements are moved
      to the front of the document element, in their original order, each
      followed by a newline text node.
    - Leading text nodes of `fragment' are removed.

    `doc' is only used as the factory for new text nodes.
    """
    # Try to rationalize the document a bit, since these things are simply
    # not valid SGML/XML documents as they stand, and need a little work.
    documentclass = "document"
    inputs = []
    node = extract_first_element(fragment, "documentclass")
    if node is not None:
        documentclass = node.getAttribute("classname")
    node = extract_first_element(fragment, "title")
    if node is not None:
        inputs.append(node)
    # update the name of the root element
    node = get_first_element(fragment, "document")
    if node is not None:
        node._node.name = documentclass
    while 1:
        node = extract_first_element(fragment, "input")
        if node is None:
            break
        inputs.append(node)
    if inputs:
        docelem = get_documentElement(fragment)
        # Each node is inserted at the front, so walk the list backwards
        # to end up with the original order.
        inputs.reverse()
        for node in inputs:
            text = doc.createTextNode("\n")
            docelem.insertBefore(text, docelem.firstChild)
            docelem.insertBefore(node, text)
        docelem.insertBefore(doc.createTextNode("\n"), docelem.firstChild)
    # Drop leading text nodes.  Stop cleanly when the fragment runs out of
    # children instead of dereferencing a missing first child.
    first = fragment.firstChild
    while first is not None and first.nodeType == TEXT:
        fragment.removeChild(first)
        first = fragment.firstChild
Fred Drake03204731998-11-23 17:02:03 +0000113
114
def cleanup_root_text(doc):
    """Remove the text nodes that are direct children of `doc', except a
    text node that immediately follows a <COMMENT> element."""
    doomed = []
    after_comment = 0
    for kid in doc.childNodes:
        follows_comment = after_comment
        after_comment = 0
        if kid.nodeType == TEXT:
            if not follows_comment:
                doomed.append(kid)
        elif kid.nodeType == ELEMENT and kid.tagName == "COMMENT":
            after_comment = 1
    # Removal is deferred so the child list is not changed while scanning.
    for kid in doomed:
        doc.removeChild(kid)
127
128
# Tag names that find_and_fix_descriptors() hands to rewrite_descriptor().
DESCRIPTOR_ELEMENTS = (
    "cfuncdesc",
    "cvardesc",
    "ctypedesc",
    "classdesc",
    "memberdesc",
    "memberdescni",
    "methoddesc",
    "methoddescni",
    "excdesc",
    "funcdesc",
    "funcdescni",
    "opcodedesc",
    "datadesc",
    "datadescni",
    )
135
def fixup_descriptors(doc, fragment):
    """Rewrite the descriptor elements found in every <section> at or
    below `fragment'."""
    for sect in find_all_elements(fragment, "section"):
        find_and_fix_descriptors(doc, sect)
140
141
def find_and_fix_descriptors(doc, container):
    """Rewrite descriptor elements that are direct children of
    `container', descending into <subsection> children."""
    for kid in container.childNodes:
        if kid.nodeType != ELEMENT:
            continue
        gi = kid.tagName
        if gi == "subsection":
            find_and_fix_descriptors(doc, kid)
        elif gi in DESCRIPTOR_ELEMENTS:
            rewrite_descriptor(doc, kid)
151
Fred Drakecb657811999-01-29 20:55:07 +0000152
def rewrite_descriptor(doc, descriptor):
    """Restructure one descriptor element in place.

    `doc' is the factory for new nodes; `descriptor' is the element to
    rewrite.  On return the descriptor holds one or more <signature>
    children followed by a single <description>.
    """
    #
    # Do these things:
    #   1. Add an "index=noindex" attribute to the element if the tagName
    #      ends in 'ni', removing the 'ni' from the name.
    #   2. Create a <signature> from the name attribute and <args>.
    #   3. Create additional <signature>s from <*line{,ni}> elements,
    #      if found.
    #   4. If a <versionadded> is found, move it to an attribute on the
    #      descriptor.
    #   5. Move remaining child nodes to a <description> element.
    #   6. Put it back together.
    #
    descname = descriptor.tagName
    index = 1
    if descname[-2:] == "ni":
        descname = descname[:-2]
        descriptor.setAttribute("index", "noindex")
        descriptor._node.name = descname
        index = 0
    desctype = descname[:-4] # remove 'desc'
    linename = desctype + "line"
    if not index:
        linename = linename + "ni"
    # 2.
    signature = doc.createElement("signature")
    name = doc.createElement("name")
    signature.appendChild(doc.createTextNode("\n "))
    signature.appendChild(name)
    name.appendChild(doc.createTextNode(descriptor.getAttribute("name")))
    descriptor.removeAttribute("name")
    if descriptor.attributes.has_key("var"):
        variable = descriptor.getAttribute("var")
        if variable:
            args = doc.createElement("args")
            args.appendChild(doc.createTextNode(variable))
            signature.appendChild(doc.createTextNode("\n "))
            signature.appendChild(args)
        descriptor.removeAttribute("var")
    newchildren = [signature]
    children = descriptor.childNodes
    pos = skip_leading_nodes(children, 0)
    if pos < len(children):
        child = children[pos]
        if child.nodeType == ELEMENT and child.tagName == "args":
            # create an <args> in <signature>:
            args = doc.createElement("args")
            # Snapshot the children first; the list changes as they move.
            argchildren = []
            for n in child.childNodes:
                argchildren.append(n)
            for n in argchildren:
                child.removeChild(n)
                args.appendChild(n)
            signature.appendChild(doc.createTextNode("\n "))
            signature.appendChild(args)
            # Step past the emptied <args> only when one was really found;
            # advancing unconditionally would skip (and later discard) the
            # first content node of a descriptor that has no <args>.
            pos = pos + 1
    signature.appendChild(doc.createTextNode("\n "))
    # 3, 4.
    pos = skip_leading_nodes(children, pos)
    while pos < len(children) \
          and children[pos].nodeType == ELEMENT \
          and children[pos].tagName in (linename, "versionadded"):
        if children[pos].tagName == linename:
            # this is really a supplemental signature, create <signature>
            sig = methodline_to_signature(doc, children[pos])
            newchildren.append(sig)
        else:
            # <versionadded added=...>
            descriptor.setAttribute(
                "added", children[pos].getAttribute("version"))
        pos = skip_leading_nodes(children, pos + 1)
    # 5.
    description = doc.createElement("description")
    description.appendChild(doc.createTextNode("\n"))
    newchildren.append(description)
    move_children(descriptor, description, pos)
    # Never empty: the newline text node was appended just above.
    last = description.childNodes[-1]
    if last.nodeType == TEXT:
        last.data = string.rstrip(last.data) + "\n "
    # 6.
    # should have nothing but whitespace and signature lines in <descriptor>;
    # discard them
    while descriptor.childNodes:
        descriptor.removeChild(descriptor.childNodes[0])
    for node in newchildren:
        descriptor.appendChild(doc.createTextNode("\n "))
        descriptor.appendChild(node)
    descriptor.appendChild(doc.createTextNode("\n"))
Fred Drake03204731998-11-23 17:02:03 +0000239
Fred Drake7dab6af1999-01-28 23:59:58 +0000240
def methodline_to_signature(doc, methodline):
    """Build and return a <signature> element for `methodline'.

    The "name" attribute is removed from `methodline' and becomes the text
    of a <name> child.  Any children of `methodline' are moved into an
    <args> child.
    """
    sig = doc.createElement("signature")
    sig.appendChild(doc.createTextNode("\n "))
    namenode = doc.createElement("name")
    label = methodline.getAttribute("name")
    namenode.appendChild(doc.createTextNode(label))
    methodline.removeAttribute("name")
    sig.appendChild(namenode)
    if len(methodline.childNodes) != 0:
        argnode = doc.createElement("args")
        sig.appendChild(doc.createTextNode("\n "))
        sig.appendChild(argnode)
        move_children(methodline, argnode)
    sig.appendChild(doc.createTextNode("\n "))
    return sig
Fred Drake03204731998-11-23 17:02:03 +0000255
256
def move_children(origin, dest, start=0):
    """Move the children of `origin' from index `start' onward to the end
    of `dest', keeping their order."""
    remaining = origin.childNodes
    # Each removal shortens the list, so the node to move is always the
    # one currently sitting at `start'.
    while len(remaining) > start:
        moving = remaining[start]
        origin.removeChild(moving)
        dest.appendChild(moving)
263
264
def handle_appendix(doc, fragment):
    """Move everything after the <appendix> marker into <back-matter>.

    The first <appendix> element found below a child of the document
    element is removed.  All following children of the document element
    are then moved into a new <back-matter> element appended to it, with
    leading whitespace-only text nodes dropped.
    """
    # must be called after simplify() if document is multi-rooted to begin with
    docelem = get_documentElement(fragment)
    appendices = 0
    nodes = []
    for node in docelem.childNodes:
        if appendices:
            nodes.append(node)
        elif node.nodeType == ELEMENT:
            appnodes = node.getElementsByTagName("appendix")
            if appnodes:
                appendices = 1
                parent = appnodes[0].parentNode
                parent.removeChild(appnodes[0])
                parent.normalize()
    if nodes:
        # Explicit loops: these calls are made for their side effects.
        for node in nodes:
            docelem.removeChild(node)
        docelem.appendChild(doc.createTextNode("\n\n\n"))
        back = doc.createElement("back-matter")
        docelem.appendChild(back)
        back.appendChild(doc.createTextNode("\n"))
        while nodes and nodes[0].nodeType == TEXT \
              and not string.strip(nodes[0].data):
            del nodes[0]
        for node in nodes:
            back.appendChild(node)
        docelem.appendChild(doc.createTextNode("\n"))
Fred Drake03204731998-11-23 17:02:03 +0000292
293
def handle_labels(doc):
    """Turn each <label id="..."> into an "id" attribute on its parent.

    When the parent is a <title>, the attribute goes on the title's parent
    instead.  Labels with an empty "id" are left alone; the others are
    removed from the tree.
    """
    for label in find_all_elements(doc, "label"):
        label_id = label.getAttribute("id")
        if not label_id:
            continue
        holder = label.parentNode
        target = holder
        if holder.tagName == "title":
            target = holder.parentNode
        target.setAttribute("id", label_id)
        # now, remove <label id="..."/> from its parent:
        holder.removeChild(label)
Fred Drake03204731998-11-23 17:02:03 +0000306
307
def fixup_trailing_whitespace(doc, wsmap):
    """Normalize the trailing whitespace of selected elements.

    `wsmap' maps tag names to the whitespace string that should end the
    element's content.  The tree is walked breadth-first from `doc'; for
    each element named in `wsmap' whose last child is a text node, that
    text is right-stripped and the mapped whitespace appended.
    """
    queue = [doc]
    while queue:
        node = queue[0]
        del queue[0]
        if node.nodeType == ELEMENT \
           and wsmap.has_key(node.tagName):
            ws = wsmap[node.tagName]
            children = node.childNodes
            # Look at the last child directly; an element with no
            # children simply has no trailing text to adjust.
            if len(children) > 0:
                last = children[-1]
                if last.nodeType == TEXT:
                    last.data = string.rstrip(last.data) + ws
            # hack to get the title in place:
            if node.tagName == "title" \
               and node.parentNode.firstChild.nodeType == ELEMENT:
                # createTextNode() is the factory used everywhere else in
                # this file.
                node.parentNode.insertBefore(doc.createTextNode("\n "),
                                             node.parentNode.firstChild)
        for child in node.childNodes:
            if child.nodeType == ELEMENT:
                queue.append(child)
330
331
def normalize(doc):
    """Call normalize() on every element that is a direct child of `doc'."""
    for kid in doc.childNodes:
        if kid.nodeType != ELEMENT:
            continue
        kid.normalize()
336
337
def cleanup_trailing_parens(doc, element_names):
    """Strip a trailing "()" from the text of the named elements.

    Elements below `doc' whose tag name is in `element_names' and whose
    only child is a text node ending in "()" lose those two characters.
    The walk does not descend into an element that has a listed name.
    """
    targets = {}
    for gi in element_names:
        targets[gi] = gi
    pending = []
    for top in doc.childNodes:
        if top.nodeType == ELEMENT:
            pending.append(top)
    while pending:
        current = pending[0]
        del pending[0]
        if not targets.has_key(current.tagName):
            # Not a target: keep searching among its element children.
            for kid in current.childNodes:
                if kid.nodeType == ELEMENT:
                    pending.append(kid)
            continue
        kids = current.childNodes
        if len(kids) != 1:
            continue
        only = kids[0]
        if only.nodeType == TEXT and only.data[-2:] == "()":
            only.data = only.data[:-2]
361
362
def contents_match(left, right):
    """Return 1 if the children of `left' and `right' match, else 0.

    Children are compared pairwise: elements must have the same tag name
    and matching contents, text nodes the same data.  Attributes are not
    compared, and any other node type counts as a mismatch.
    """
    left_children = left.childNodes
    right_children = right.childNodes
    count = len(left_children)
    if count != len(right_children):
        return 0
    for i in range(count):
        lnode = left_children[i]
        rnode = right_children[i]
        kind = lnode.nodeType
        if kind != rnode.nodeType:
            return 0
        if kind == ELEMENT:
            if lnode.tagName != rnode.tagName:
                return 0
            # should check attributes, but that's not a problem here
            if not contents_match(lnode, rnode):
                return 0
        elif kind == TEXT:
            if lnode.data != rnode.data:
                return 0
        else:
            # not quite right, but good enough
            return 0
    return 1
385
386
def create_module_info(doc, section):
    """Collect a section's module metadata into a <moduleinfo> element.

    Does nothing unless `section' has a <modulesynopsis> child.  That
    child is renamed <synopsis>; <moduleauthor> is renamed <author> with
    its "name" attribute turned into text.  When `section' is a <section>,
    these, the <declaremodule> name and type, <versionadded>, <platform>
    and (if it names the module) the title reworked as <short-synopsis>
    are gathered into a new <moduleinfo> inserted into the section.
    """
    # Heavy.
    node = extract_first_element(section, "modulesynopsis")
    if node is None:
        return
    node._node.name = "synopsis"
    # An empty synopsis has no trailing period to trim.
    if len(node.childNodes) > 0:
        lastchild = node.childNodes[-1]
        if lastchild.nodeType == TEXT \
           and lastchild.data[-1:] == ".":
            lastchild.data = lastchild.data[:-1]
    # Nodes are tested with "is not None" (as elsewhere in this file) so
    # the result does not depend on how a node evaluates as a boolean.
    modauthor = extract_first_element(section, "moduleauthor")
    if modauthor is not None:
        modauthor._node.name = "author"
        modauthor.appendChild(doc.createTextNode(
            modauthor.getAttribute("name")))
        modauthor.removeAttribute("name")
    platform = extract_first_element(section, "platform")
    if section.tagName == "section":
        modinfo_pos = 2
        modinfo = doc.createElement("moduleinfo")
        moddecl = extract_first_element(section, "declaremodule")
        name = None
        if moddecl is not None:
            modinfo.appendChild(doc.createTextNode("\n "))
            name = moddecl.attributes["name"].value
            namenode = doc.createElement("name")
            namenode.appendChild(doc.createTextNode(name))
            modinfo.appendChild(namenode)
            modtype = moddecl.attributes.get("type")
            if modtype is not None:
                modtype = modtype.value
                modinfo.appendChild(doc.createTextNode("\n "))
                typenode = doc.createElement("type")
                typenode.appendChild(doc.createTextNode(modtype))
                modinfo.appendChild(typenode)
        versionadded = extract_first_element(section, "versionadded")
        if versionadded is not None:
            modinfo.setAttribute("added", versionadded.getAttribute("version"))
        title = get_first_element(section, "title")
        if title is not None:
            children = title.childNodes
            if len(children) >= 2 \
               and children[0].nodeType == ELEMENT \
               and children[0].tagName == "module" \
               and len(children[0].childNodes) > 0 \
               and children[0].childNodes[0].data == name:
                # this is it; morph the <title> into <short-synopsis>
                first_data = children[1]
                if first_data.data[:4] == " ---":
                    first_data.data = string.lstrip(first_data.data[4:])
                title._node.name = "short-synopsis"
                if children[-1].nodeType == TEXT \
                   and children[-1].data[-1:] == ".":
                    children[-1].data = children[-1].data[:-1]
                section.removeChild(title)
                section.removeChild(section.childNodes[0])
                title.removeChild(children[0])
                modinfo_pos = 0
            else:
                sys.stderr.write(
                    "module name in title doesn't match"
                    " <declaremodule>; no <short-synopsis>\n")
        else:
            sys.stderr.write(
                "Unexpected condition: <section> without <title>\n")
        modinfo.appendChild(doc.createTextNode("\n "))
        modinfo.appendChild(node)
        # NOTE(review): this also runs when the title was NOT morphed
        # above, which moves the plain <title> into <moduleinfo>; confirm
        # whether that is intended.
        if title is not None and not contents_match(title, node):
            # The short synopsis is actually different,
            # and needs to be stored:
            modinfo.appendChild(doc.createTextNode("\n "))
            modinfo.appendChild(title)
        if modauthor is not None:
            modinfo.appendChild(doc.createTextNode("\n "))
            modinfo.appendChild(modauthor)
        if platform is not None:
            modinfo.appendChild(doc.createTextNode("\n "))
            modinfo.appendChild(platform)
        modinfo.appendChild(doc.createTextNode("\n "))
        section.insertBefore(modinfo, section.childNodes[modinfo_pos])
        section.insertBefore(doc.createTextNode("\n "), modinfo)
        #
        # The rest of this removes extra newlines from where we cut out
        # a lot of elements.  A lot of code for minimal value, but keeps
        # the generated SGML from being too funny looking.
        #
        section.normalize()
        children = section.childNodes
        for i in range(len(children)):
            child = children[i]
            if child.nodeType == ELEMENT \
               and child.tagName == "moduleinfo":
                if i + 1 >= len(children):
                    # <moduleinfo> is the last child; nothing follows it.
                    break
                nextnode = children[i+1]
                if nextnode.nodeType == TEXT:
                    data = nextnode.data
                    if len(string.lstrip(data)) < (len(data) - 4):
                        nextnode.data = "\n\n\n" + string.lstrip(data)
Fred Drakeaaed9711998-12-10 20:25:30 +0000483
484
def cleanup_synopses(doc):
    """Run create_module_info() on every <section> at or below `doc'."""
    sections = find_all_elements(doc, "section")
    for sect in sections:
        create_module_info(doc, sect)
Fred Drakeaaed9711998-12-10 20:25:30 +0000488
489
def remap_element_names(root, name_map):
    """Rename elements below `root' according to `name_map'.

    `name_map' maps an old tag name to a (new name, attributes) pair; the
    attributes dictionary is applied to each renamed element.  `root'
    itself is not renamed.
    """
    stack = []
    for kid in root.childNodes:
        if kid.nodeType == ELEMENT:
            stack.append(kid)
    while stack:
        current = stack.pop()
        old_name = current.tagName
        if name_map.has_key(old_name):
            new_name, attrs = name_map[old_name]
            current._node.name = new_name
            for attr, value in attrs.items():
                current.setAttribute(attr, value)
        for kid in current.childNodes:
            if kid.nodeType == ELEMENT:
                stack.append(kid)
506
507
def fixup_table_structures(doc, fragment):
    """Apply fixup_table() to every <table> at or below `fragment'."""
    # must be done after remap_element_names(), or the tables won't be found
    tables = find_all_elements(fragment, "table")
    for tbl in tables:
        fixup_table(doc, tbl)
512
Fred Drakef8ebb551999-01-14 19:45:38 +0000513
def fixup_table(doc, table):
    """Rebuild `table' as a <tgroup> holding a <thead> and a <tbody>.

    The table's direct <entry> children form the single header row.  An
    <hline> that follows a <row> sets rowsep="1" on that row.  The rows
    are moved into <tbody>; what remains must be whitespace text or
    <hline> elements, which are discarded.

    Raises ConversionError if the table has any other leftover content.
    """
    # create the table head
    thead = doc.createElement("thead")
    row = doc.createElement("row")
    move_elements_by_name(doc, table, row, "entry")
    thead.appendChild(doc.createTextNode("\n "))
    thead.appendChild(row)
    thead.appendChild(doc.createTextNode("\n "))
    # create the table body
    tbody = doc.createElement("tbody")
    prev_row = None
    children = table.childNodes
    for child in children:
        if child.nodeType == ELEMENT:
            tagName = child.tagName
            if tagName == "hline" and prev_row is not None:
                prev_row.setAttribute("rowsep", "1")
            elif tagName == "row":
                prev_row = child
    # save the rows:
    tbody.appendChild(doc.createTextNode("\n "))
    move_elements_by_name(doc, table, tbody, "row", sep="\n ")
    # and toss the rest:
    while children:
        child = children[0]
        nodeType = child.nodeType
        if nodeType == TEXT:
            if string.strip(child.data):
                raise ConversionError("unexpected free data in table")
            table.removeChild(child)
            continue
        if nodeType == ELEMENT:
            if child.tagName != "hline":
                raise ConversionError(
                    "unexpected <%s> in table" % child.tagName)
            table.removeChild(child)
            continue
        raise ConversionError(
            "unexpected %s node in table" % child.__class__.__name__)
    # nothing left in the <table>; add the <thead> and <tbody>
    tgroup = doc.createElement("tgroup")
    tgroup.appendChild(doc.createTextNode("\n "))
    tgroup.appendChild(thead)
    tgroup.appendChild(doc.createTextNode("\n "))
    tgroup.appendChild(tbody)
    tgroup.appendChild(doc.createTextNode("\n "))
    table.appendChild(tgroup)
    # now make the <entry>s look nice:
    for row in table.getElementsByTagName("row"):
        fixup_row(doc, row)
565
566
def fixup_row(doc, row):
    """Insert a whitespace text node before every child of `row' except
    the first.  Nothing is added after the last child."""
    # Snapshot the children first: inserting while walking the row's own
    # child list would shift the positions being visited.  An explicit
    # loop is used because the appends are wanted for their side effect.
    entries = []
    for entry in row.childNodes[1:]:
        entries.append(entry)
    for entry in entries:
        row.insertBefore(doc.createTextNode("\n "), entry)
573
574
def move_elements_by_name(doc, source, dest, name, sep=None):
    """Move the direct child elements of `source' named `name' to the end
    of `dest', keeping their order.

    If `sep' is given and non-empty, a text node containing it is appended
    to `dest' after each moved element.
    """
    selected = []
    for kid in source.childNodes:
        if kid.nodeType != ELEMENT:
            continue
        if kid.tagName == name:
            selected.append(kid)
    for kid in selected:
        source.removeChild(kid)
        dest.appendChild(kid)
        if sep:
            dest.appendChild(doc.createTextNode(sep))
585
586
# Elements the paragraph fixer descends into rather than wrapping.
RECURSE_INTO_PARA_CONTAINERS = (
    "chapter",
    "abstract",
    "enumerate",
    "section",
    "subsection",
    "subsubsection",
    "paragraph",
    "subparagraph",
    "howto",
    "manual",
    )
Fred Drakefcc59101999-01-06 22:50:52 +0000593
# Elements that end the paragraph being built (see build_para()) and that
# skip_leading_nodes() steps over.
PARA_LEVEL_ELEMENTS = (
    "moduleinfo",
    "title",
    "verbatim",
    "enumerate",
    "item",
    "interpreter-session",
    "opcodedesc",
    "classdesc",
    "datadesc",
    "funcdesc",
    "methoddesc",
    "excdesc",
    "funcdescni",
    "methoddescni",
    "excdescni",
    "tableii",
    "tableiii",
    "tableiv",
    "localmoduletable",
    "sectionauthor",
    "seealso",
    # include <para>, so we can just do it again to get subsequent paras:
    "para",
    )
605
# Elements skip_leading_nodes() steps over when looking for the start of
# paragraph material.
PARA_LEVEL_PRECEEDERS = (
    "index",
    "indexii",
    "indexiii",
    "indexiv",
    "setindexsubitem",
    "stindex",
    "obindex",
    "COMMENT",
    "label",
    "input",
    "title",
    )
610
Fred Drake7dab6af1999-01-28 23:59:58 +0000611
def fixup_paras(doc, fragment):
    """Build paragraphs inside the containers found in `fragment'.

    The direct children of `fragment' that are paragraph containers are
    processed first, then every <description> at or below `fragment'.
    """
    for kid in fragment.childNodes:
        if kid.nodeType != ELEMENT:
            continue
        if kid.tagName in RECURSE_INTO_PARA_CONTAINERS:
            fixup_paras_helper(doc, kid)
    for desc in find_all_elements(fragment, "description"):
        fixup_paras_helper(doc, desc)
Fred Drakefcc59101999-01-06 22:50:52 +0000621
622
def fixup_paras_helper(doc, container, depth=0):
    """Wrap the paragraph material of `container' in <para> elements.

    Child elements that are themselves paragraph containers are processed
    recursively.  `depth' is the recursion depth; it is only consulted
    when DEBUG_PARA_FIXER is set, to abort the run at depth 10.
    """
    # document is already normalized
    children = container.childNodes
    start = 0
    while len(children) > start:
        start = skip_leading_nodes(children, start)
        if start >= len(children):
            break
        #
        # Either paragraph material or something to recurse into:
        #
        if (children[start].nodeType == ELEMENT) \
           and (children[start].tagName in RECURSE_INTO_PARA_CONTAINERS):
            # Pass the depth along; otherwise every nested call would see
            # depth 0 and the debugging cut-off below could never trigger.
            fixup_paras_helper(doc, children[start], depth + 1)
            start = skip_leading_nodes(children, start + 1)
            continue
        #
        # paragraph material:
        #
        build_para(doc, container, start, len(children))
        if DEBUG_PARA_FIXER and depth == 10:
            sys.exit(1)
        start = start + 1
Fred Drakefcc59101999-01-06 22:50:52 +0000646
647
def build_para(doc, parent, start, i):
    """Wrap a run of children of `parent', beginning at `start', in a new
    <para> element.

    The run ends before index `i', at the first element that breaks a
    paragraph, or at the first blank line ("\\n\\n") in a text node,
    whichever comes first.  Trailing whitespace of the last text node is
    split off and left outside the paragraph.

    Returns the index just past the new <para> in `parent'.  Raises
    ConversionError when no content can be collected.
    """
    children = parent.childNodes
    after = start + 1
    have_last = 0
    break_tags = PARA_LEVEL_ELEMENTS + RECURSE_INTO_PARA_CONTAINERS
    # Collect all children until \n\n+ is found in a text node or a
    # member of break_tags is found.
    j = start
    while j < i:
        after = j + 1
        child = children[j]
        kind = child.nodeType
        if kind == ELEMENT:
            if child.tagName in break_tags:
                after = j
                break
        elif kind == TEXT:
            found = string.find(child.data, "\n\n")
            if found == 0:
                after = j
                break
            if found >= 1:
                child.splitText(found)
                break
        j = j + 1
    else:
        # Ran through index i - 1 without hitting a paragraph break.
        have_last = 1
    if after < (start + 1):
        raise ConversionError(
            "build_para() could not identify content to turn into a paragraph")
    tail = children[after - 1]
    if tail.nodeType == TEXT:
        # we may need to split off trailing white space:
        data = tail.data
        stripped = string.rstrip(data)
        if stripped != data:
            have_last = 0
            tail.splitText(len(stripped))
    para = doc.createElement("para")
    prev = None
    # Move the collected nodes last-to-first, each one in front of the
    # node moved before it, so their order is kept.
    j = after - 1
    while j >= start:
        node = parent.childNodes[j]
        parent.removeChild(node)
        para.insertBefore(node, prev)
        prev = node
        j = j - 1
    if not have_last:
        parent.insertBefore(para, parent.childNodes[start])
        return start + 1
    parent.appendChild(para)
    return len(parent.childNodes)
Fred Drakefcc59101999-01-06 22:50:52 +0000698
699
def skip_leading_nodes(children, start):
    """Return the index in children at which paragraph building should
    start, or at which fixup_paras_helper() should recurse (for
    subsections and the like).

    A return value >= len(children) means no more paragraphs can be built
    from this list of children.
    """
    limit = len(children)
    skippable = PARA_LEVEL_ELEMENTS + PARA_LEVEL_PRECEEDERS
    while start < limit:
        # skip over leading comments and whitespace:
        child = children[start]
        kind = child.nodeType
        if kind == TEXT:
            data = child.data
            stripped = string.lstrip(data)
            if stripped:
                if stripped == data:
                    return start
                # break into two nodes: whitespace and non-whitespace
                child.splitText(len(data) - len(stripped))
                return start + 1
            # all whitespace, just skip
        elif kind == ELEMENT:
            gi = child.tagName
            if gi in RECURSE_INTO_PARA_CONTAINERS or gi not in skippable:
                return start
        start = start + 1
    return start
Fred Drakefba0ba21998-12-10 05:07:09 +0000731
732
def fixup_rfc_references(doc, fragment):
    """Append the text "RFC <num>" to every <rfc> element at or below
    `fragment', taking the number from its "num" attribute."""
    for rfc in find_all_elements(fragment, "rfc"):
        label = "RFC " + rfc.getAttribute("num")
        rfc.appendChild(doc.createTextNode(label))
Fred Draked24167b1999-01-14 21:18:03 +0000737
738
def fixup_signatures(doc, fragment):
    """Rewrite the argument lists found under each child element of fragment.

    For every element that is a direct child of fragment, each "args"
    element and then each "constructor-args" element returned by
    getElementsByTagName() is passed to fixup_args() and then normalized.
    """
    for node in fragment.childNodes:
        if node.nodeType != ELEMENT:
            continue
        # Same treatment for both kinds of argument list; "args" first.
        for tag in ("args", "constructor-args"):
            for arglist in node.getElementsByTagName(tag):
                fixup_args(doc, arglist)
                arglist.normalize()
750
751
def fixup_args(doc, arglist):
    """Replace each "optional" child element of arglist with bracketed content.

    An "optional" element is replaced, in place, by a "[" text node, the
    element's former children, and a "]" text node.  The child list is
    rescanned from the start after every replacement, so an "optional"
    element that was nested inside another one is handled on a later pass
    once it has been hoisted into arglist.
    """
    while 1:
        for node in arglist.childNodes:
            if node.nodeType == ELEMENT and node.tagName == "optional":
                break
        else:
            # no "optional" children remain
            return
        arglist.insertBefore(doc.createTextNode("["), node)
        kids = node.childNodes
        # Move the children out one at a time, always taking the first, so
        # their relative order is kept.
        while kids:
            first = kids[0]
            node.removeChild(first)
            arglist.insertBefore(first, node)
        arglist.insertBefore(doc.createTextNode("]"), node)
        arglist.removeChild(node)
766
767
def fixup_sectionauthors(doc, fragment):
    """Turn each "sectionauthor" element into a relocated "author" element.

    Every element returned by find_all_elements(fragment, "sectionauthor")
    is detached from its parent, renamed to "author", and given its former
    "name" attribute value as text content (the attribute itself is
    removed).  It is then re-inserted into the same parent, preceded by a
    whitespace text node.
    """
    for author in find_all_elements(fragment, "sectionauthor"):
        parent = author.parentNode
        parent.removeChild(author)
        author._node.name = "author"
        name_text = doc.createTextNode(author.getAttribute("name"))
        author.appendChild(name_text)
        author.removeAttribute("name")
        # Default insertion point is before the third remaining child.
        # NOTE(review): presumably the second child is normally the
        # section's title -- confirm against the input documents.
        anchor = parent.childNodes[2]
        second = parent.childNodes[1]
        if second.nodeType == ELEMENT and second.tagName != "title":
            # The second child is some other element: insert at the very
            # start of the parent instead.
            anchor = parent.childNodes[0]
        for new_node in (doc.createTextNode("\n "), author):
            parent.insertBefore(new_node, anchor)
782
783
def fixup_verbatims(doc):
    """Rename "verbatim" elements that hold interpreter sessions.

    For each element returned by find_all_elements(doc, "verbatim"), if
    the first child is a text node whose content, ignoring leading
    whitespace, starts with ">>>", the element is renamed to
    "interactive-session".

    Elements with no children are left untouched; indexing their empty
    child list would otherwise raise IndexError.
    """
    for verbatim in find_all_elements(doc, "verbatim"):
        if not verbatim.hasChildNodes():
            # an empty element cannot be an interactive session
            continue
        first = verbatim.childNodes[0]
        if first.nodeType == TEXT \
           and string.lstrip(first.data)[:3] == ">>>":
            verbatim._node.name = "interactive-session"
Fred Drake93d762f1999-02-18 16:32:21 +0000790
791
# Matches a value made of a letter followed by any number of letters, digits,
# "." or "-", through to the end of the string.  write_esis() reports
# attribute values that match as TOKEN and all others as CDATA.
_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
Fred Drakefcc59101999-01-06 22:50:52 +0000793
def write_esis(doc, ofp, knownempty):
    """Write the children of doc to ofp as ESIS records, recursively.

    doc         node whose childNodes are written (the node itself is not)
    ofp         object with a write() method that receives the output
    knownempty  callable taking a tag name; a true result marks the
                element as declared empty

    Text nodes produce a "-" record.  Elements produce an optional "e"
    record (when declared empty), one "A" record per attribute, then "("
    and ")" records surrounding the records for their own children.

    Raises ValueError when a declared-empty element has children and
    RuntimeError for any node that is neither an element nor text.
    """
    for child in doc.childNodes:
        kind = child.nodeType
        if kind == TEXT:
            ofp.write("-%s\n" % esistools.encode(child.data))
            continue
        if kind != ELEMENT:
            raise RuntimeError("unsupported node type: %s" % kind)
        name = child.tagName
        if knownempty(name):
            if child.hasChildNodes():
                raise ValueError("declared-empty node has children")
            ofp.write("e\n")
        for attr_name, attr in child.attributes.items():
            text = attr.value
            # values that look like a simple name token are typed TOKEN
            if _token_rx.match(text):
                dtype = "TOKEN"
            else:
                dtype = "CDATA"
            ofp.write("A%s %s %s\n"
                      % (attr_name, dtype, esistools.encode(text)))
        ofp.write("(%s\n" % name)
        write_esis(child, ofp, knownempty)
        ofp.write(")%s\n" % name)
817
818
Fred Drake03204731998-11-23 17:02:03 +0000819def convert(ifp, ofp):
Fred Drake4db5b461998-12-01 19:03:01 +0000820 p = esistools.ExtendedEsisBuilder()
Fred Drake03204731998-11-23 17:02:03 +0000821 p.feed(ifp.read())
822 doc = p.document
Fred Drakee779d4f1999-05-10 19:36:52 +0000823 fragment = p.fragment
824 normalize(fragment)
825 simplify(doc, fragment)
826 handle_labels(fragment)
827 handle_appendix(doc, fragment)
Fred Drake1ff6db41998-11-23 23:10:35 +0000828 fixup_trailing_whitespace(doc, {
829 "abstract": "\n",
830 "title": "",
831 "chapter": "\n\n",
832 "section": "\n\n",
833 "subsection": "\n\n",
834 "subsubsection": "\n\n",
835 "paragraph": "\n\n",
836 "subparagraph": "\n\n",
837 })
Fred Drake03204731998-11-23 17:02:03 +0000838 cleanup_root_text(doc)
Fred Drake1ff6db41998-11-23 23:10:35 +0000839 cleanup_trailing_parens(doc, ["function", "method", "cfunction"])
Fred Drakefba0ba21998-12-10 05:07:09 +0000840 cleanup_synopses(doc)
Fred Drakee779d4f1999-05-10 19:36:52 +0000841 fixup_descriptors(doc, fragment)
842 fixup_verbatims(fragment)
843 normalize(fragment)
844 fixup_paras(doc, fragment)
845 fixup_sectionauthors(doc, fragment)
846 remap_element_names(fragment, {
Fred Drakef8ebb551999-01-14 19:45:38 +0000847 "tableii": ("table", {"cols": "2"}),
848 "tableiii": ("table", {"cols": "3"}),
849 "tableiv": ("table", {"cols": "4"}),
850 "lineii": ("row", {}),
851 "lineiii": ("row", {}),
852 "lineiv": ("row", {}),
Fred Draked6ced7d1999-01-19 17:11:23 +0000853 "refmodule": ("module", {"link": "link"}),
Fred Drakef8ebb551999-01-14 19:45:38 +0000854 })
Fred Drakee779d4f1999-05-10 19:36:52 +0000855 fixup_table_structures(doc, fragment)
856 fixup_rfc_references(doc, fragment)
857 fixup_signatures(doc, fragment)
Fred Drake4db5b461998-12-01 19:03:01 +0000858 #
859 d = {}
860 for gi in p.get_empties():
861 d[gi] = gi
Fred Draked24167b1999-01-14 21:18:03 +0000862 if d.has_key("rfc"):
863 del d["rfc"]
Fred Drake4db5b461998-12-01 19:03:01 +0000864 knownempty = d.has_key
865 #
Fred Drake03204731998-11-23 17:02:03 +0000866 try:
Fred Drakee779d4f1999-05-10 19:36:52 +0000867 write_esis(fragment, ofp, knownempty)
Fred Drake03204731998-11-23 17:02:03 +0000868 except IOError, (err, msg):
869 # Ignore EPIPE; it just means that whoever we're writing to stopped
870 # reading. The rest of the output would be ignored. All other errors
871 # should still be reported,
872 if err != errno.EPIPE:
873 raise
874
875
def main():
    """Command-line entry point: convert input to output.

    With no arguments, reads standard input and writes standard output.
    With one argument, reads the named file and writes standard output.
    With two arguments, reads the first named file and writes the second.
    With more, calls usage() and exits with status 2.

    Files opened here are closed again once convert() returns or raises;
    sys.stdin and sys.stdout are never closed.
    """
    args = sys.argv[1:]
    if len(args) > 2:
        usage()
        sys.exit(2)
    ifp = sys.stdin
    ofp = sys.stdout
    opened = []                 # only the files this function opened
    try:
        if args:
            ifp = open(args[0])
            opened.append(ifp)
        if len(args) == 2:
            ofp = open(args[1], "w")
            opened.append(ofp)
        convert(ifp, ofp)
    finally:
        # Closing the output file also flushes anything still buffered.
        for fp in opened:
            fp.close()
890
891
# Run the conversion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()