blob: c1ea9707742acefda5a551444f2254a9f70f15e8 [file] [log] [blame]
#! /usr/bin/env python

"""Perform massive transformations on a document tree created from the LaTeX
of the Python documentation, and dump the ESIS data for the transformed tree.
"""
Fred Drake03204731998-11-23 17:02:03 +00006
7
import errno
import os
import re
import string
import sys
import xml.dom
import xml.dom.minidom

import esistools
Fred Drakee779d4f1999-05-10 19:36:52 +000015
# Short aliases for the DOM node-type codes compared against node.nodeType.
ELEMENT = xml.dom.Node.ELEMENT_NODE
ENTITY_REFERENCE = xml.dom.Node.ENTITY_REFERENCE_NODE
TEXT = xml.dom.Node.TEXT_NODE
Fred Drake03204731998-11-23 17:02:03 +000019
20
class ConversionError(Exception):
    """Raised when the tree has a structure the fixers cannot convert."""
23
24
# ewrite() writes plain text to stderr; bwrite() writes it in bold when
# stderr is a terminal on a POSIX system, and is plain ewrite() otherwise.
ewrite = sys.stderr.write
try:
    # We can only do this trick on Unix (if tput is on $PATH)!
    # os.name is "posix" on Unix; sys.platform holds values such as
    # "linux" or "darwin" and is never "posix", so testing it kept the
    # bold writer permanently disabled.
    if os.name != "posix" or not sys.stderr.isatty():
        raise ImportError
    try:
        from commands import getoutput      # Python 2
    except ImportError:
        from subprocess import getoutput    # Python 3
except ImportError:
    bwrite = ewrite
else:
    # The escape sequences are fetched once, when the function is defined.
    def bwrite(s, BOLDON=getoutput("tput bold"),
               BOLDOFF=getoutput("tput sgr0")):
        """Write s to stderr between the terminal's bold-on/off codes."""
        ewrite("%s%s%s" % (BOLDON, s, BOLDOFF))
37
38
# Tag name given to the paragraph elements created by build_para().
PARA_ELEMENT = "para"

# Set to a true value to get paragraph-fixer tracing on stderr.
DEBUG_PARA_FIXER = 0

if DEBUG_PARA_FIXER:
    def para_msg(s):
        """Write a paragraph-fixer trace message to stderr."""
        ewrite("*** %s\n" % s)
else:
    def para_msg(s):
        """Discard the message; tracing is disabled."""
        pass
49
Fred Drakefcc59101999-01-06 22:50:52 +000050
def get_first_element(doc, gi):
    """Return the first direct child of doc whose nodeName is gi.

    Only immediate children are examined.  Returns None when no child
    matches.
    """
    for n in doc.childNodes:
        if n.nodeName == gi:
            return n
    return None
55
def extract_first_element(doc, gi):
    """Detach and return the first direct child of doc named gi.

    Returns None, leaving doc untouched, when there is no such child.
    """
    node = get_first_element(doc, gi)
    if node is not None:
        doc.removeChild(node)
    return node
61
62
def get_documentElement(node):
    """Return the last element-type child of node, or None if there is none.

    Note that when node has several element children the *last* one wins,
    since the scan does not stop at the first match.
    """
    result = None
    for child in node.childNodes:
        if child.nodeType == ELEMENT:
            result = child
    return result
69
70
def set_tagName(elem, gi):
    """Rename elem to gi by assigning both its nodeName and tagName."""
    elem.nodeName = elem.tagName = gi
73
74
def find_all_elements(doc, gi):
    """Return a list of doc (if it is named gi) and all descendants named gi.

    For each element child, the child itself is listed first (when it
    matches), followed by the matches getElementsByTagName() finds below it.
    """
    nodes = []
    if doc.nodeName == gi:
        nodes.append(doc)
    for child in doc.childNodes:
        if child.nodeType == ELEMENT:
            if child.tagName == gi:
                nodes.append(child)
            nodes.extend(child.getElementsByTagName(gi))
    return nodes
86
def find_all_child_elements(doc, gi):
    """Return a list of the direct children of doc whose nodeName is gi."""
    return [child for child in doc.childNodes if child.nodeName == gi]
93
Fred Drake3e8f9212001-03-23 17:01:47 +000094
def find_all_elements_from_set(doc, gi_set):
    """Return doc and its descendants whose nodeName is in gi_set.

    The result is a new list in document (pre-order) sequence.
    """
    return __find_all_elements_from_set(doc, gi_set, [])

def __find_all_elements_from_set(doc, gi_set, nodes):
    """Recursive worker: append matches under doc to nodes and return it."""
    if doc.nodeName in gi_set:
        nodes.append(doc)
    for child in doc.childNodes:
        # Only elements are descended into.
        if child.nodeType == ELEMENT:
            __find_all_elements_from_set(child, gi_set, nodes)
    return nodes
Fred Drake7dab6af1999-01-28 23:59:58 +0000105
106
def simplify(doc, fragment):
    """Rationalize the top level of fragment into a single-rooted tree.

    doc is used as the factory for new text nodes; fragment is modified
    in place:

    - a top-level <documentclass> is removed and its "classname"
      attribute becomes the new name of the top-level <document> element;
    - a top-level <title> and every top-level <input> are moved to the
      front of the element returned by get_documentElement(), each
      followed by a newline text node;
    - leading text nodes of fragment are discarded.
    """
    # Try to rationalize the document a bit, since these things are simply
    # not valid SGML/XML documents as they stand, and need a little work.
    documentclass = "document"
    inputs = []
    node = extract_first_element(fragment, "documentclass")
    if node is not None:
        documentclass = node.getAttribute("classname")
    node = extract_first_element(fragment, "title")
    if node is not None:
        inputs.append(node)
    # update the name of the root element
    node = get_first_element(fragment, "document")
    if node is not None:
        set_tagName(node, documentclass)
    # pull out every top-level <input>, in document order
    node = extract_first_element(fragment, "input")
    while node is not None:
        inputs.append(node)
        node = extract_first_element(fragment, "input")
    if inputs:
        docelem = get_documentElement(fragment)
        # Each node is pushed onto the front, so walk the list backwards
        # to end up with the original order.
        inputs.reverse()
        for node in inputs:
            text = doc.createTextNode("\n")
            docelem.insertBefore(text, docelem.firstChild)
            docelem.insertBefore(node, text)
        docelem.insertBefore(doc.createTextNode("\n"), docelem.firstChild)
    while fragment.firstChild and fragment.firstChild.nodeType == TEXT:
        fragment.removeChild(fragment.firstChild)
Fred Drake03204731998-11-23 17:02:03 +0000137
138
def cleanup_root_text(doc):
    """Remove direct text children of doc, except those right after a COMMENT.

    A text node is kept only when the child immediately before it has the
    nodeName "COMMENT".
    """
    discards = []
    skip = 0
    for n in doc.childNodes:
        prevskip = skip
        skip = 0
        if n.nodeType == TEXT and not prevskip:
            discards.append(n)
        elif n.nodeName == "COMMENT":
            skip = 1
    # Removal is deferred so the child list is not changed while scanning it.
    for node in discards:
        doc.removeChild(node)
151
152
# Element names that find_and_fix_descriptors() passes to
# rewrite_descriptor().
DESCRIPTOR_ELEMENTS = (
    "cfuncdesc", "cvardesc", "ctypedesc",
    "classdesc", "memberdesc", "memberdescni", "methoddesc", "methoddescni",
    "excdesc", "funcdesc", "funcdescni", "opcodedesc",
    "datadesc", "datadescni",
    )
159
def fixup_descriptors(doc, fragment):
    """Rewrite the descriptor elements of every <section> in fragment."""
    for section in find_all_elements(fragment, "section"):
        find_and_fix_descriptors(doc, section)
164
165
def find_and_fix_descriptors(doc, container):
    """Rewrite descriptor children of container, descending into subsections.

    Element children named in DESCRIPTOR_ELEMENTS are handed to
    rewrite_descriptor(); <subsection> children are processed recursively.
    Other children are left alone.
    """
    for child in container.childNodes:
        if child.nodeType != ELEMENT:
            continue
        tagName = child.tagName
        if tagName in DESCRIPTOR_ELEMENTS:
            rewrite_descriptor(doc, child)
        elif tagName == "subsection":
            find_and_fix_descriptors(doc, child)
175
Fred Drakecb657811999-01-29 20:55:07 +0000176
def rewrite_descriptor(doc, descriptor):
    """Restructure a descriptor element into signatures plus a description.

    doc is the factory for new nodes; descriptor is modified in place and
    ends up containing one or more <signature> elements followed by a
    single <description> element.

    Raises RuntimeError if a "var" attribute is present on anything other
    than an <opcodedesc>.
    """
    #
    # Do these things:
    #   1. Add an "index='no'" attribute to the element if the tagName
    #      ends in 'ni', removing the 'ni' from the name.
    #   2. Create a <signature> from the name attribute
    #   2a.Create an <args> if it appears to be available.
    #   3. Create additional <signature>s from <*line{,ni}> elements,
    #      if found.
    #   4. If a <versionadded> is found, move it to an attribute on the
    #      descriptor.
    #   5. Move remaining child nodes to a <description> element.
    #   6. Put it back together.
    #
    # 1.
    descname = descriptor.tagName
    # NOTE(review): this tests the "name" attribute, so `index` is false
    # only for a descriptor literally named "no"; the step-1 comment above
    # suggests an "index" attribute was meant -- confirm before changing,
    # since linename below depends on it.
    index = descriptor.getAttribute("name") != "no"
    desctype = descname[:-4]            # remove 'desc'
    linename = desctype + "line"
    if not index:
        linename = linename + "ni"
    # 2.
    signature = doc.createElement("signature")
    name = doc.createElement("name")
    signature.appendChild(doc.createTextNode("\n "))
    signature.appendChild(name)
    name.appendChild(doc.createTextNode(descriptor.getAttribute("name")))
    descriptor.removeAttribute("name")
    # 2a.
    if descriptor.hasAttribute("var"):
        if descname != "opcodedesc":
            raise RuntimeError(
                "got 'var' attribute on descriptor other than opcodedesc")
        variable = descriptor.getAttribute("var")
        if variable:
            args = doc.createElement("args")
            args.appendChild(doc.createTextNode(variable))
            signature.appendChild(doc.createTextNode("\n "))
            signature.appendChild(args)
        descriptor.removeAttribute("var")
    newchildren = [signature]
    children = descriptor.childNodes
    pos = skip_leading_nodes(children)
    if pos < len(children):
        child = children[pos]
        if child.nodeName == "args":
            # move <args> to <signature>, or remove if empty:
            child.parentNode.removeChild(child)
            if len(child.childNodes):
                signature.appendChild(doc.createTextNode("\n "))
                signature.appendChild(child)
    signature.appendChild(doc.createTextNode("\n "))
    # 3, 4.
    pos = skip_leading_nodes(children, pos)
    while pos < len(children) \
          and children[pos].nodeName in (linename, "versionadded"):
        if children[pos].tagName == linename:
            # this is really a supplemental signature, create <signature>
            # (keep a copy so the element can be shown if conversion fails)
            oldchild = children[pos].cloneNode(1)
            try:
                sig = methodline_to_signature(doc, children[pos])
            except KeyError:
                print(oldchild.toxml())
                raise
            newchildren.append(sig)
        else:
            # <versionadded added=...>
            descriptor.setAttribute(
                "added", children[pos].getAttribute("version"))
        pos = skip_leading_nodes(children, pos + 1)
    # 5.
    description = doc.createElement("description")
    description.appendChild(doc.createTextNode("\n"))
    newchildren.append(description)
    move_children(descriptor, description, pos)
    last = description.childNodes[-1]
    if last.nodeType == TEXT:
        last.data = last.data.rstrip() + "\n "
    # 6.
    # should have nothing but whitespace and signature lines in <descriptor>;
    # discard them
    while descriptor.childNodes:
        descriptor.removeChild(descriptor.childNodes[0])
    for node in newchildren:
        descriptor.appendChild(doc.createTextNode("\n "))
        descriptor.appendChild(node)
    descriptor.appendChild(doc.createTextNode("\n"))
Fred Drake03204731998-11-23 17:02:03 +0000264
Fred Drake7dab6af1999-01-28 23:59:58 +0000265
def methodline_to_signature(doc, methodline):
    """Build and return a <signature> element from a *line element.

    The "name" attribute of methodline becomes the text of a <name> child
    and is removed from methodline.  If methodline has any children they
    are moved into an <args> child of the new signature.
    """
    signature = doc.createElement("signature")
    signature.appendChild(doc.createTextNode("\n "))
    name = doc.createElement("name")
    name.appendChild(doc.createTextNode(methodline.getAttribute("name")))
    methodline.removeAttribute("name")
    signature.appendChild(name)
    if len(methodline.childNodes):
        args = doc.createElement("args")
        signature.appendChild(doc.createTextNode("\n "))
        signature.appendChild(args)
        move_children(methodline, args)
    signature.appendChild(doc.createTextNode("\n "))
    return signature
Fred Drake03204731998-11-23 17:02:03 +0000280
281
def move_children(origin, dest, start=0):
    """Move the children of origin from index start onward to the end of dest.

    The relative order of the moved nodes is preserved.
    """
    children = origin.childNodes
    # Each removal shifts the next node down to index `start`.
    while start < len(children):
        node = children[start]
        origin.removeChild(node)
        dest.appendChild(node)
288
289
def handle_appendix(doc, fragment):
    """Move everything after the <appendix> marker into a <back-matter>.

    The first <appendix> element found below a child of the document
    element is removed; every later child of the document element is then
    moved into a new <back-matter> element appended to the document
    element.  Nothing is added when no nodes follow the marker.
    """
    # must be called after simplfy() if document is multi-rooted to begin with
    docelem = get_documentElement(fragment)
    appendices = 0
    nodes = []
    for node in docelem.childNodes:
        if appendices:
            nodes.append(node)
        elif node.nodeType == ELEMENT:
            appnodes = node.getElementsByTagName("appendix")
            if appnodes:
                appendices = 1
                parent = appnodes[0].parentNode
                parent.removeChild(appnodes[0])
                parent.normalize()
    if nodes:
        # Explicit loops: map() is lazy on Python 3 and would move nothing.
        for node in nodes:
            docelem.removeChild(node)
        docelem.appendChild(doc.createTextNode("\n\n\n"))
        back = doc.createElement("back-matter")
        docelem.appendChild(back)
        back.appendChild(doc.createTextNode("\n"))
        # drop leading whitespace-only text nodes
        while nodes and nodes[0].nodeType == TEXT \
              and not nodes[0].data.strip():
            del nodes[0]
        for node in nodes:
            back.appendChild(node)
        docelem.appendChild(doc.createTextNode("\n"))
Fred Drake03204731998-11-23 17:02:03 +0000317
318
def handle_labels(doc, fragment):
    """Turn <label id="..."/> elements into "id" attributes.

    The id goes on the label's parent, or on the parent's parent when the
    label sits inside a <title>.  The <label> itself is removed; labels
    with an empty or missing id are left untouched.
    """
    for label in find_all_elements(fragment, "label"):
        label_id = label.getAttribute("id")
        if not label_id:
            continue
        parent = label.parentNode
        parentTagName = parent.tagName
        if parentTagName == "title":
            parent.parentNode.setAttribute("id", label_id)
        else:
            parent.setAttribute("id", label_id)
        # now, remove <label id="..."/> from parent:
        parent.removeChild(label)
        if parentTagName == "title":
            parent.normalize()
            children = parent.childNodes
            # The title may be empty once the label is gone.
            if children and children[-1].nodeType == TEXT:
                children[-1].data = children[-1].data.rstrip()
Fred Drake03204731998-11-23 17:02:03 +0000337
338
def fixup_trailing_whitespace(doc, wsmap):
    """Normalize the whitespace that ends selected elements.

    doc is walked breadth-first.  For every node whose name is a key of
    wsmap, a trailing text child has its trailing whitespace replaced by
    the mapped string.  doc is also used to create new text nodes.
    """
    queue = [doc]
    while queue:
        node = queue.pop(0)
        if node.nodeName in wsmap:
            ws = wsmap[node.nodeName]
            children = node.childNodes
            # Look at the last child directly; an element may be empty.
            if children and children[-1].nodeType == TEXT:
                last = children[-1]
                last.data = last.data.rstrip() + ws
            # hack to get the title in place:
            if node.nodeName == "title" \
               and node.parentNode.firstChild.nodeType == ELEMENT:
                node.parentNode.insertBefore(doc.createTextNode("\n "),
                                             node.parentNode.firstChild)
        for child in node.childNodes:
            if child.nodeType == ELEMENT:
                queue.append(child)
360
361
def normalize(doc):
    """Call normalize() on every element that is a direct child of doc."""
    for node in doc.childNodes:
        if node.nodeType == ELEMENT:
            node.normalize()
366
367
def cleanup_trailing_parens(doc, element_names):
    """Strip a trailing "()" from the text of the named elements.

    Elements below doc are visited breadth-first.  An element whose name
    is in element_names is rewritten only when its sole child is a text
    node ending in "()"; such elements are never descended into.
    """
    rewrite_names = set(element_names)
    queue = [node for node in doc.childNodes if node.nodeType == ELEMENT]
    while queue:
        node = queue.pop(0)
        if node.tagName in rewrite_names:
            children = node.childNodes
            if len(children) == 1 and children[0].nodeType == TEXT:
                data = children[0].data
                if data[-2:] == "()":
                    children[0].data = data[:-2]
        else:
            for child in node.childNodes:
                if child.nodeType == ELEMENT:
                    queue.append(child)
391
392
def contents_match(left, right):
    """Return 1 if left and right have equivalent children, else 0.

    Children are compared pairwise: elements must have the same tag name
    and matching contents (attributes are not compared), text nodes must
    have equal data.  Any other node type counts as a mismatch.  The names
    of left and right themselves are not compared.
    """
    left_children = left.childNodes
    right_children = right.childNodes
    if len(left_children) != len(right_children):
        return 0
    # The lengths are equal here, so zip() pairs every child.
    for l, r in zip(left_children, right_children):
        nodeType = l.nodeType
        if nodeType != r.nodeType:
            return 0
        if nodeType == ELEMENT:
            if l.tagName != r.tagName:
                return 0
            # should check attributes, but that's not a problem here
            if not contents_match(l, r):
                return 0
        elif nodeType == TEXT:
            if l.data != r.data:
                return 0
        else:
            # not quite right, but good enough
            return 0
    return 1
415
416
def create_module_info(doc, section):
    """Collect a section's module metadata into a <moduleinfo> element.

    Does nothing unless section has a <modulesynopsis> child.  Otherwise
    the <modulesynopsis> becomes <synopsis>, a <moduleauthor> becomes
    <author> (its "name" attribute turned into text), and -- when section
    is a <section> -- these are gathered, together with data from
    <declaremodule>, <versionadded>, <platform> and possibly the <title>,
    into a new <moduleinfo> child of section.
    """
    # Heavy.
    node = extract_first_element(section, "modulesynopsis")
    if node is None:
        return
    set_tagName(node, "synopsis")
    lastchild = node.childNodes[-1]
    if lastchild.nodeType == TEXT \
       and lastchild.data[-1:] == ".":
        # drop the final period
        lastchild.data = lastchild.data[:-1]
    modauthor = extract_first_element(section, "moduleauthor")
    if modauthor:
        set_tagName(modauthor, "author")
        modauthor.appendChild(doc.createTextNode(
            modauthor.getAttribute("name")))
        modauthor.removeAttribute("name")
    platform = extract_first_element(section, "platform")
    if section.tagName == "section":
        modinfo_pos = 2
        modinfo = doc.createElement("moduleinfo")
        moddecl = extract_first_element(section, "declaremodule")
        name = None
        if moddecl:
            modinfo.appendChild(doc.createTextNode("\n "))
            name = moddecl.attributes["name"].value
            namenode = doc.createElement("name")
            namenode.appendChild(doc.createTextNode(name))
            modinfo.appendChild(namenode)
            typeattr = moddecl.attributes.get("type")
            if typeattr:
                modtype = typeattr.value
                modinfo.appendChild(doc.createTextNode("\n "))
                typenode = doc.createElement("type")
                typenode.appendChild(doc.createTextNode(modtype))
                modinfo.appendChild(typenode)
        versionadded = extract_first_element(section, "versionadded")
        if versionadded:
            modinfo.setAttribute("added", versionadded.getAttribute("version"))
        title = get_first_element(section, "title")
        if title:
            children = title.childNodes
            if len(children) >= 2 \
               and children[0].nodeName == "module" \
               and children[0].childNodes[0].data == name:
                # this is it; morph the <title> into <short-synopsis>
                first_data = children[1]
                if first_data.data[:4] == " ---":
                    first_data.data = first_data.data[4:].lstrip()
                set_tagName(title, "short-synopsis")
                if children[-1].nodeType == TEXT \
                   and children[-1].data[-1:] == ".":
                    children[-1].data = children[-1].data[:-1]
                section.removeChild(title)
                section.removeChild(section.childNodes[0])
                title.removeChild(children[0])
                modinfo_pos = 0
            else:
                ewrite("module name in title doesn't match"
                       " <declaremodule/>; no <short-synopsis/>\n")
        else:
            ewrite("Unexpected condition: <section/> without <title/>\n")
        modinfo.appendChild(doc.createTextNode("\n "))
        modinfo.appendChild(node)
        if title and not contents_match(title, node):
            # The short synopsis is actually different,
            # and needs to be stored:
            modinfo.appendChild(doc.createTextNode("\n "))
            modinfo.appendChild(title)
        if modauthor:
            modinfo.appendChild(doc.createTextNode("\n "))
            modinfo.appendChild(modauthor)
        if platform:
            modinfo.appendChild(doc.createTextNode("\n "))
            modinfo.appendChild(platform)
        modinfo.appendChild(doc.createTextNode("\n "))
        section.insertBefore(modinfo, section.childNodes[modinfo_pos])
        section.insertBefore(doc.createTextNode("\n "), modinfo)
        #
        # The rest of this removes extra newlines from where we cut out
        # a lot of elements.  A lot of code for minimal value, but it
        # keeps the generated *ML from being too funny looking.
        #
        section.normalize()
        children = section.childNodes
        for i in range(len(children)):
            node = children[i]
            if node.nodeName == "moduleinfo":
                nextnode = children[i+1]
                if nextnode.nodeType == TEXT:
                    data = nextnode.data
                    stripped = data.lstrip()
                    if len(stripped) < (len(data) - 4):
                        nextnode.data = "\n\n\n" + stripped
Fred Drakeaaed9711998-12-10 20:25:30 +0000509
510
def cleanup_synopses(doc, fragment):
    """Run create_module_info() on every <section> found in fragment."""
    for node in find_all_elements(fragment, "section"):
        create_module_info(doc, node)
Fred Drakeaaed9711998-12-10 20:25:30 +0000514
515
def fixup_table_structures(doc, fragment):
    """Run fixup_table() on every <table> found in fragment."""
    for table in find_all_elements(fragment, "table"):
        fixup_table(doc, table)
519
Fred Drakef8ebb551999-01-14 19:45:38 +0000520
def fixup_table(doc, table):
    """Rebuild table as a <tgroup> holding a <thead> and a <tbody>.

    Direct <entry> children become the single header row, direct <row>
    children become the body, and an <hline> sets rowsep="1" on the row
    before it.  Raises ConversionError if anything other than whitespace
    text and <hline> elements is left over.
    """
    # create the table head
    thead = doc.createElement("thead")
    row = doc.createElement("row")
    move_elements_by_name(doc, table, row, "entry")
    thead.appendChild(doc.createTextNode("\n "))
    thead.appendChild(row)
    thead.appendChild(doc.createTextNode("\n "))
    # create the table body
    tbody = doc.createElement("tbody")
    prev_row = None
    children = table.childNodes
    for child in children:
        if child.nodeType == ELEMENT:
            tagName = child.tagName
            if tagName == "hline" and prev_row is not None:
                prev_row.setAttribute("rowsep", "1")
            elif tagName == "row":
                prev_row = child
    # save the rows:
    tbody.appendChild(doc.createTextNode("\n "))
    move_elements_by_name(doc, table, tbody, "row", sep="\n ")
    # and toss the rest:
    while children:
        child = children[0]
        nodeType = child.nodeType
        if nodeType == TEXT:
            if child.data.strip():
                raise ConversionError("unexpected free data in <%s>: %r"
                                      % (table.tagName, child.data))
            table.removeChild(child)
            continue
        if nodeType == ELEMENT:
            if child.tagName != "hline":
                raise ConversionError(
                    "unexpected <%s> in table" % child.tagName)
            table.removeChild(child)
            continue
        raise ConversionError(
            "unexpected %s node in table" % child.__class__.__name__)
    # nothing left in the <table>; add the <thead> and <tbody>
    tgroup = doc.createElement("tgroup")
    tgroup.appendChild(doc.createTextNode("\n "))
    tgroup.appendChild(thead)
    tgroup.appendChild(doc.createTextNode("\n "))
    tgroup.appendChild(tbody)
    tgroup.appendChild(doc.createTextNode("\n "))
    table.appendChild(tgroup)
    # now make the <entry>s look nice:
    for row in table.getElementsByTagName("row"):
        fixup_row(doc, row)
573
574
def fixup_row(doc, row):
    """Insert a newline-and-indent text node before each child after the first."""
    # Snapshot the children first: inserting while walking the live child
    # list would shift it.  (map() used for this is a no-op on Python 3.)
    entries = list(row.childNodes[1:])
    for entry in entries:
        row.insertBefore(doc.createTextNode("\n "), entry)
#    row.appendChild(doc.createTextNode("\n "))
581
582
def move_elements_by_name(doc, source, dest, name, sep=None):
    """Move the direct children of source named name to the end of dest.

    When sep is given (and non-empty), a text node containing sep is
    appended to dest after each moved node.  doc creates those text nodes.
    """
    # Collect first so the child list is not modified while scanning it.
    nodes = [child for child in source.childNodes if child.nodeName == name]
    for node in nodes:
        source.removeChild(node)
        dest.appendChild(node)
        if sep:
            dest.appendChild(doc.createTextNode(sep))
593
594
# Elements the paragraph fixer descends into rather than wrapping;
# build_para() also treats them as paragraph boundaries.
RECURSE_INTO_PARA_CONTAINERS = (
    "chapter", "abstract", "enumerate",
    "section", "subsection", "subsubsection",
    "paragraph", "subparagraph", "back-matter",
    "howto", "manual",
    "item", "itemize", "fulllineitems", "enumeration", "descriptionlist",
    "definitionlist", "definition",
    )
Fred Drakefcc59101999-01-06 22:50:52 +0000603
# Elements that end a paragraph being collected by build_para().
PARA_LEVEL_ELEMENTS = (
    "moduleinfo", "title", "verbatim", "enumerate", "item",
    "interpreter-session", "back-matter", "interactive-session",
    "opcodedesc", "classdesc", "datadesc",
    "funcdesc", "methoddesc", "excdesc", "memberdesc", "memberdescni",
    "funcdescni", "methoddescni", "excdescni",
    "tableii", "tableiii", "tableiv", "localmoduletable",
    "sectionauthor", "seealso", "itemize",
    # include <para>, so we can just do it again to get subsequent paras:
    PARA_ELEMENT,
    )
615
# NOTE(review): not referenced in the part of the file visible here;
# presumably consulted by skip_leading_nodes() -- confirm.
PARA_LEVEL_PRECEEDERS = (
    "setindexsubitem", "author",
    "stindex", "obindex", "COMMENT", "label", "xi:include", "title",
    "versionadded", "versionchanged", "declaremodule", "modulesynopsis",
    "moduleauthor", "indexterm", "leader",
    )
622
Fred Drake7dab6af1999-01-28 23:59:58 +0000623
def fixup_paras(doc, fragment):
    """Build paragraphs in the containers and descriptions of fragment.

    fixup_paras_helper() is run on each direct child of fragment named in
    RECURSE_INTO_PARA_CONTAINERS, then on every <description> element
    found anywhere in fragment.
    """
    for child in fragment.childNodes:
        if child.nodeName in RECURSE_INTO_PARA_CONTAINERS:
            fixup_paras_helper(doc, child)
    for description in find_all_elements(fragment, "description"):
        fixup_paras_helper(doc, description)
Fred Drakefcc59101999-01-06 22:50:52 +0000631
632
def fixup_paras_helper(doc, container, depth=0):
    """Wrap the loose content of container in paragraph elements.

    Children named in RECURSE_INTO_PARA_CONTAINERS are processed
    recursively; other content is handed to build_para().
    """
    # document is already normalized
    children = container.childNodes
    start = skip_leading_nodes(children)
    while len(children) > start:
        if children[start].nodeName in RECURSE_INTO_PARA_CONTAINERS:
            # Something to recurse into:
            # (depth is not passed down, so nested calls see depth == 0)
            fixup_paras_helper(doc, children[start])
        else:
            # Paragraph material:
            build_para(doc, container, start, len(children))
            if DEBUG_PARA_FIXER and depth == 10:
                sys.exit(1)
        start = skip_leading_nodes(children, start + 1)
Fred Drakefcc59101999-01-06 22:50:52 +0000647
648
def build_para(doc, parent, start, i):
    """Wrap a run of parent's children, starting at index start, in a para.

    Children from start up to (but not including) i are collected until a
    text node containing a blank line ("\\n\\n") or an element named in
    PARA_LEVEL_ELEMENTS / RECURSE_INTO_PARA_CONTAINERS is met.  The
    collected nodes are moved into a new PARA_ELEMENT element which takes
    their place in parent.

    Returns an index into parent.childNodes located after the new
    paragraph.  Raises ConversionError when no content could be collected.
    """
    children = parent.childNodes
    after = start + 1
    have_last = 0
    BREAK_ELEMENTS = PARA_LEVEL_ELEMENTS + RECURSE_INTO_PARA_CONTAINERS
    # Collect all children until \n\n+ is found in a text node or a
    # member of BREAK_ELEMENTS is found.
    for j in range(start, i):
        after = j + 1
        child = children[j]
        nodeType = child.nodeType
        if nodeType == ELEMENT:
            if child.tagName in BREAK_ELEMENTS:
                after = j
                break
        elif nodeType == TEXT:
            pos = child.data.find("\n\n")
            if pos == 0:
                after = j
                break
            if pos >= 1:
                # the paragraph ends inside this text node
                child.splitText(pos)
                break
    else:
        # ran through index i - 1 without hitting a break
        have_last = 1
    if (start + 1) > after:
        raise ConversionError(
            "build_para() could not identify content to turn into a paragraph")
    if children[after - 1].nodeType == TEXT:
        # we may need to split off trailing white space:
        child = children[after - 1]
        data = child.data
        stripped = data.rstrip()
        if stripped != data:
            have_last = 0
            child.splitText(len(stripped))
    para = doc.createElement(PARA_ELEMENT)
    prev = None
    # Move the nodes last-to-first, inserting each before the one moved
    # previously, so the original order is kept.
    for j in range(after - 1, start - 1, -1):
        node = parent.childNodes[j]
        parent.removeChild(node)
        para.insertBefore(node, prev)
        prev = node
    if have_last:
        parent.appendChild(para)
        parent.appendChild(doc.createTextNode("\n\n"))
        return len(parent.childNodes)
    else:
        nextnode = parent.childNodes[start]
        if nextnode.nodeType == TEXT:
            if nextnode.data and nextnode.data[0] != "\n":
                nextnode.data = "\n" + nextnode.data
        else:
            newnode = doc.createTextNode("\n")
            parent.insertBefore(newnode, nextnode)
            nextnode = newnode
            start = start + 1
        parent.insertBefore(para, nextnode)
        return start + 1
Fred Drakefcc59101999-01-06 22:50:52 +0000709
710
def skip_leading_nodes(children, start=0):
    """Find where the next paragraph (or recursive descent) should begin.

    Scans `children` from index `start`, stepping over whitespace-only
    text, nodes that are neither text nor elements, and elements named in
    PARA_LEVEL_ELEMENTS or PARA_LEVEL_PRECEEDERS.  The index of the first
    node that is paragraph material, or that fixup_paras_helper() should
    recurse into, is returned.

    A result >= len(children) means there is nothing left to process in
    this list of children.
    """
    limit = len(children)
    while start < limit:
        node = children[start]
        kind = node.nodeType
        if kind == TEXT:
            text = node.data
            stripped = text.lstrip()
            if stripped:
                if text == stripped:
                    return start
                # leading whitespace goes into its own node; the
                # non-whitespace remainder becomes the next sibling
                node.splitText(len(text) - len(stripped))
                return start + 1
            # whitespace only: fall through and skip it
        elif kind == ELEMENT:
            name = node.tagName
            if (name in RECURSE_INTO_PARA_CONTAINERS
                    or name not in PARA_LEVEL_ELEMENTS + PARA_LEVEL_PRECEEDERS):
                return start
        start = start + 1
    return start
Fred Drakefba0ba21998-12-10 05:07:09 +0000742
743
def fixup_rfc_references(doc, fragment):
    """Append the text "RFC <num>" to every <rfc> element in `fragment`.

    <num> is the value of the element's "num" attribute; the text node is
    created from `doc`.
    """
    for rfc in find_all_elements(fragment, "rfc"):
        label = "RFC " + rfc.getAttribute("num")
        rfc.appendChild(doc.createTextNode(label))
Fred Draked24167b1999-01-14 21:18:03 +0000748
749
def fixup_signatures(doc, fragment):
    """Rewrite the argument lists below each top-level element of `fragment`.

    Every <args> element, and then every <constructor-args> element,
    found under an element child of `fragment` is passed to fixup_args()
    and normalized afterwards.
    """
    for child in fragment.childNodes:
        if child.nodeType != ELEMENT:
            continue
        for tag in ("args", "constructor-args"):
            for arglist in child.getElementsByTagName(tag):
                fixup_args(doc, arglist)
                arglist.normalize()
761
762
def fixup_args(doc, arglist):
    """Replace <optional> children of `arglist` by their bracketed content.

    Each <optional> element that is a direct child of `arglist` is
    removed; its children are moved into `arglist` at the same position,
    surrounded by "[" and "]" text nodes created from `doc`.  Nested
    <optional> elements surface as direct children and are handled on a
    later pass.  Always returns None.
    """
    while 1:
        for child in arglist.childNodes:
            if child.nodeName == "optional":
                break
        else:
            # no <optional> child left (or no children at all)
            return None
        arglist.insertBefore(doc.createTextNode("["), child)
        inner = child.childNodes
        while inner:
            first = inner[0]
            child.removeChild(first)
            arglist.insertBefore(first, child)
        arglist.insertBefore(doc.createTextNode("]"), child)
        arglist.removeChild(child)
776
777
def fixup_sectionauthors(doc, fragment):
    """Turn each <sectionauthor> in `fragment` into an <author> element.

    The element is renamed with set_tagName(), the value of its "name"
    attribute becomes its text content, and it is re-inserted near the
    start of the node it was found in.
    """
    for sectauth in find_all_elements(fragment, "sectionauthor"):
        section = sectauth.parentNode
        # detach first so the positional lookups below do not see it
        section.removeChild(sectauth)
        set_tagName(sectauth, "author")
        # move the author's name from the attribute into the content
        sectauth.appendChild(doc.createTextNode(
            sectauth.getAttribute("name")))
        sectauth.removeAttribute("name")
        # Insert before the third child when the second child is the
        # <title>; otherwise insert before the first child.
        # NOTE(review): presumably child 0 is leading whitespace text and
        # child 1 the title -- confirm; a section with fewer than three
        # children would raise IndexError here.
        after = section.childNodes[2]
        title = section.childNodes[1]
        if title.nodeName != "title":
            after = section.childNodes[0]
        section.insertBefore(doc.createTextNode("\n "), after)
        section.insertBefore(sectauth, after)
792
793
def fixup_verbatims(doc):
    """Rename <verbatim> elements that hold an interactive session.

    A <verbatim> element whose first child is a text node starting
    (after leading whitespace) with ">>>" is renamed to
    "interactive-session" with set_tagName().  Other <verbatim> elements,
    including empty ones, are left alone.
    """
    for verbatim in find_all_elements(doc, "verbatim"):
        if not verbatim.childNodes:
            # An empty <verbatim/> has no first child to inspect; indexing
            # childNodes[0] would raise IndexError.
            continue
        child = verbatim.childNodes[0]
        if child.nodeType == TEXT \
           and child.data.lstrip()[:3] == ">>>":
            set_tagName(verbatim, "interactive-session")
Fred Drake93d762f1999-02-18 16:32:21 +0000800
801
def add_node_ids(fragment, counter=0):
    """Number `fragment` and all of its descendants via a `node_id` attribute.

    `fragment` receives `counter`; its descendants receive increasing
    numbers in document order.  Returns a number greater than every id
    assigned, which the recursive calls use to continue the numbering.
    """
    fragment.node_id = counter
    next_id = counter
    for node in fragment.childNodes:
        next_id += 1
        if node.nodeType != ELEMENT:
            node.node_id = next_id
        else:
            # the recursive call numbers the element and its subtree
            next_id = add_node_ids(node, next_id)
    return next_id + 1
811
812
# Names of the module-index elements that fixup_refmodindexes() and
# fixup_refmodindexes_chunk() look for.
REFMODINDEX_ELEMENTS = ('refmodindex', 'refbimodindex',
                        'refexmodindex', 'refstmodindex')
815
def fixup_refmodindexes(fragment):
    """Fold <ref*modindex> elements into the <module> elements next to them.

    Every node that contains one of the REFMODINDEX_ELEMENTS is passed
    once to fixup_refmodindexes_chunk().  Nodes are identified by their
    `node_id` attribute, so add_node_ids() must have been run on
    `fragment` beforehand.
    """
    # Locate <ref*modindex>...</> co-located with <module>...</>, and
    # remove the <ref*modindex>, marking the <module> element with
    # index="yes" instead.
    nodes = find_all_elements_from_set(fragment, REFMODINDEX_ELEMENTS)
    d = {}
    # make sure that each parent is only processed once:
    for node in nodes:
        parent = node.parentNode
        d[parent.node_id] = parent
    del nodes
    # An explicit loop rather than map(): the calls are made purely for
    # their side effects, and a lazy map() would never perform them.
    for parent in d.values():
        fixup_refmodindexes_chunk(parent)
827
828
def fixup_refmodindexes_chunk(container):
    """Merge the module-index entries of `container` into its <module>s.

    For every REFMODINDEX_ELEMENTS entry found in `container`, each
    <module> element with exactly one child whose data equals the
    entry's "module" attribute gets index="yes".  Entries that matched
    at least one <module> are removed from `container`.  Entries that
    have children are reported and skipped.
    """
    # node is probably a <para>; let's see how often it isn't:
    if container.tagName != PARA_ELEMENT:
        bwrite("--- fixup_refmodindexes_chunk(%s)\n" % container)
    modules = find_all_elements(container, "module")
    if not modules:
        return
    matched_entries = []
    for entry in find_all_elements_from_set(container, REFMODINDEX_ELEMENTS):
        if entry.childNodes:
            bwrite("--- unexpected number of children for %s node:\n"
                   % entry.tagName)
            ewrite(entry.toxml() + "\n")
            continue
        wanted = entry.getAttribute("module")
        matched = 0
        for module in modules:
            kids = module.childNodes
            if len(kids) == 1 and kids[0].data == wanted:
                matched = 1
                module.setAttribute("index", "yes")
        if matched:
            matched_entries.append(entry)
    for entry in matched_entries:
        container.removeChild(entry)
858
859
def fixup_bifuncindexes(fragment):
    """Fold <bifuncindex> elements into the <function> elements next to them.

    Every node that contains a <bifuncindex> is passed once to
    fixup_bifuncindexes_chunk().  Nodes are identified by their `node_id`
    attribute, so add_node_ids() must have been run on `fragment`
    beforehand.
    """
    nodes = find_all_elements(fragment, 'bifuncindex')
    d = {}
    # make sure that each parent is only processed once:
    for node in nodes:
        parent = node.parentNode
        d[parent.node_id] = parent
    del nodes
    # An explicit loop rather than map(): the calls are made purely for
    # their side effects, and a lazy map() would never perform them.
    for parent in d.values():
        fixup_bifuncindexes_chunk(parent)
869
870
def fixup_bifuncindexes_chunk(container):
    """Merge the <bifuncindex> children of `container` into its <function>s.

    A <function> child whose first child's data is "<name>()" matches a
    <bifuncindex> whose "name" attribute is <name>.  Matching functions
    get index="yes" and module="__builtin__"; every <bifuncindex> that
    matched at least one function is removed from `container`.
    """
    index_entries = find_all_child_elements(container, "bifuncindex")
    functions = find_all_child_elements(container, "function")
    matched_entries = []
    for entry in index_entries:
        wanted = entry.getAttribute("name")
        matched = 0
        for func in functions:
            label = func.childNodes[0].data
            if label[-2:] == "()" and label[:-2] == wanted:
                func.setAttribute("index", "yes")
                func.setAttribute("module", "__builtin__")
                matched = 1
        if matched:
            matched_entries.append(entry)
    for entry in matched_entries:
        container.removeChild(entry)
891
892
def join_adjacent_elements(container, gi):
    """Merge directly adjacent sibling elements named `gi`.

    The whole tree below `container` is walked.  Wherever two consecutive
    children of the same parent both have node name `gi`, the children of
    the second are appended to the first and the second is removed; a
    message is written with ewrite() for every merge.
    """
    queue = [container]
    while queue:
        parent = queue.pop()
        children = parent.childNodes
        i = 0
        # The length is re-read on every pass because merging removes
        # children from `parent`; a length cached before the loop would
        # let children[i + 1] run past the end of the list.  Scanning up
        # to the last child (not just the last pair) makes sure the last
        # child's subtree is visited as well.
        while i < len(children):
            child = children[i]
            if (child.nodeName == gi
                    and i + 1 < len(children)
                    and children[i + 1].nodeName == gi):
                ewrite("--- merging two <%s/> elements\n" % gi)
                nextchild = children[i + 1]
                nextchildren = nextchild.childNodes
                while len(nextchildren):
                    node = nextchildren[0]
                    nextchild.removeChild(node)
                    child.appendChild(node)
                parent.removeChild(nextchild)
                # stay on the same child: its new neighbour may need
                # merging too
                continue
            if child.nodeType == ELEMENT:
                queue.append(child)
            i = i + 1
917
918
# Attribute values matching this pattern are written by write_esis() with
# the type TOKEN; all other values are written as CDATA.
_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
Fred Drakefcc59101999-01-06 22:50:52 +0000920
def write_esis(doc, ofp, knownempty):
    """Write the children of `doc` to `ofp` as ESIS records, recursively.

    doc        -- node whose children are written
    ofp        -- file-like object with a write() method
    knownempty -- callable taking an element name; a true result means
                  the element is declared empty

    One line is written per record:
        e               before an element declared empty
        Aname type val  for each attribute (type is TOKEN or CDATA)
        (name ... )name around an element's content
        -text           for a text node
        &name           for an entity reference

    Raises ValueError when an element declared empty has children, and
    RuntimeError for any other node type.
    """
    for node in doc.childNodes:
        nodeType = node.nodeType
        if nodeType == ELEMENT:
            gi = node.tagName
            if knownempty(gi):
                if node.hasChildNodes():
                    raise ValueError(
                        "declared-empty node <%s> has children" % gi)
                ofp.write("e\n")
            for k, value in node.attributes.items():
                if _token_rx.match(value):
                    dtype = "TOKEN"
                else:
                    dtype = "CDATA"
                ofp.write("A%s %s %s\n" % (k, dtype, esistools.encode(value)))
            ofp.write("(%s\n" % gi)
            write_esis(node, ofp, knownempty)
            ofp.write(")%s\n" % gi)
        elif nodeType == TEXT:
            ofp.write("-%s\n" % esistools.encode(node.data))
        elif nodeType == ENTITY_REFERENCE:
            ofp.write("&%s\n" % node.nodeName)
        else:
            raise RuntimeError("unsupported node type: %s" % nodeType)
946
947
def convert(ifp, ofp):
    """Read a document from `ifp`, transform it, and write ESIS to `ofp`.

    The input is parsed with esistools.parse() and expanded into a
    document fragment; the fixup passes below are applied to it in order,
    and the result is written with write_esis().  An IOError with errno
    EPIPE while writing is ignored; any other IOError propagates.
    """
    events = esistools.parse(ifp)
    toktype, doc = events.getEvent()
    fragment = doc.createDocumentFragment()
    events.expandNode(fragment)

    normalize(fragment)
    simplify(doc, fragment)
    handle_labels(doc, fragment)
    handle_appendix(doc, fragment)
    fixup_trailing_whitespace(doc, {
        "abstract": "\n",
        "title": "",
        "chapter": "\n\n",
        "section": "\n\n",
        "subsection": "\n\n",
        "subsubsection": "\n\n",
        "paragraph": "\n\n",
        "subparagraph": "\n\n",
        })
    cleanup_root_text(doc)
    cleanup_trailing_parens(fragment, ["function", "method", "cfunction"])
    cleanup_synopses(doc, fragment)
    fixup_descriptors(doc, fragment)
    fixup_verbatims(fragment)
    normalize(fragment)
    fixup_paras(doc, fragment)
    fixup_sectionauthors(doc, fragment)
    fixup_table_structures(doc, fragment)
    fixup_rfc_references(doc, fragment)
    fixup_signatures(doc, fragment)
    # The two index fixups below key their work on node_id, so the ids
    # have to be assigned first.
    add_node_ids(fragment)
    fixup_refmodindexes(fragment)
    fixup_bifuncindexes(fragment)
    # Take care of ugly hacks in the LaTeX markup to avoid LaTeX and
    # LaTeX2HTML screwing with GNU-style long options (the '--' problem).
    join_adjacent_elements(fragment, "option")
    #
    # Build the "declared empty" predicate for write_esis() from the
    # element names the parser reports as empty.
    d = {}
    for gi in events.parser.get_empties():
        d[gi] = gi
    # <author> and <rfc> were given text content by fixup_sectionauthors()
    # and fixup_rfc_references(), so they must no longer count as empty;
    # write_esis() raises ValueError for a declared-empty element that
    # has children.
    if d.has_key("author"):
        del d["author"]
    if d.has_key("rfc"):
        del d["rfc"]
    knownempty = d.has_key
    #
    try:
        write_esis(fragment, ofp, knownempty)
    except IOError, (err, msg):
        # Ignore EPIPE; it just means that whoever we're writing to stopped
        # reading.  The rest of the output would be ignored.  All other
        # errors should still be reported.
        if err != errno.EPIPE:
            raise
1003
1004
def main():
    """Command-line entry point.

    With no arguments, convert standard input to standard output.  With
    one argument, read the named file and write to standard output.  With
    two arguments, read the first file and write the result to the
    second.  Any other argument count calls usage() and exits with
    status 2.
    """
    opened = None       # input file opened here, if any; closed below
    if len(sys.argv) == 1:
        ifp = sys.stdin
        ofp = sys.stdout
    elif len(sys.argv) == 2:
        ifp = opened = open(sys.argv[1])
        ofp = sys.stdout
    elif len(sys.argv) == 3:
        ifp = opened = open(sys.argv[1])
        # The output is collected in memory; the output file is only
        # opened after convert() has returned.
        import StringIO
        ofp = StringIO.StringIO()
    else:
        usage()
        sys.exit(2)
    try:
        convert(ifp, ofp)
    finally:
        # never close sys.stdin, only a file opened above
        if opened is not None:
            opened.close()
    if len(sys.argv) == 3:
        fp = open(sys.argv[2], "w")
        try:
            fp.write(ofp.getvalue())
        finally:
            fp.close()
        ofp.close()
Fred Drake03204731998-11-23 17:02:03 +00001025
1026
# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    main()