blob: 1e9b5c3cfa18ef0f70c5cd442e70cfcf9b9fa8b3 [file] [log] [blame]
Fred Drake03204731998-11-23 17:02:03 +00001#! /usr/bin/env python
2
Fred Drake7dab6af1999-01-28 23:59:58 +00003"""Perform massive transformations on a document tree created from the LaTeX
4of the Python documentation, and dump the ESIS data for the transformed tree.
Fred Drake03204731998-11-23 17:02:03 +00005"""
# Revision keyword placeholder, expanded by the version-control system.
__version__ = "$Revision$"
7
8
9import errno
Fred Drake4db5b461998-12-01 19:03:01 +000010import esistools
11import re
Fred Drake03204731998-11-23 17:02:03 +000012import string
13import sys
Fred Drake3e8f9212001-03-23 17:01:47 +000014import xml.dom
15import xml.dom.minidom
Fred Drakee779d4f1999-05-10 19:36:52 +000016
# Short aliases for the DOM node-type codes tested throughout this module.
ELEMENT, ENTITY_REFERENCE, TEXT = (
    xml.dom.Node.ELEMENT_NODE,
    xml.dom.Node.ENTITY_REFERENCE_NODE,
    xml.dom.Node.TEXT_NODE,
)
Fred Drake03204731998-11-23 17:02:03 +000020
21
class ConversionError(Exception):
    """Raised when the document tree cannot be converted as expected."""
24
25
# ewrite() writes to stderr.  bwrite() does the same, but wraps the text in
# the terminal's bold on/off sequences when stderr is an interactive
# terminal on a POSIX system; otherwise it is simply an alias for ewrite().
ewrite = sys.stderr.write
try:
    # We can only do this trick on Unix (if tput is on $PATH)!
    # os.name is "posix" on every Unix flavour, whereas sys.platform holds
    # values such as "linux" or "darwin" and never "posix"; testing
    # sys.platform here disabled the bold output everywhere.
    import os
    if os.name != "posix" or not sys.stderr.isatty():
        # Reuse the ImportError path to select the plain fallback.
        raise ImportError
    import commands
except ImportError:
    bwrite = ewrite
else:
    # The escape sequences are fetched once, when the function is defined.
    def bwrite(s, BOLDON=commands.getoutput("tput bold"),
               BOLDOFF=commands.getoutput("tput sgr0")):
        """Write *s* to stderr surrounded by the bold on/off sequences."""
        ewrite("%s%s%s" % (BOLDON, s, BOLDOFF))
38
39
# Tag name given to the paragraph elements created by build_para().
PARA_ELEMENT = "para"

# Set to a true value to have para_msg() write its messages to stderr.
DEBUG_PARA_FIXER = 0

if not DEBUG_PARA_FIXER:
    def para_msg(s):
        """Discard *s*; paragraph-fixer tracing is switched off."""
else:
    def para_msg(s):
        """Write the trace message *s* to stderr, prefixed with '*** '."""
        ewrite("*** %s\n" % s)
50
Fred Drakefcc59101999-01-06 22:50:52 +000051
def get_first_element(doc, gi):
    """Return the first direct child of *doc* whose nodeName equals *gi*.

    Only the immediate children are examined.  Returns None when no child
    matches.
    """
    matches = [child for child in doc.childNodes if child.nodeName == gi]
    if matches:
        return matches[0]
    return None
56
def extract_first_element(doc, gi):
    """Detach and return the first direct child of *doc* named *gi*.

    Returns None, leaving *doc* untouched, when there is no such child.
    """
    found = get_first_element(doc, gi)
    if found is None:
        return None
    doc.removeChild(found)
    return found
62
63
def get_documentElement(node):
    """Return the last element-type child of *node*, or None if there is none.

    Note that when *node* has several element children it is the final one
    that is returned, not the first.
    """
    elements = [child for child in node.childNodes
                if child.nodeType == ELEMENT]
    if not elements:
        return None
    return elements[-1]
70
71
def set_tagName(elem, gi):
    """Rename *elem* to *gi* by overwriting its nodeName and tagName."""
    elem.nodeName = gi
    elem.tagName = gi
74
75
def find_all_elements(doc, gi):
    """Return a list of the elements named *gi* at or below *doc*.

    *doc* itself comes first when its nodeName is *gi*.  Then, for each
    element child in turn, the child (if it matches) is followed by the
    matching descendants reported by its getElementsByTagName().
    """
    found = []
    if doc.nodeName == gi:
        found.append(doc)
    for child in doc.childNodes:
        if child.nodeType != ELEMENT:
            continue
        if child.tagName == gi:
            found.append(child)
        found.extend(child.getElementsByTagName(gi))
    return found
87
def find_all_child_elements(doc, gi):
    """Return a list of the direct children of *doc* whose nodeName is *gi*."""
    return [child for child in doc.childNodes if child.nodeName == gi]
94
Fred Drake3e8f9212001-03-23 17:01:47 +000095
def find_all_elements_from_set(doc, gi_set):
    """Return a list of the nodes at or below *doc* named in *gi_set*.

    *gi_set* may be any container supporting the ``in`` test.  *doc* itself
    is included when its nodeName is in the set; below it only element
    nodes are visited.
    """
    found = []
    __find_all_elements_from_set(doc, gi_set, found)
    return found
98
def __find_all_elements_from_set(doc, gi_set, nodes):
    """Recursive worker for find_all_elements_from_set().

    Appends *doc* to *nodes* if its nodeName is in *gi_set*, then descends
    into each element child.  Returns the same *nodes* list it was given.
    """
    if doc.nodeName in gi_set:
        nodes.append(doc)
    element_children = [child for child in doc.childNodes
                        if child.nodeType == ELEMENT]
    for child in element_children:
        __find_all_elements_from_set(child, gi_set, nodes)
    return nodes
Fred Drake7dab6af1999-01-28 23:59:58 +0000106
107
def simplify(doc, fragment):
    """Rationalize the top level of *fragment* in place.

    The top-level <documentclass>, <title> and <input> nodes are removed
    from *fragment*; the <document> child (if any) is renamed after the
    'classname' attribute of <documentclass>; the title and inputs are
    re-inserted at the start of the document element; and leading text
    nodes of *fragment* are dropped.  *doc* is used to create text nodes.
    """
    # These things are simply not valid SGML/XML documents as they stand,
    # and need a little work.
    documentclass = "document"
    classnode = extract_first_element(fragment, "documentclass")
    if classnode is not None:
        documentclass = classnode.getAttribute("classname")
    inputs = []
    title = extract_first_element(fragment, "title")
    if title is not None:
        inputs.append(title)
    # update the name of the root element
    root = get_first_element(fragment, "document")
    if root is not None:
        set_tagName(root, documentclass)
    # pull out every top-level <input>, in document order
    pending = extract_first_element(fragment, "input")
    while pending is not None:
        inputs.append(pending)
        pending = extract_first_element(fragment, "input")
    if inputs:
        docelem = get_documentElement(fragment)
        # Each node goes in at the very front, so insert back to front to
        # end up with the original order.
        inputs.reverse()
        for node in inputs:
            newline = doc.createTextNode("\n")
            docelem.insertBefore(newline, docelem.firstChild)
            docelem.insertBefore(node, newline)
        docelem.insertBefore(doc.createTextNode("\n"), docelem.firstChild)
    while fragment.firstChild and fragment.firstChild.nodeType == TEXT:
        fragment.removeChild(fragment.firstChild)
Fred Drake03204731998-11-23 17:02:03 +0000138
139
def cleanup_root_text(doc):
    """Remove the text nodes that are direct children of *doc*.

    A text node that immediately follows a child named "COMMENT" is kept.
    """
    doomed = []
    after_comment = 0
    for node in doc.childNodes:
        keep_text = after_comment
        after_comment = 0
        if node.nodeType == TEXT and not keep_text:
            doomed.append(node)
        elif node.nodeName == "COMMENT":
            after_comment = 1
    # Remove only after the scan so the child list is not changed while
    # it is being iterated.
    for node in doomed:
        doc.removeChild(node)
152
153
# Tag names that find_and_fix_descriptors() hands to rewrite_descriptor().
DESCRIPTOR_ELEMENTS = (
    # C-level descriptors
    "cfuncdesc",
    "cvardesc",
    "ctypedesc",
    # classes and their members
    "classdesc",
    "memberdesc",
    "memberdescni",
    "methoddesc",
    "methoddescni",
    # exceptions, functions, opcodes
    "excdesc",
    "funcdesc",
    "funcdescni",
    "opcodedesc",
    # data
    "datadesc",
    "datadescni",
    )
160
def fixup_descriptors(doc, fragment):
    """Run find_and_fix_descriptors() on every <section> found in *fragment*."""
    for section in find_all_elements(fragment, "section"):
        find_and_fix_descriptors(doc, section)
165
166
def find_and_fix_descriptors(doc, container):
    """Rewrite the descriptor elements directly inside *container*.

    Children named in DESCRIPTOR_ELEMENTS are passed to
    rewrite_descriptor(); <subsection> children are searched recursively.
    Other children are ignored.
    """
    for child in container.childNodes:
        if child.nodeType != ELEMENT:
            continue
        name = child.tagName
        if name == "subsection":
            find_and_fix_descriptors(doc, child)
        elif name in DESCRIPTOR_ELEMENTS:
            rewrite_descriptor(doc, child)
176
Fred Drakecb657811999-01-29 20:55:07 +0000177
def rewrite_descriptor(doc, descriptor):
    """Rebuild *descriptor* as <signature> elements plus a <description>.

    *doc* is the factory for the new nodes; *descriptor* is modified in
    place and nothing is returned.

    Raises RuntimeError if a 'var' attribute is found on a descriptor other
    than an opcodedesc.
    """
    #
    # Do these things:
    #   1. Add an "index='no'" attribute to the element if the tagName
    #      ends in 'ni', removing the 'ni' from the name.
    #   2. Create a <signature> from the name attribute
    #   2a. Create an <args> if it appears to be available.
    #   3. Create additional <signature>s from <*line{,ni}> elements,
    #      if found.
    #   4. If a <versionadded> is found, move it to an attribute on the
    #      descriptor.
    #   5. Move remaining child nodes to a <description> element.
    #   6. Put it back together.
    #
    # 1.
    descname = descriptor.tagName
    index = 1
    if descname[-2:] == "ni":
        descname = descname[:-2]
        descriptor.setAttribute("index", "no")
        set_tagName(descriptor, descname)
        index = 0
    desctype = descname[:-4]  # remove 'desc'
    linename = desctype + "line"
    if not index:
        linename = linename + "ni"
    # 2.
    signature = doc.createElement("signature")
    name = doc.createElement("name")
    signature.appendChild(doc.createTextNode("\n "))
    signature.appendChild(name)
    name.appendChild(doc.createTextNode(descriptor.getAttribute("name")))
    descriptor.removeAttribute("name")
    # 2a.
    if descriptor.hasAttribute("var"):
        if descname != "opcodedesc":
            # Call form of raise: valid on both Python 2 and Python 3.
            raise RuntimeError(
                "got 'var' attribute on descriptor other than opcodedesc")
        variable = descriptor.getAttribute("var")
        if variable:
            args = doc.createElement("args")
            args.appendChild(doc.createTextNode(variable))
            signature.appendChild(doc.createTextNode("\n "))
            signature.appendChild(args)
        descriptor.removeAttribute("var")
    newchildren = [signature]
    children = descriptor.childNodes
    pos = skip_leading_nodes(children)
    if pos < len(children):
        child = children[pos]
        if child.nodeName == "args":
            # move <args> to <signature>, or remove if empty:
            child.parentNode.removeChild(child)
            if len(child.childNodes):
                signature.appendChild(doc.createTextNode("\n "))
                signature.appendChild(child)
    signature.appendChild(doc.createTextNode("\n "))
    # 3, 4.
    pos = skip_leading_nodes(children, pos)
    while pos < len(children) \
          and children[pos].nodeName in (linename, "versionadded"):
        if children[pos].tagName == linename:
            # this is really a supplemental signature, create <signature>
            # Keep a pristine copy so it can be dumped if conversion fails.
            oldchild = children[pos].cloneNode(1)
            try:
                sig = methodline_to_signature(doc, children[pos])
            except KeyError:
                # Show the offending element on stdout, then re-raise.
                sys.stdout.write(oldchild.toxml() + "\n")
                raise
            newchildren.append(sig)
        else:
            # <versionadded version=...>
            descriptor.setAttribute(
                "added", children[pos].getAttribute("version"))
        pos = skip_leading_nodes(children, pos + 1)
    # 5.
    description = doc.createElement("description")
    description.appendChild(doc.createTextNode("\n"))
    newchildren.append(description)
    move_children(descriptor, description, pos)
    # Never empty: the "\n" text node was appended just above.
    last = description.childNodes[-1]
    if last.nodeType == TEXT:
        last.data = last.data.rstrip() + "\n "
    # 6.
    # should have nothing but whitespace and signature lines in <descriptor>;
    # discard them
    while descriptor.childNodes:
        descriptor.removeChild(descriptor.childNodes[0])
    for node in newchildren:
        descriptor.appendChild(doc.createTextNode("\n "))
        descriptor.appendChild(node)
    descriptor.appendChild(doc.createTextNode("\n"))
Fred Drake03204731998-11-23 17:02:03 +0000270
Fred Drake7dab6af1999-01-28 23:59:58 +0000271
def methodline_to_signature(doc, methodline):
    """Build and return a <signature> element from *methodline*.

    The 'name' attribute of *methodline* becomes the text of a <name>
    child and is removed from *methodline*; any children of *methodline*
    are moved into an <args> child.  *doc* creates the new nodes.
    """
    name = doc.createElement("name")
    name.appendChild(doc.createTextNode(methodline.getAttribute("name")))
    methodline.removeAttribute("name")

    signature = doc.createElement("signature")
    signature.appendChild(doc.createTextNode("\n "))
    signature.appendChild(name)
    if methodline.childNodes:
        args = doc.createElement("args")
        signature.appendChild(doc.createTextNode("\n "))
        signature.appendChild(args)
        move_children(methodline, args)
    signature.appendChild(doc.createTextNode("\n "))
    return signature
Fred Drake03204731998-11-23 17:02:03 +0000286
287
def move_children(origin, dest, start=0):
    """Move the children of *origin* from index *start* on to the end of *dest*.

    The moved nodes keep their relative order; children before *start*
    stay where they are.
    """
    children = origin.childNodes
    # Each pass removes exactly one child, so the node to move is always
    # the one currently sitting at index *start*.
    for _ in range(len(children) - start):
        node = children[start]
        origin.removeChild(node)
        dest.appendChild(node)
294
295
def handle_appendix(doc, fragment):
    """Move the material following the <appendix> marker into <back-matter>.

    The children of the document element are scanned for the first element
    that has an <appendix> descendant.  That <appendix> is removed, and
    every later child of the document element is moved into a new
    <back-matter> element appended to the document element.  Nothing is
    added when no nodes follow the element holding the marker.
    """
    # must be called after simplify() if document is multi-rooted to begin with
    docelem = get_documentElement(fragment)
    appendices = 0
    nodes = []
    for node in docelem.childNodes:
        if appendices:
            nodes.append(node)
        elif node.nodeType == ELEMENT:
            appnodes = node.getElementsByTagName("appendix")
            if appnodes:
                appendices = 1
                parent = appnodes[0].parentNode
                parent.removeChild(appnodes[0])
                parent.normalize()
    if nodes:
        # Explicit loops rather than map(): map() is lazy on Python 3, so
        # using it purely for side effects would do nothing there.
        for node in nodes:
            docelem.removeChild(node)
        docelem.appendChild(doc.createTextNode("\n\n\n"))
        back = doc.createElement("back-matter")
        docelem.appendChild(back)
        back.appendChild(doc.createTextNode("\n"))
        # Drop leading whitespace-only text so <back-matter> starts cleanly.
        while nodes and nodes[0].nodeType == TEXT \
              and not nodes[0].data.strip():
            del nodes[0]
        for node in nodes:
            back.appendChild(node)
        docelem.appendChild(doc.createTextNode("\n"))
Fred Drake03204731998-11-23 17:02:03 +0000323
324
def handle_labels(doc, fragment):
    """Turn each <label id="..."/> in *fragment* into an 'id' attribute.

    The id is set on the label's parent, or on the parent's parent when
    the label sits inside a <title>.  The <label> is then removed; for a
    <title>, trailing whitespace left behind in its last text node is
    stripped.  Labels without an id are left alone.  *doc* is unused.
    """
    for label in find_all_elements(fragment, "label"):
        label_id = label.getAttribute("id")
        if not label_id:
            continue
        parent = label.parentNode
        parentTagName = parent.tagName
        if parentTagName == "title":
            parent.parentNode.setAttribute("id", label_id)
        else:
            parent.setAttribute("id", label_id)
        # now, remove <label id="..."/> from parent:
        parent.removeChild(label)
        if parentTagName == "title":
            parent.normalize()
            children = parent.childNodes
            # The title may have held nothing but the label, in which case
            # there is no last child to tidy up.
            if children and children[-1].nodeType == TEXT:
                children[-1].data = children[-1].data.rstrip()
Fred Drake03204731998-11-23 17:02:03 +0000343
344
def fixup_trailing_whitespace(doc, wsmap):
    """Normalize the whitespace that ends selected elements.

    *wsmap* maps a node name to the whitespace string its content should
    end with.  The tree is walked breadth-first from *doc*; for each node
    named in *wsmap* whose last child is a text node, that text is
    right-stripped and the mapped whitespace appended.  Elements without
    children are left untouched.
    """
    queue = [doc]
    while queue:
        node = queue.pop(0)
        if node.nodeName in wsmap:
            ws = wsmap[node.nodeName]
            children = node.childNodes
            # Look at the last child directly; reversing the live child
            # list in place (and back again) is unnecessary and fragile.
            if children and children[-1].nodeType == TEXT:
                last = children[-1]
                last.data = last.data.rstrip() + ws
            # hack to get the title in place:
            if node.nodeName == "title" \
               and node.parentNode.firstChild.nodeType == ELEMENT:
                # createTextNode() is the DOM factory method; the
                # previously used createText() does not exist.
                node.parentNode.insertBefore(doc.createTextNode("\n "),
                                             node.parentNode.firstChild)
        for child in node.childNodes:
            if child.nodeType == ELEMENT:
                queue.append(child)
366
367
def normalize(doc):
    """Call normalize() on every element that is a direct child of *doc*."""
    elements = [child for child in doc.childNodes
                if child.nodeType == ELEMENT]
    for element in elements:
        element.normalize()
372
373
def cleanup_trailing_parens(doc, element_names):
    """Strip a trailing "()" from the text of the named elements.

    The element children of *doc* are walked breadth-first.  An element
    whose tagName is in *element_names* is not descended into; if its only
    child is a text node ending in "()", those two characters are removed.
    """
    targets = {}
    for gi in element_names:
        targets[gi] = gi
    queue = [child for child in doc.childNodes if child.nodeType == ELEMENT]
    while queue:
        node = queue.pop(0)
        if node.tagName not in targets:
            # Not a target: keep searching below it.
            queue.extend([child for child in node.childNodes
                          if child.nodeType == ELEMENT])
            continue
        children = node.childNodes
        if len(children) != 1 or children[0].nodeType != TEXT:
            continue
        data = children[0].data
        if data[-2:] == "()":
            children[0].data = data[:-2]
397
398
def contents_match(left, right):
    """Return 1 if the children of *left* and *right* match, else 0.

    Children are compared pairwise: elements must share a tagName and have
    matching contents (recursively), and text nodes must carry equal data.
    Attributes are not compared.  Any other node type counts as a mismatch.
    """
    left_children = left.childNodes
    right_children = right.childNodes
    if len(left_children) != len(right_children):
        return 0
    # The lengths are equal here, so zip() pairs up every child.
    for l, r in zip(left_children, right_children):
        kind = l.nodeType
        if kind != r.nodeType:
            return 0
        if kind == ELEMENT:
            # should check attributes, but that's not a problem here
            if l.tagName != r.tagName or not contents_match(l, r):
                return 0
        elif kind == TEXT:
            if l.data != r.data:
                return 0
        else:
            # not quite right, but good enough
            return 0
    return 1
421
422
def create_module_info(doc, section):
    """Gather the module metadata of *section* into a <moduleinfo> element.

    Does nothing unless *section* has a <modulesynopsis> child.  The
    <modulesynopsis>, <moduleauthor> and <platform> children are always
    pulled out of *section*; the <moduleinfo> itself is only built and
    inserted when *section* is a <section> element.  *doc* creates the new
    nodes.
    """
    # Heavy.
    synopsis = extract_first_element(section, "modulesynopsis")
    if synopsis is None:
        return
    set_tagName(synopsis, "synopsis")
    lastchild = synopsis.childNodes[-1]
    # drop the full stop that ends the synopsis text
    if lastchild.nodeType == TEXT \
       and lastchild.data[-1:] == ".":
        lastchild.data = lastchild.data[:-1]
    modauthor = extract_first_element(section, "moduleauthor")
    if modauthor:
        # <moduleauthor name="X"/> becomes <author>X</author>
        set_tagName(modauthor, "author")
        modauthor.appendChild(doc.createTextNode(
            modauthor.getAttribute("name")))
        modauthor.removeAttribute("name")
    platform = extract_first_element(section, "platform")
    if section.tagName == "section":
        modinfo_pos = 2
        modinfo = doc.createElement("moduleinfo")
        moddecl = extract_first_element(section, "declaremodule")
        name = None
        if moddecl:
            modinfo.appendChild(doc.createTextNode("\n "))
            name = moddecl.attributes["name"].value
            namenode = doc.createElement("name")
            namenode.appendChild(doc.createTextNode(name))
            modinfo.appendChild(namenode)
            typeattr = moddecl.attributes.get("type")
            if typeattr:
                modtype = typeattr.value
                modinfo.appendChild(doc.createTextNode("\n "))
                typenode = doc.createElement("type")
                typenode.appendChild(doc.createTextNode(modtype))
                modinfo.appendChild(typenode)
        versionadded = extract_first_element(section, "versionadded")
        if versionadded:
            modinfo.setAttribute("added", versionadded.getAttribute("version"))
        title = get_first_element(section, "title")
        if title:
            children = title.childNodes
            if len(children) >= 2 \
               and children[0].nodeName == "module" \
               and children[0].childNodes[0].data == name:
                # this is it; morph the <title> into <short-synopsis>
                first_data = children[1]
                if first_data.data[:4] == " ---":
                    first_data.data = first_data.data[4:].lstrip()
                set_tagName(title, "short-synopsis")
                if children[-1].nodeType == TEXT \
                   and children[-1].data[-1:] == ".":
                    children[-1].data = children[-1].data[:-1]
                section.removeChild(title)
                section.removeChild(section.childNodes[0])
                title.removeChild(children[0])
                modinfo_pos = 0
            else:
                ewrite("module name in title doesn't match"
                       " <declaremodule/>; no <short-synopsis/>\n")
        else:
            ewrite("Unexpected condition: <section/> without <title/>\n")
        modinfo.appendChild(doc.createTextNode("\n "))
        modinfo.appendChild(synopsis)
        if title and not contents_match(title, synopsis):
            # The short synopsis is actually different,
            # and needs to be stored:
            modinfo.appendChild(doc.createTextNode("\n "))
            modinfo.appendChild(title)
        if modauthor:
            modinfo.appendChild(doc.createTextNode("\n "))
            modinfo.appendChild(modauthor)
        if platform:
            modinfo.appendChild(doc.createTextNode("\n "))
            modinfo.appendChild(platform)
        modinfo.appendChild(doc.createTextNode("\n "))
        section.insertBefore(modinfo, section.childNodes[modinfo_pos])
        section.insertBefore(doc.createTextNode("\n "), modinfo)
        #
        # The rest of this removes extra newlines from where we cut out
        # a lot of elements.  A lot of code for minimal value, but it
        # keeps the generated *ML from being too funny looking.
        #
        section.normalize()
        children = section.childNodes
        for i in range(len(children)):
            if children[i].nodeName != "moduleinfo":
                continue
            nextnode = children[i+1]
            if nextnode.nodeType == TEXT:
                data = nextnode.data
                stripped = data.lstrip()
                # only rewrite when more than four leading whitespace
                # characters would be removed
                if len(stripped) < (len(data) - 4):
                    nextnode.data = "\n\n\n" + stripped
Fred Drakeaaed9711998-12-10 20:25:30 +0000515
516
def cleanup_synopses(doc, fragment):
    """Run create_module_info() on every <section> found in *fragment*."""
    sections = find_all_elements(fragment, "section")
    for section in sections:
        create_module_info(doc, section)
Fred Drakeaaed9711998-12-10 20:25:30 +0000520
521
def fixup_table_structures(doc, fragment):
    """Run fixup_table() on every <table> found in *fragment*."""
    tables = find_all_elements(fragment, "table")
    for table in tables:
        fixup_table(doc, table)
525
Fred Drakef8ebb551999-01-14 19:45:38 +0000526
def fixup_table(doc, table):
    """Restructure *table* into <tgroup> holding a <thead> and a <tbody>.

    The <entry> children of *table* become the single header row and its
    <row> children become the body.  A row directly followed by one or
    more <hline> elements gets rowsep="1".  Everything else in the table
    must be whitespace text or <hline>, and is discarded.

    Raises ConversionError on non-whitespace text, on any other element,
    or on any other kind of node left in the table.
    """
    # create the table head
    thead = doc.createElement("thead")
    row = doc.createElement("row")
    move_elements_by_name(doc, table, row, "entry")
    thead.appendChild(doc.createTextNode("\n "))
    thead.appendChild(row)
    thead.appendChild(doc.createTextNode("\n "))
    # create the table body
    tbody = doc.createElement("tbody")
    prev_row = None
    children = table.childNodes
    for child in children:
        if child.nodeType == ELEMENT:
            tagName = child.tagName
            if tagName == "hline" and prev_row is not None:
                prev_row.setAttribute("rowsep", "1")
            elif tagName == "row":
                prev_row = child
    # save the rows:
    tbody.appendChild(doc.createTextNode("\n "))
    move_elements_by_name(doc, table, tbody, "row", sep="\n ")
    # and toss the rest:
    while children:
        child = children[0]
        nodeType = child.nodeType
        if nodeType == TEXT:
            if child.data.strip():
                raise ConversionError("unexpected free data in <%s>: %r"
                                      % (table.tagName, child.data))
            table.removeChild(child)
            continue
        if nodeType == ELEMENT:
            if child.tagName != "hline":
                raise ConversionError(
                    "unexpected <%s> in table" % child.tagName)
            table.removeChild(child)
            continue
        raise ConversionError(
            "unexpected %s node in table" % child.__class__.__name__)
    # nothing left in the <table>; add the <thead> and <tbody>
    tgroup = doc.createElement("tgroup")
    tgroup.appendChild(doc.createTextNode("\n "))
    tgroup.appendChild(thead)
    tgroup.appendChild(doc.createTextNode("\n "))
    tgroup.appendChild(tbody)
    tgroup.appendChild(doc.createTextNode("\n "))
    table.appendChild(tgroup)
    # now make the <entry>s look nice:
    for row in table.getElementsByTagName("row"):
        fixup_row(doc, row)
579
580
def fixup_row(doc, row):
    """Insert a newline-and-indent text node before every child of *row*
    except the first.

    *doc* creates the text nodes; *row* is modified in place.
    """
    # Snapshot the children first: nodes are inserted into the row while
    # looping.  (map() was used for this before, but it is lazy on
    # Python 3 and so collected nothing there.)
    entries = list(row.childNodes[1:])
    for entry in entries:
        row.insertBefore(doc.createTextNode("\n "), entry)
586# row.appendChild(doc.createTextNode("\n "))
587
588
def move_elements_by_name(doc, source, dest, name, sep=None):
    """Move the direct children of *source* named *name* to the end of *dest*.

    The moved nodes keep their order.  When *sep* is true, a text node
    holding *sep* (created through *doc*) is appended after each one.
    """
    matches = [child for child in source.childNodes
               if child.nodeName == name]
    for node in matches:
        source.removeChild(node)
        dest.appendChild(node)
        if sep:
            dest.appendChild(doc.createTextNode(sep))
599
600
# Names of the nodes the paragraph fixer descends into rather than
# wrapping in a paragraph.
RECURSE_INTO_PARA_CONTAINERS = (
    "chapter",
    "abstract",
    "enumerate",
    "section",
    "subsection",
    "subsubsection",
    "paragraph",
    "subparagraph",
    "back-matter",
    "howto",
    "manual",
    "item",
    "itemize",
    "fulllineitems",
    "enumeration",
    "descriptionlist",
    "definitionlist",
    "definition",
    )
Fred Drakefcc59101999-01-06 22:50:52 +0000609
# Elements that end the run of nodes build_para() collects into one
# paragraph (together with RECURSE_INTO_PARA_CONTAINERS).
PARA_LEVEL_ELEMENTS = (
    "moduleinfo", "title", "verbatim", "enumerate", "item",
    "interpreter-session", "back-matter", "interactive-session",
    "opcodedesc", "classdesc", "datadesc",
    # "memberdescni" was previously misspelled "membderdescni", so it
    # could never match an element.
    "funcdesc", "methoddesc", "excdesc", "memberdesc", "memberdescni",
    "funcdescni", "methoddescni", "excdescni",
    "tableii", "tableiii", "tableiv", "localmoduletable",
    "sectionauthor", "seealso", "itemize",
    # include <para>, so we can just do it again to get subsequent paras:
    PARA_ELEMENT,
    )
621
# NOTE(review): not referenced in the visible part of this file; presumably
# consumed by skip_leading_nodes() -- confirm against its definition.
PARA_LEVEL_PRECEEDERS = (
    "setindexsubitem",
    "stindex",
    "obindex",
    "COMMENT",
    "label",
    "input",
    "title",
    "versionadded",
    "versionchanged",
    "declaremodule",
    "modulesynopsis",
    "moduleauthor",
    "indexterm",
    "leader",
    )
628
Fred Drake7dab6af1999-01-28 23:59:58 +0000629
def fixup_paras(doc, fragment):
    """Create paragraph elements throughout *fragment*.

    fixup_paras_helper() is run on each direct child of *fragment* named
    in RECURSE_INTO_PARA_CONTAINERS, and then on every <description>
    element found in *fragment*.
    """
    containers = [child for child in fragment.childNodes
                  if child.nodeName in RECURSE_INTO_PARA_CONTAINERS]
    for container in containers:
        fixup_paras_helper(doc, container)
    for description in find_all_elements(fragment, "description"):
        fixup_paras_helper(doc, description)
Fred Drakefcc59101999-01-06 22:50:52 +0000637
638
def fixup_paras_helper(doc, container, depth=0):
    """Wrap the paragraph material of *container* in paragraph elements.

    Children named in RECURSE_INTO_PARA_CONTAINERS are processed
    recursively; anything else is handed to build_para().  *depth* is the
    nesting level of this call; it is only consulted when
    DEBUG_PARA_FIXER is set, in which case the process exits after
    building a paragraph at depth 10.
    """
    # document is already normalized
    children = container.childNodes
    start = skip_leading_nodes(children)
    while len(children) > start:
        if children[start].nodeName in RECURSE_INTO_PARA_CONTAINERS:
            # Something to recurse into.  Pass the depth along so that
            # the debugging cut-off below can actually be reached.
            fixup_paras_helper(doc, children[start], depth + 1)
        else:
            # Paragraph material:
            build_para(doc, container, start, len(children))
            if DEBUG_PARA_FIXER and depth == 10:
                sys.exit(1)
        start = skip_leading_nodes(children, start + 1)
Fred Drakefcc59101999-01-06 22:50:52 +0000653
654
def build_para(doc, parent, start, i):
    """Wrap a run of children of *parent* in a new paragraph element.

    The run begins at index *start* and ends before the first text node
    containing "\\n\\n", before the first element named in
    PARA_LEVEL_ELEMENTS or RECURSE_INTO_PARA_CONTAINERS, or at index *i*.
    Trailing whitespace of the run is split off and left outside the
    paragraph.

    Returns an index into the children of *parent*.  Raises
    ConversionError when no content for a paragraph can be identified.
    """
    children = parent.childNodes
    after = start + 1
    have_last = 0
    BREAK_ELEMENTS = PARA_LEVEL_ELEMENTS + RECURSE_INTO_PARA_CONTAINERS
    # Collect all children until \n\n+ is found in a text node or a
    # member of BREAK_ELEMENTS is found.
    for j in range(start, i):
        after = j + 1
        child = children[j]
        kind = child.nodeType
        if kind == ELEMENT:
            if child.tagName in BREAK_ELEMENTS:
                after = j
                break
        elif kind == TEXT:
            pos = child.data.find("\n\n")
            if pos == 0:
                # the blank line comes first: the run ends before this node
                after = j
                break
            if pos >= 1:
                # keep the text ahead of the blank line in the run
                child.splitText(pos)
                break
    else:
        # ran through index i without meeting a break
        have_last = 1
    if after <= start:
        raise ConversionError(
            "build_para() could not identify content to turn into a paragraph")
    tail = children[after - 1]
    if tail.nodeType == TEXT:
        # we may need to split off trailing white space:
        data = tail.data
        stripped = data.rstrip()
        if stripped != data:
            have_last = 0
            tail.splitText(len(stripped))
    para = doc.createElement(PARA_ELEMENT)
    # Move the run into the paragraph from the last node to the first,
    # inserting each node in front of the one moved before it.
    prev = None
    for j in range(after - 1, start - 1, -1):
        node = parent.childNodes[j]
        parent.removeChild(node)
        para.insertBefore(node, prev)
        prev = node
    if have_last:
        parent.appendChild(para)
        parent.appendChild(doc.createTextNode("\n\n"))
        return len(parent.childNodes)
    nextnode = parent.childNodes[start]
    if nextnode.nodeType == TEXT:
        if nextnode.data and nextnode.data[0] != "\n":
            nextnode.data = "\n" + nextnode.data
    else:
        newnode = doc.createTextNode("\n")
        parent.insertBefore(newnode, nextnode)
        nextnode = newnode
        start = start + 1
    parent.insertBefore(para, nextnode)
    return start + 1
Fred Drakefcc59101999-01-06 22:50:52 +0000715
716
def skip_leading_nodes(children, start=0):
    """Return the index in children where the next piece of work begins.

    The node at the returned index is either the start of paragraph
    material or an element fixup_paras_helper() should recurse into.
    Whitespace-only text, nodes that are neither text nor elements, and
    elements named in PARA_LEVEL_ELEMENTS or PARA_LEVEL_PRECEEDERS are
    stepped over.

    A result >= len(children) means nothing is left to process.
    """
    limit = len(children)
    while start < limit:
        node = children[start]
        kind = node.nodeType
        if kind == TEXT:
            text = node.data
            body = text.lstrip()
            if body:
                if body == text:
                    return start
                # Leading whitespace: split it from the real text, which
                # becomes the next sibling.
                node.splitText(len(text) - len(body))
                return start + 1
            # Whitespace only; keep looking.
        elif kind == ELEMENT:
            gi = node.tagName
            if gi in RECURSE_INTO_PARA_CONTAINERS:
                return start
            if gi not in PARA_LEVEL_ELEMENTS + PARA_LEVEL_PRECEEDERS:
                return start
        start = start + 1
    return start
Fred Drakefba0ba21998-12-10 05:07:09 +0000748
749
def fixup_rfc_references(doc, fragment):
    """Append the text "RFC <num>" to every <rfc> element in fragment."""
    for rfcnode in find_all_elements(fragment, "rfc"):
        label = "RFC " + rfcnode.getAttribute("num")
        rfcnode.appendChild(doc.createTextNode(label))
Fred Draked24167b1999-01-14 21:18:03 +0000754
755
def fixup_signatures(doc, fragment):
    """Run fixup_args() over the argument lists below fragment.

    For every element child of fragment, all <args> descendants are
    processed first, then all <constructor-args> descendants; each list is
    normalized after it has been fixed.
    """
    for child in fragment.childNodes:
        if child.nodeType != ELEMENT:
            continue
        for gi in ("args", "constructor-args"):
            for arglist in child.getElementsByTagName(gi):
                fixup_args(doc, arglist)
                arglist.normalize()
767
768
def fixup_args(doc, arglist):
    """Replace every <optional> child of arglist with bracketed content.

    Each <optional> element is replaced by a "[" text node, its former
    children and a "]" text node.  <optional> elements nested inside
    another one are unwrapped as well.  Returns None.

    This is a single iterative pass; it does not recurse per element, so
    the number of <optional> elements is not limited by the interpreter's
    recursion limit.
    """
    children = arglist.childNodes
    i = 0
    while i < len(children):
        child = children[i]
        if child.nodeName != "optional":
            i = i + 1
            continue
        arglist.insertBefore(doc.createTextNode("["), child)
        optkids = child.childNodes
        while optkids:
            k = optkids[0]
            child.removeChild(k)
            arglist.insertBefore(k, child)
        arglist.insertBefore(doc.createTextNode("]"), child)
        arglist.removeChild(child)
        # Do not advance: index i now holds the "[" node, and scanning on
        # from here picks up any <optional> that was nested in this one.
782
783
def fixup_sectionauthors(doc, fragment):
    """Turn <sectionauthor name="..."/> elements into <author> elements.

    Each element is detached from its parent section, passed to
    set_tagName() with the name "author", given the value of its "name"
    attribute as text content (the attribute is then removed), and put
    back into the section: in front of the third child when the second
    child is a <title>, otherwise in front of the first child.

    Sections too short for those positions no longer raise IndexError;
    the author is appended at the end of the section instead.
    """
    for sectauth in find_all_elements(fragment, "sectionauthor"):
        section = sectauth.parentNode
        section.removeChild(sectauth)
        set_tagName(sectauth, "author")
        sectauth.appendChild(doc.createTextNode(
            sectauth.getAttribute("name")))
        sectauth.removeAttribute("name")
        kids = section.childNodes
        # insertBefore() with a reference node of None appends.
        after = None
        if len(kids) > 1 and kids[1].nodeName == "title":
            if len(kids) > 2:
                after = kids[2]
        elif len(kids) > 0:
            after = kids[0]
        section.insertBefore(doc.createTextNode("\n "), after)
        section.insertBefore(sectauth, after)
798
799
def fixup_verbatims(doc):
    """Retag <verbatim> elements that hold an interactive session.

    A <verbatim> whose first child is a text node starting (after leading
    white space) with ">>>" is passed to set_tagName() with the name
    "interactive-session".  Empty <verbatim> elements are left alone
    rather than raising IndexError.
    """
    for verbatim in find_all_elements(doc, "verbatim"):
        if not verbatim.childNodes:
            continue
        child = verbatim.childNodes[0]
        if child.nodeType == TEXT \
           and child.data.lstrip()[:3] == ">>>":
            set_tagName(verbatim, "interactive-session")
Fred Drake93d762f1999-02-18 16:32:21 +0000806
807
def add_node_ids(fragment, counter=0):
    """Store a distinct integer in the node_id attribute of every node.

    fragment receives counter itself; its descendants receive larger
    values in document order.  Returns a value greater than every id
    handed out, suitable as the counter for a following subtree.
    """
    fragment.node_id = counter
    next_id = counter
    for node in fragment.childNodes:
        next_id = next_id + 1
        if node.nodeType != ELEMENT:
            node.node_id = next_id
        else:
            # The recursive call numbers the element and its subtree.
            next_id = add_node_ids(node, next_id)
    return next_id + 1
817
818
# Names of the <ref*modindex> elements handled by fixup_refmodindexes().
REFMODINDEX_ELEMENTS = (
    'refmodindex',
    'refbimodindex',
    'refexmodindex',
    'refstmodindex',
    )
821
def fixup_refmodindexes(fragment):
    """Fold <ref*modindex> markers into co-located <module> elements.

    Locates <ref*modindex>...</> co-located with <module>...</> and has
    fixup_refmodindexes_chunk() remove the <ref*modindex>, replacing it
    with an index attribute on the <module> element.

    Relies on the node_id attributes on the parent nodes (convert() sets
    them with add_node_ids() beforehand) so that each parent is processed
    only once.
    """
    nodes = find_all_elements_from_set(fragment, REFMODINDEX_ELEMENTS)
    d = {}
    for node in nodes:
        parent = node.parentNode
        d[parent.node_id] = parent
    del nodes
    # An explicit loop: map() would build a list of None results only to
    # throw it away, and does nothing at all where map() is lazy.
    for parent in d.values():
        fixup_refmodindexes_chunk(parent)
833
834
def fixup_refmodindexes_chunk(container):
    """Resolve the <ref*modindex> entries found in one container.

    An index entry whose "module" attribute equals the text of a <module>
    element in the same container marks that element with index="yes" and
    is then removed from container.  Index entries that have children are
    reported and left in place.
    """
    # The container is expected to be a paragraph; report it when it isn't.
    if container.tagName != PARA_ELEMENT:
        bwrite("--- fixup_refmodindexes_chunk(%s)\n" % container)
    modules = find_all_elements(container, "module")
    if not modules:
        return
    stale = []
    for entry in find_all_elements_from_set(container, REFMODINDEX_ELEMENTS):
        if entry.childNodes:
            bwrite("--- unexpected number of children for %s node:\n"
                   % entry.tagName)
            ewrite(entry.toxml() + "\n")
            continue
        wanted = entry.getAttribute("module")
        matched = 0
        for module in modules:
            # Only <module> elements with exactly one child are compared.
            kids = module.childNodes
            if len(kids) == 1 and kids[0].data == wanted:
                matched = 1
                module.setAttribute("index", "yes")
        if matched:
            stale.append(entry)
    # Removal is deferred until every entry has been examined.
    for entry in stale:
        container.removeChild(entry)
864
865
def fixup_bifuncindexes(fragment):
    """Run fixup_bifuncindexes_chunk() on each parent of a <bifuncindex>.

    Relies on the node_id attributes on the parent nodes (convert() sets
    them with add_node_ids() beforehand).
    """
    nodes = find_all_elements(fragment, 'bifuncindex')
    d = {}
    # make sure that each parent is only processed once:
    for node in nodes:
        parent = node.parentNode
        d[parent.node_id] = parent
    del nodes
    # An explicit loop: map() would build a list of None results only to
    # throw it away, and does nothing at all where map() is lazy.
    for parent in d.values():
        fixup_bifuncindexes_chunk(parent)
875
876
def fixup_bifuncindexes_chunk(container):
    """Resolve the <bifuncindex> children of one container.

    A <function> child whose text is the entry's "name" attribute followed
    by "()" gets index="yes" and module="__builtin__".  Every entry that
    matched at least one function is then removed from container.
    """
    index_entries = find_all_child_elements(container, "bifuncindex")
    functions = find_all_child_elements(container, "function")
    stale = []
    for entry in index_entries:
        # The function text carries a trailing "()" the index name lacks.
        wanted = entry.getAttribute("name") + "()"
        matched = 0
        for func in functions:
            if func.childNodes[0].data != wanted:
                continue
            func.setAttribute("index", "yes")
            func.setAttribute("module", "__builtin__")
            matched = 1
        if matched:
            stale.append(entry)
    # Removal is deferred until every entry has been examined.
    for entry in stale:
        container.removeChild(entry)
897
898
def join_adjacent_elements(container, gi):
    """Merge directly adjacent <gi> sibling elements below container.

    Wherever two neighbouring children of a node are both named gi, the
    children of the second are appended to the first and the second is
    removed.  Runs of more than two collapse into the first element.  The
    whole tree below container is visited.

    The child count is re-read after every merge (a cached count went
    stale and could index past the end of the list), and the last child
    of each node is visited as well.
    """
    queue = [container]
    while queue:
        parent = queue.pop()
        children = parent.childNodes
        i = 0
        while i < len(children):
            child = children[i]
            if (child.nodeName == gi and i + 1 < len(children)
                and children[i + 1].nodeName == gi):
                ewrite("--- merging two <%s/> elements\n" % gi)
                nextchild = children[i + 1]
                nextchildren = nextchild.childNodes
                while len(nextchildren):
                    node = nextchildren[0]
                    nextchild.removeChild(node)
                    child.appendChild(node)
                parent.removeChild(nextchild)
                # Stay on this child: the new neighbour may be a <gi> too.
                continue
            if child.nodeType == ELEMENT:
                queue.append(child)
            i = i + 1
923
924
Fred Drake4db5b461998-12-01 19:03:01 +0000925_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
Fred Drakefcc59101999-01-06 22:50:52 +0000926
Fred Drake4db5b461998-12-01 19:03:01 +0000927def write_esis(doc, ofp, knownempty):
928 for node in doc.childNodes:
929 nodeType = node.nodeType
Fred Drakee779d4f1999-05-10 19:36:52 +0000930 if nodeType == ELEMENT:
Fred Drake3e8f9212001-03-23 17:01:47 +0000931 gi = node.tagName
Fred Drake4db5b461998-12-01 19:03:01 +0000932 if knownempty(gi):
933 if node.hasChildNodes():
Fred Drake865e9ff1999-07-29 22:23:19 +0000934 raise ValueError, \
935 "declared-empty node <%s> has children" % gi
Fred Drake4db5b461998-12-01 19:03:01 +0000936 ofp.write("e\n")
Fred Drake3e8f9212001-03-23 17:01:47 +0000937 for k, value in node.attributes.items():
Fred Drake4db5b461998-12-01 19:03:01 +0000938 if _token_rx.match(value):
939 dtype = "TOKEN"
940 else:
941 dtype = "CDATA"
942 ofp.write("A%s %s %s\n" % (k, dtype, esistools.encode(value)))
943 ofp.write("(%s\n" % gi)
944 write_esis(node, ofp, knownempty)
945 ofp.write(")%s\n" % gi)
Fred Drakee779d4f1999-05-10 19:36:52 +0000946 elif nodeType == TEXT:
Fred Drake4db5b461998-12-01 19:03:01 +0000947 ofp.write("-%s\n" % esistools.encode(node.data))
Fred Drakea20581c1999-08-26 17:51:56 +0000948 elif nodeType == ENTITY_REFERENCE:
Fred Drake3e8f9212001-03-23 17:01:47 +0000949 ofp.write("&%s\n" % node.nodeName)
Fred Drake4db5b461998-12-01 19:03:01 +0000950 else:
951 raise RuntimeError, "unsupported node type: %s" % nodeType
952
953
def convert(ifp, ofp):
    """Read a document from ifp, transform it, and write ESIS data to ofp.

    The input is parsed with esistools.parse() and expanded into a
    document fragment, the fix-up passes below are applied in order, and
    the result is written with write_esis().

    An IOError with errno EPIPE raised while writing is ignored; any other
    IOError propagates unchanged.
    """
    events = esistools.parse(ifp)
    toktype, doc = events.getEvent()
    fragment = doc.createDocumentFragment()
    events.expandNode(fragment)

    normalize(fragment)
    simplify(doc, fragment)
    handle_labels(doc, fragment)
    handle_appendix(doc, fragment)
    # NOTE(review): the next two passes receive doc rather than fragment,
    # unlike the other passes -- confirm that this is intended.
    fixup_trailing_whitespace(doc, {
        "abstract": "\n",
        "title": "",
        "chapter": "\n\n",
        "section": "\n\n",
        "subsection": "\n\n",
        "subsubsection": "\n\n",
        "paragraph": "\n\n",
        "subparagraph": "\n\n",
        })
    cleanup_root_text(doc)
    cleanup_trailing_parens(fragment, ["function", "method", "cfunction"])
    cleanup_synopses(doc, fragment)
    fixup_descriptors(doc, fragment)
    fixup_verbatims(fragment)
    normalize(fragment)
    fixup_paras(doc, fragment)
    fixup_sectionauthors(doc, fragment)
    fixup_table_structures(doc, fragment)
    fixup_rfc_references(doc, fragment)
    fixup_signatures(doc, fragment)
    add_node_ids(fragment)
    fixup_refmodindexes(fragment)
    fixup_bifuncindexes(fragment)
    # Take care of ugly hacks in the LaTeX markup to avoid LaTeX and
    # LaTeX2HTML screwing with GNU-style long options (the '--' problem).
    join_adjacent_elements(fragment, "option")
    #
    # The parser's empty elements, except <author> and <rfc>: both are
    # given text content by the passes above.
    d = {}
    for gi in events.parser.get_empties():
        d[gi] = gi
    if d.has_key("author"):
        del d["author"]
    if d.has_key("rfc"):
        del d["rfc"]
    knownempty = d.has_key
    #
    try:
        write_esis(fragment, ofp, knownempty)
    except IOError:
        # Ignore EPIPE; it just means that whoever we're writing to stopped
        # reading.  The rest of the output would be ignored.  All other
        # errors should still be reported.
        #
        # The errno attribute is read instead of unpacking the exception,
        # so an IOError without exactly two arguments is re-raised as
        # itself rather than masked by an unpacking error.
        err = sys.exc_info()[1]
        if err.errno != errno.EPIPE:
            raise
1009
1010
def main():
    """Command-line entry point.

    No arguments:   convert stdin to stdout.
    One argument:   convert the named file to stdout.
    Two arguments:  convert the first file into the second; the output is
                    collected in memory and the output file is only
                    written after the conversion has finished.

    Any other argument count calls usage() and exits with status 2.  Files
    opened here are closed again, even when the conversion fails.
    """
    argc = len(sys.argv)
    if argc == 1:
        ifp = sys.stdin
        ofp = sys.stdout
    elif argc == 2:
        ifp = open(sys.argv[1])
        ofp = sys.stdout
    elif argc == 3:
        # Imported before the input is opened so that a failing import
        # cannot leave the input file open.
        import StringIO
        ifp = open(sys.argv[1])
        ofp = StringIO.StringIO()
    else:
        usage()
        sys.exit(2)
    try:
        convert(ifp, ofp)
        if argc == 3:
            fp = open(sys.argv[2], "w")
            try:
                fp.write(ofp.getvalue())
            finally:
                fp.close()
            ofp.close()
    finally:
        if ifp is not sys.stdin:
            ifp.close()
Fred Drake03204731998-11-23 17:02:03 +00001031
1032
1033if __name__ == "__main__":
1034 main()