blob: 21d3522cf51923bf3567b3f8204c39109d70caf9 [file] [log] [blame]
Fred Drake03204731998-11-23 17:02:03 +00001#! /usr/bin/env python
2
Fred Drake7dab6af1999-01-28 23:59:58 +00003"""Perform massive transformations on a document tree created from the LaTeX
4of the Python documentation, and dump the ESIS data for the transformed tree.
Fred Drake03204731998-11-23 17:02:03 +00005"""
6__version__ = '$Revision$'
7
8
9import errno
Fred Drake4db5b461998-12-01 19:03:01 +000010import esistools
11import re
Fred Drake03204731998-11-23 17:02:03 +000012import string
13import sys
14import xml.dom.core
Fred Drakee779d4f1999-05-10 19:36:52 +000015
16from xml.dom.core import \
17 ELEMENT, \
Fred Drakea20581c1999-08-26 17:51:56 +000018 ENTITY_REFERENCE, \
Fred Drakee779d4f1999-05-10 19:36:52 +000019 TEXT
Fred Drake03204731998-11-23 17:02:03 +000020
21
class ConversionError(Exception):
    """Raised when the document tree cannot be converted as expected."""
24
25
# Plain diagnostic writer; bwrite() below is the emphasized variant.
ewrite = sys.stderr.write
try:
    # We can only do this trick on Unix (if tput is on $PATH)!
    # "posix" is a value of os.name; sys.platform carries a specific
    # platform identifier instead, so testing it against "posix" kept the
    # bold output permanently disabled.
    import os
    if os.name != "posix" or not sys.stderr.isatty():
        raise ImportError
    import curses
    import commands
except ImportError:
    # No usable terminal support: emphasized output is just plain output.
    bwrite = ewrite
else:
    def bwrite(s, BOLDON=commands.getoutput("tput bold"),
               BOLDOFF=commands.getoutput("tput sgr0")):
        """Write s to stderr between the terminal's bold on/off sequences.

        Both sequences are obtained from tput once, when this function is
        defined, and are kept as default argument values.
        """
        ewrite("%s%s%s" % (BOLDON, s, BOLDOFF))
39
40
# Tag name used for the paragraph elements that build_para() creates.
PARA_ELEMENT = "para"

# Set to a true value to get paragraph-fixer diagnostics on stderr.
DEBUG_PARA_FIXER = 0

if not DEBUG_PARA_FIXER:
    def para_msg(s):
        """Discard the message; paragraph debugging is switched off."""
        return None
else:
    def para_msg(s):
        """Write a paragraph-fixer debugging message to stderr."""
        ewrite("*** %s\n" % s)
51
Fred Drakefcc59101999-01-06 22:50:52 +000052
# Workaround to deal with invalid documents (multiple root elements).  This
# does not indicate a bug in the DOM implementation.
#
def get_documentElement(doc):
    """Return the last element node among doc's direct children.

    Returns None when doc has no element children at all.
    """
    found = None
    for candidate in doc.childNodes:
        if candidate.nodeType != ELEMENT:
            continue
        # No early exit: a later element replaces an earlier one.
        found = candidate
    return found

xml.dom.core.Document.get_documentElement = get_documentElement
64
65
# Replace get_childNodes for the Document class; without this, children
# accessed from the Document object via .childNodes (no matter how many
# levels of access are used) will be given an ownerDocument of None.
#
def get_childNodes(doc):
    """Return a NodeList over the children of doc's underlying node."""
    raw = doc._node
    return xml.dom.core.NodeList(raw.children, raw)

xml.dom.core.Document.get_childNodes = get_childNodes
74
75
def get_first_element(doc, gi):
    """Return the first direct child of doc whose node name is gi.

    Returns None when no child matches.
    """
    for candidate in doc.childNodes:
        if candidate.get_nodeName() != gi:
            continue
        return candidate
    return None
80
def extract_first_element(doc, gi):
    """Detach and return the first direct child of doc named gi.

    Returns None, leaving doc untouched, when there is no such child.
    """
    match = get_first_element(doc, gi)
    if match is None:
        return None
    doc.removeChild(match)
    return match
86
87
def find_all_elements(doc, gi):
    """Return a list of the nodes named gi at or below doc.

    doc itself comes first if its node name matches; then, for each
    element child in turn, the child (if it matches) followed by whatever
    its getElementsByTagName(gi) reports.
    """
    found = []
    collect = found.append
    if doc.get_nodeName() == gi:
        collect(doc)
    for kid in doc.childNodes:
        if kid.nodeType != ELEMENT:
            continue
        if kid.get_tagName() == gi:
            collect(kid)
        for descendant in kid.getElementsByTagName(gi):
            collect(descendant)
    return found
99
def find_all_child_elements(doc, gi):
    """Return a list of the direct children of doc whose node name is gi."""
    matches = []
    for kid in doc.childNodes:
        if kid.get_nodeName() != gi:
            continue
        matches.append(kid)
    return matches
106
def find_all_elements_from_set(doc, gi_set):
    """Return a list of the nodes at or below doc whose name is in gi_set."""
    found = []
    # The helper fills in and returns the very list it is handed.
    return __find_all_elements_from_set(doc, gi_set, found)
109
def __find_all_elements_from_set(doc, gi_set, nodes):
    """Append to nodes every node at or below doc named in gi_set.

    doc is tested before its element children are visited recursively.
    The nodes list is modified in place and also returned.
    """
    if doc.get_nodeName() in gi_set:
        nodes.append(doc)
    for kid in doc.childNodes:
        if kid.get_nodeType() != ELEMENT:
            continue
        __find_all_elements_from_set(kid, gi_set, nodes)
    return nodes
Fred Drake7dab6af1999-01-28 23:59:58 +0000117
118
def simplify(doc, fragment):
    """Pull the loose top-level nodes of fragment into its document element.

    The <documentclass>, <title> and <input> children of fragment are
    removed; the title and inputs are re-inserted at the front of the
    document element, and a top-level <document> child is renamed to the
    classname given by <documentclass>.  Leading text nodes of fragment
    are dropped at the end.
    """
    # Try to rationalize the document a bit, since these things are simply
    # not valid SGML/XML documents as they stand, and need a little work.
    documentclass = "document"
    classnode = extract_first_element(fragment, "documentclass")
    if classnode is not None:
        documentclass = classnode.getAttribute("classname")
    hoisted = []
    title = extract_first_element(fragment, "title")
    if title is not None:
        hoisted.append(title)
    # update the name of the root element
    root = get_first_element(fragment, "document")
    if root is not None:
        root._node.name = documentclass
    # collect every remaining top-level <input>
    pulled = extract_first_element(fragment, "input")
    while pulled is not None:
        hoisted.append(pulled)
        pulled = extract_first_element(fragment, "input")
    if hoisted:
        docelem = get_documentElement(fragment)
        # Each item goes in at the very front, so walk the list backwards
        # to end up with the original order.
        hoisted.reverse()
        for item in hoisted:
            newline = doc.createTextNode("\n")
            docelem.insertBefore(newline, docelem.firstChild)
            docelem.insertBefore(item, newline)
        docelem.insertBefore(doc.createTextNode("\n"), docelem.firstChild)
    while fragment.firstChild and fragment.firstChild.get_nodeType() == TEXT:
        fragment.removeChild(fragment.firstChild)
Fred Drake03204731998-11-23 17:02:03 +0000149
150
def cleanup_root_text(doc):
    """Remove the text children of doc, sparing those right after a comment.

    A text node is kept only when the node just before it is named
    "COMMENT" and was not itself removed as text.
    """
    doomed = []
    after_comment = 0
    for kid in doc.childNodes:
        if kid.get_nodeType() == TEXT and not after_comment:
            doomed.append(kid)
            after_comment = 0
        elif kid.get_nodeName() == "COMMENT":
            after_comment = 1
        else:
            after_comment = 0
    # Remove only after the scan so the child list is not changed while
    # it is being walked.
    for kid in doomed:
        doc.removeChild(kid)
163
164
# Tag names that find_and_fix_descriptors() hands to rewrite_descriptor().
DESCRIPTOR_ELEMENTS = (
    "cfuncdesc",
    "cvardesc",
    "ctypedesc",
    "classdesc",
    "memberdesc",
    "memberdescni",
    "methoddesc",
    "methoddescni",
    "excdesc",
    "funcdesc",
    "funcdescni",
    "opcodedesc",
    "datadesc",
    "datadescni",
    )
171
def fixup_descriptors(doc, fragment):
    """Rewrite the descriptor elements found in each <section> of fragment."""
    for section in find_all_elements(fragment, "section"):
        find_and_fix_descriptors(doc, section)
176
177
def find_and_fix_descriptors(doc, container):
    """Rewrite descriptor children of container, descending into subsections.

    Element children named in DESCRIPTOR_ELEMENTS are rewritten;
    <subsection> children are searched recursively.  Other nodes are
    left alone.
    """
    for kid in container.childNodes:
        if kid.get_nodeType() != ELEMENT:
            continue
        tag = kid.get_tagName()
        if tag == "subsection":
            find_and_fix_descriptors(doc, kid)
        elif tag in DESCRIPTOR_ELEMENTS:
            rewrite_descriptor(doc, kid)
187
Fred Drakecb657811999-01-29 20:55:07 +0000188
def rewrite_descriptor(doc, descriptor):
    """Restructure one descriptor element in place.

    Afterwards the descriptor holds one or more <signature> elements
    followed by a single <description>, separated by whitespace text.

    Raises RuntimeError if a 'var' attribute appears on a descriptor
    other than opcodedesc.
    """
    #
    # Do these things:
    #   1. Add an "index='no'" attribute to the element if the tagName
    #      ends in 'ni', removing the 'ni' from the name.
    #   2. Create a <signature> from the name attribute
    #   2a.Create an <args> if it appears to be available.
    #   3. Create additional <signature>s from <*line{,ni}> elements,
    #      if found.
    #   4. If a <versionadded> is found, move it to an attribute on the
    #      descriptor.
    #   5. Move remaining child nodes to a <description> element.
    #   6. Put it back together.
    #
    # 1.
    descname = descriptor.get_tagName()
    indexed = descname[-2:] != "ni"
    if not indexed:
        descname = descname[:-2]
        descriptor.setAttribute("index", "no")
        descriptor._node.name = descname
    desctype = descname[:-4]            # remove 'desc'
    linename = desctype + "line"
    if not indexed:
        linename = linename + "ni"
    # 2.
    signature = doc.createElement("signature")
    namenode = doc.createElement("name")
    signature.appendChild(doc.createTextNode("\n "))
    signature.appendChild(namenode)
    namenode.appendChild(doc.createTextNode(descriptor.getAttribute("name")))
    descriptor.removeAttribute("name")
    # 2a.
    if descriptor.attributes.has_key("var"):
        if descname != "opcodedesc":
            raise RuntimeError(
                "got 'var' attribute on descriptor other than opcodedesc")
        variable = descriptor.getAttribute("var")
        if variable:
            varargs = doc.createElement("args")
            varargs.appendChild(doc.createTextNode(variable))
            signature.appendChild(doc.createTextNode("\n "))
            signature.appendChild(varargs)
        descriptor.removeAttribute("var")
    replacements = [signature]
    children = descriptor.childNodes
    pos = skip_leading_nodes(children)
    if pos < len(children):
        first = children[pos]
        if first.get_nodeName() == "args":
            # create an <args> in <signature>:
            newargs = doc.createElement("args")
            # Take a snapshot first; the nodes are pulled out of the
            # child list while they are being moved.
            snapshot = []
            for piece in first.childNodes:
                snapshot.append(piece)
            for piece in snapshot:
                first.removeChild(piece)
                newargs.appendChild(piece)
            signature.appendChild(doc.createTextNode("\n "))
            signature.appendChild(newargs)
    signature.appendChild(doc.createTextNode("\n "))
    # 3, 4.
    pos = skip_leading_nodes(children, pos)
    while pos < len(children):
        current = children[pos]
        if current.get_nodeName() not in (linename, "versionadded"):
            break
        if current.get_tagName() == linename:
            # this is really a supplemental signature, create <signature>
            replacements.append(methodline_to_signature(doc, current))
        else:
            # <versionadded added=...>
            descriptor.setAttribute("added", current.getAttribute("version"))
        pos = skip_leading_nodes(children, pos + 1)
    # 5.
    description = doc.createElement("description")
    description.appendChild(doc.createTextNode("\n"))
    replacements.append(description)
    move_children(descriptor, description, pos)
    tail = description.childNodes[-1]
    if tail.nodeType == TEXT:
        tail.data = string.rstrip(tail.data) + "\n "
    # 6.
    # should have nothing but whitespace and signature lines in <descriptor>;
    # discard them
    while descriptor.childNodes:
        descriptor.removeChild(descriptor.childNodes[0])
    for replacement in replacements:
        descriptor.appendChild(doc.createTextNode("\n "))
        descriptor.appendChild(replacement)
    descriptor.appendChild(doc.createTextNode("\n"))
Fred Drake03204731998-11-23 17:02:03 +0000282
Fred Drake7dab6af1999-01-28 23:59:58 +0000283
def methodline_to_signature(doc, methodline):
    """Build and return a <signature> element from a *line element.

    The 'name' attribute of methodline becomes a <name> child (and is
    removed from methodline); any children of methodline are moved into
    an <args> child.
    """
    sig = doc.createElement("signature")
    sig.appendChild(doc.createTextNode("\n "))
    namenode = doc.createElement("name")
    label = doc.createTextNode(methodline.getAttribute("name"))
    namenode.appendChild(label)
    methodline.removeAttribute("name")
    sig.appendChild(namenode)
    if len(methodline.childNodes):
        argsnode = doc.createElement("args")
        sig.appendChild(doc.createTextNode("\n "))
        sig.appendChild(argsnode)
        move_children(methodline, argsnode)
    sig.appendChild(doc.createTextNode("\n "))
    return sig
Fred Drake03204731998-11-23 17:02:03 +0000298
299
def move_children(origin, dest, start=0):
    """Move the children of origin from index start onward to the end of dest.

    Relative order is preserved.  The loop depends on origin.childNodes
    shrinking as nodes are removed.
    """
    remaining = origin.childNodes
    while len(remaining) > start:
        moving = remaining[start]
        origin.removeChild(moving)
        dest.appendChild(moving)
306
307
def handle_appendix(doc, fragment):
    """Move everything following the <appendix> marker into <back-matter>.

    The first direct child of the document element that contains an
    <appendix> element has that marker removed; every later sibling is
    then moved into a new <back-matter> element appended to the document
    element.  Nothing is added when no siblings follow the marker.
    """
    # must be called after simplify() if document is multi-rooted to begin
    # with
    docelem = get_documentElement(fragment)
    appendices = 0
    nodes = []
    for node in docelem.childNodes:
        if appendices:
            # everything past the marker's container is back matter
            nodes.append(node)
        elif node.nodeType == ELEMENT:
            appnodes = node.getElementsByTagName("appendix")
            if appnodes:
                appendices = 1
                parent = appnodes[0].parentNode
                parent.removeChild(appnodes[0])
                parent.normalize()
    if nodes:
        for node in nodes:
            docelem.removeChild(node)
        docelem.appendChild(doc.createTextNode("\n\n\n"))
        back = doc.createElement("back-matter")
        docelem.appendChild(back)
        back.appendChild(doc.createTextNode("\n"))
        # drop leading whitespace-only text from the moved nodes
        while nodes and nodes[0].nodeType == TEXT \
              and not string.strip(nodes[0].data):
            del nodes[0]
        for node in nodes:
            back.appendChild(node)
        docelem.appendChild(doc.createTextNode("\n"))
Fred Drake03204731998-11-23 17:02:03 +0000335
336
def handle_labels(doc, fragment):
    """Turn each <label id="..."/> into an 'id' attribute and remove it.

    The id is set on the label's parent, or on the grandparent when the
    parent is a <title>.  Labels with an empty id are left untouched.
    After a label is removed from a <title>, trailing whitespace is
    stripped from the title's final text node.
    """
    for label in find_all_elements(fragment, "label"):
        label_id = label.getAttribute("id")
        if not label_id:
            continue
        parent = label.parentNode
        parentTagName = parent.get_tagName()
        if parentTagName == "title":
            parent.parentNode.setAttribute("id", label_id)
        else:
            parent.setAttribute("id", label_id)
        # now, remove <label id="..."/> from parent:
        parent.removeChild(label)
        if parentTagName == "title":
            parent.normalize()
            children = parent.childNodes
            # The label may have been the title's only child, in which
            # case nothing is left to trim.
            if len(children) and children[-1].nodeType == TEXT:
                children[-1].data = string.rstrip(children[-1].data)
Fred Drake03204731998-11-23 17:02:03 +0000355
356
def fixup_trailing_whitespace(doc, wsmap):
    """Normalize the trailing whitespace of the elements named in wsmap.

    The tree below doc is walked breadth-first.  For each node whose name
    is a key of wsmap, a final text child has its trailing whitespace
    replaced by the mapped string.
    """
    queue = [doc]
    while queue:
        node = queue[0]
        del queue[0]
        if wsmap.has_key(node.get_nodeName()):
            ws = wsmap[node.get_tagName()]
            children = node.childNodes
            # Look at the last child directly (no in-place reversal of
            # the child list), and tolerate an element with no children.
            if len(children) and children[-1].nodeType == TEXT:
                children[-1].data = string.rstrip(children[-1].data) + ws
            # hack to get the title in place:
            if node.get_tagName() == "title" \
               and node.parentNode.firstChild.get_nodeType() == ELEMENT:
                # createTextNode is the text factory used throughout this
                # file.
                node.parentNode.insertBefore(doc.createTextNode("\n "),
                                             node.parentNode.firstChild)
        for child in node.childNodes:
            if child.nodeType == ELEMENT:
                queue.append(child)
Fred Drake1ff6db41998-11-23 23:10:35 +0000377 queue.append(child)
378
379
def normalize(doc):
    """Call normalize() on every element that is a direct child of doc."""
    for kid in doc.childNodes:
        if kid.nodeType != ELEMENT:
            continue
        kid.normalize()
384
385
def cleanup_trailing_parens(doc, element_names):
    """Strip a trailing "()" from the text of the named elements.

    Elements below doc are visited breadth-first.  An element whose tag
    is in element_names is trimmed only if its sole child is a text node
    ending in "()", and its children are never searched; any other
    element has its element children queued for inspection.
    """
    targets = {}
    for gi in element_names:
        targets[gi] = gi
    pending = []
    for top in doc.childNodes:
        if top.nodeType == ELEMENT:
            pending.append(top)
    while pending:
        current = pending[0]
        del pending[0]
        if not targets.has_key(current.get_tagName()):
            for kid in current.childNodes:
                if kid.nodeType == ELEMENT:
                    pending.append(kid)
            continue
        kids = current.childNodes
        if len(kids) != 1 or kids[0].nodeType != TEXT:
            continue
        text = kids[0].data
        if text[-2:] == "()":
            kids[0].data = text[:-2]
409
410
def contents_match(left, right):
    """Return 1 if left and right have equivalent children, else 0.

    Children are compared pairwise: elements must share a tag name and
    have matching contents (attributes are not compared), and text nodes
    must carry identical data.  Any other node type counts as a mismatch.
    """
    left_children = left.childNodes
    right_children = right.childNodes
    count = len(left_children)
    if count != len(right_children):
        return 0
    for i in range(count):
        l = left_children[i]
        r = right_children[i]
        kind = l.nodeType
        if kind != r.nodeType:
            return 0
        if kind == ELEMENT:
            if l.get_tagName() != r.get_tagName():
                return 0
            # should check attributes, but that's not a problem here
            if not contents_match(l, r):
                return 0
        elif kind == TEXT:
            if l.data != r.data:
                return 0
        else:
            # not quite right, but good enough
            return 0
    return 1
432 return 1
433
434
def create_module_info(doc, section):
    """Collect a section's module metadata into a <moduleinfo> element.

    Does nothing unless section has a <modulesynopsis> child.  That child
    is renamed to <synopsis>; <moduleauthor> is renamed to <author> with
    its 'name' attribute turned into text; <platform> is pulled out.  When
    section is itself a <section>, these are gathered with the
    <declaremodule> name/type, any <versionadded> and, where it differs
    from the synopsis, the title (as <short-synopsis>) into a new
    <moduleinfo> inserted into section.
    """
    # Heavy.
    node = extract_first_element(section, "modulesynopsis")
    if node is None:
        return
    node._node.name = "synopsis"
    # An empty synopsis has no trailing period to trim.
    if len(node.childNodes):
        lastchild = node.childNodes[-1]
        if lastchild.nodeType == TEXT \
           and lastchild.data[-1:] == ".":
            lastchild.data = lastchild.data[:-1]
    modauthor = extract_first_element(section, "moduleauthor")
    if modauthor:
        modauthor._node.name = "author"
        modauthor.appendChild(doc.createTextNode(
            modauthor.getAttribute("name")))
        modauthor.removeAttribute("name")
    platform = extract_first_element(section, "platform")
    if section.get_tagName() == "section":
        modinfo_pos = 2
        modinfo = doc.createElement("moduleinfo")
        moddecl = extract_first_element(section, "declaremodule")
        name = None
        if moddecl:
            modinfo.appendChild(doc.createTextNode("\n "))
            name = moddecl.attributes["name"].value
            namenode = doc.createElement("name")
            namenode.appendChild(doc.createTextNode(name))
            modinfo.appendChild(namenode)
            modtype = moddecl.attributes.get("type")
            if modtype:
                modtype = modtype.value
                modinfo.appendChild(doc.createTextNode("\n "))
                typenode = doc.createElement("type")
                typenode.appendChild(doc.createTextNode(modtype))
                modinfo.appendChild(typenode)
        versionadded = extract_first_element(section, "versionadded")
        if versionadded:
            modinfo.setAttribute("added", versionadded.getAttribute("version"))
        title = get_first_element(section, "title")
        if title:
            children = title.childNodes
            if len(children) >= 2 \
               and children[0].get_nodeName() == "module" \
               and children[0].childNodes[0].data == name:
                # this is it; morph the <title> into <short-synopsis>
                first_data = children[1]
                if first_data.data[:4] == " ---":
                    first_data.data = string.lstrip(first_data.data[4:])
                title._node.name = "short-synopsis"
                if children[-1].nodeType == TEXT \
                   and children[-1].data[-1:] == ".":
                    children[-1].data = children[-1].data[:-1]
                section.removeChild(title)
                section.removeChild(section.childNodes[0])
                title.removeChild(children[0])
                modinfo_pos = 0
            else:
                ewrite("module name in title doesn't match"
                       " <declaremodule/>; no <short-synopsis/>\n")
        else:
            ewrite("Unexpected condition: <section/> without <title/>\n")
        modinfo.appendChild(doc.createTextNode("\n "))
        modinfo.appendChild(node)
        if title and not contents_match(title, node):
            # The short synopsis is actually different,
            # and needs to be stored:
            modinfo.appendChild(doc.createTextNode("\n "))
            modinfo.appendChild(title)
        if modauthor:
            modinfo.appendChild(doc.createTextNode("\n "))
            modinfo.appendChild(modauthor)
        if platform:
            modinfo.appendChild(doc.createTextNode("\n "))
            modinfo.appendChild(platform)
        modinfo.appendChild(doc.createTextNode("\n "))
        section.insertBefore(modinfo, section.childNodes[modinfo_pos])
        section.insertBefore(doc.createTextNode("\n "), modinfo)
        #
        # The rest of this removes extra newlines from where we cut out
        # a lot of elements.  A lot of code for minimal value, but
        # keeps the generated *ML from being too funny looking.
        #
        section.normalize()
        children = section.childNodes
        for i in range(len(children)):
            child = children[i]
            if child.get_nodeName() != "moduleinfo":
                continue
            if i + 1 >= len(children):
                # <moduleinfo> is the last child: nothing follows it that
                # could need its leading whitespace trimmed.
                continue
            nextnode = children[i + 1]
            if nextnode.nodeType == TEXT:
                data = nextnode.data
                if len(string.lstrip(data)) < (len(data) - 4):
                    nextnode.data = "\n\n\n" + string.lstrip(data)
Fred Drakeaaed9711998-12-10 20:25:30 +0000527
528
def cleanup_synopses(doc, fragment):
    """Run create_module_info() on every <section> found in fragment."""
    sections = find_all_elements(fragment, "section")
    for section in sections:
        create_module_info(doc, section)
Fred Drakeaaed9711998-12-10 20:25:30 +0000532
533
def fixup_table_structures(doc, fragment):
    """Run fixup_table() on every <table> found in fragment."""
    tables = find_all_elements(fragment, "table")
    for table in tables:
        fixup_table(doc, table)
537
Fred Drakef8ebb551999-01-14 19:45:38 +0000538
def fixup_table(doc, table):
    """Rebuild table as a single <tgroup> holding a <thead> and a <tbody>.

    Direct <entry> children become the one heading row; direct <row>
    children become the body.  An <hline> sets rowsep="1" on the row
    before it.  Everything else left in the table must be whitespace
    text or <hline> and is discarded.

    Raises ConversionError on any other leftover content.
    """
    # create the table head
    thead = doc.createElement("thead")
    headrow = doc.createElement("row")
    move_elements_by_name(doc, table, headrow, "entry")
    thead.appendChild(doc.createTextNode("\n "))
    thead.appendChild(headrow)
    thead.appendChild(doc.createTextNode("\n "))
    # create the table body
    tbody = doc.createElement("tbody")
    latest_row = None
    children = table.childNodes
    for kid in children:
        if kid.nodeType != ELEMENT:
            continue
        tag = kid.get_tagName()
        if tag == "hline" and latest_row is not None:
            latest_row.setAttribute("rowsep", "1")
        elif tag == "row":
            latest_row = kid
    # save the rows:
    tbody.appendChild(doc.createTextNode("\n "))
    move_elements_by_name(doc, table, tbody, "row", sep="\n ")
    # and toss the rest:
    while children:
        leftover = children[0]
        kind = leftover.nodeType
        if kind == TEXT:
            if string.strip(leftover.data):
                raise ConversionError("unexpected free data in table")
        elif kind == ELEMENT:
            if leftover.get_tagName() != "hline":
                raise ConversionError(
                    "unexpected <%s> in table" % leftover.get_tagName())
        else:
            raise ConversionError(
                "unexpected %s node in table" % leftover.__class__.__name__)
        table.removeChild(leftover)
    # nothing left in the <table>; add the <thead> and <tbody>
    tgroup = doc.createElement("tgroup")
    for part in (thead, tbody):
        tgroup.appendChild(doc.createTextNode("\n "))
        tgroup.appendChild(part)
    tgroup.appendChild(doc.createTextNode("\n "))
    table.appendChild(tgroup)
    # now make the <entry>s look nice:
    for tablerow in table.getElementsByTagName("row"):
        fixup_row(doc, tablerow)
590
591
def fixup_row(doc, row):
    """Insert a "\\n " text node before every child of row except the first."""
    # Snapshot the children first; inserting changes the child list.
    later = []
    for kid in row.childNodes[1:]:
        later.append(kid)
    for kid in later:
        separator = doc.createTextNode("\n ")
        row.insertBefore(separator, kid)
597# row.appendChild(doc.createTextNode("\n "))
598
599
def move_elements_by_name(doc, source, dest, name, sep=None):
    """Move the direct children of source named name to the end of dest.

    Order is preserved.  When sep is true, a text node holding sep is
    appended to dest after each moved node.
    """
    # Pick the nodes first so source is not changed while it is scanned.
    chosen = []
    for kid in source.childNodes:
        if kid.get_nodeName() != name:
            continue
        chosen.append(kid)
    for kid in chosen:
        source.removeChild(kid)
        dest.appendChild(kid)
        if not sep:
            continue
        dest.appendChild(doc.createTextNode(sep))
610
611
# Node names whose contents the paragraph fixer descends into rather than
# treating as paragraph material.
RECURSE_INTO_PARA_CONTAINERS = (
    "chapter",
    "abstract",
    "enumerate",
    "section",
    "subsection",
    "subsubsection",
    "paragraph",
    "subparagraph",
    "back-matter",
    "howto",
    "manual",
    "item",
    "itemize",
    "fulllineitems",
    "enumeration",
    "descriptionlist",
    "definitionlist",
    "definition",
    )
Fred Drakefcc59101999-01-06 22:50:52 +0000620
# Element names that end the run of nodes build_para() gathers into a
# paragraph (together with RECURSE_INTO_PARA_CONTAINERS).
PARA_LEVEL_ELEMENTS = (
    "moduleinfo", "title", "verbatim", "enumerate", "item",
    "interpreter-session", "back-matter", "interactive-session",
    "opcodedesc", "classdesc", "datadesc",
    # "memberdescni" is spelled as in DESCRIPTOR_ELEMENTS; the earlier
    # spelling "membderdescni" could never match a real tag name.
    "funcdesc", "methoddesc", "excdesc", "memberdesc", "memberdescni",
    "funcdescni", "methoddescni", "excdescni",
    "tableii", "tableiii", "tableiv", "localmoduletable",
    "sectionauthor", "seealso", "itemize",
    # include <para>, so we can just do it again to get subsequent paras:
    PARA_ELEMENT,
    )
632
# NOTE(review): not referenced in the part of the file visible here;
# presumably consulted by skip_leading_nodes() -- confirm against it.
PARA_LEVEL_PRECEEDERS = (
    "setindexsubitem",
    "stindex",
    "obindex",
    "COMMENT",
    "label",
    "input",
    "title",
    "versionadded",
    "versionchanged",
    "declaremodule",
    "modulesynopsis",
    "moduleauthor",
    "indexterm",
    "leader",
    )
639
Fred Drake7dab6af1999-01-28 23:59:58 +0000640
def fixup_paras(doc, fragment):
    """Run the paragraph fixer over fragment.

    First every direct child named in RECURSE_INTO_PARA_CONTAINERS is
    processed, then every <description> element found in fragment.
    """
    for kid in fragment.childNodes:
        if kid.get_nodeName() not in RECURSE_INTO_PARA_CONTAINERS:
            continue
        fixup_paras_helper(doc, kid)
    for description in find_all_elements(fragment, "description"):
        fixup_paras_helper(doc, description)
Fred Drakefcc59101999-01-06 22:50:52 +0000648
649
def fixup_paras_helper(doc, container, depth=0):
    """Wrap the paragraph material among container's children.

    Children named in RECURSE_INTO_PARA_CONTAINERS are processed
    recursively; anything else is handed to build_para().  When
    DEBUG_PARA_FIXER is set and depth is 10, the process exits after
    building a paragraph.
    """
    # document is already normalized
    kids = container.childNodes
    pos = skip_leading_nodes(kids)
    while pos < len(kids):
        current = kids[pos]
        if current.get_nodeName() in RECURSE_INTO_PARA_CONTAINERS:
            # Something to recurse into:
            fixup_paras_helper(doc, current)
        else:
            # Paragraph material:
            build_para(doc, container, pos, len(kids))
            if DEBUG_PARA_FIXER and depth == 10:
                sys.exit(1)
        pos = skip_leading_nodes(kids, pos + 1)
Fred Drakefcc59101999-01-06 22:50:52 +0000664
665
def build_para(doc, parent, start, i):
    """Wrap a run of `parent`'s children in a new PARA_ELEMENT element.

    The run begins at index `start`; children up to (but not including)
    index `i` are scanned to find where it ends.  The run stops at the
    first element whose tag is a paragraph-level or container element, or
    at the first blank line ("\\n\\n") inside a text node.

    Returns an index into parent.childNodes: the new child count when the
    paragraph was appended at the end of `parent`, otherwise the value of
    `start` (bumped by one if a newline node had to be inserted) plus one.

    Raises ConversionError if no content could be collected.
    """
    children = parent.childNodes
    # `after` is the exclusive end index of the run being collected.
    after = start + 1
    # have_last is set when the run extends through the last child scanned.
    have_last = 0
    BREAK_ELEMENTS = PARA_LEVEL_ELEMENTS + RECURSE_INTO_PARA_CONTAINERS
    # Collect all children until \n\n+ is found in a text node or a
    # member of BREAK_ELEMENTS is found.
    for j in range(start, i):
        after = j + 1
        child = children[j]
        nodeType = child.nodeType
        if nodeType == ELEMENT:
            if child.get_tagName() in BREAK_ELEMENTS:
                # the breaking element itself is not part of the paragraph
                after = j
                break
        elif nodeType == TEXT:
            pos = string.find(child.data, "\n\n")
            if pos == 0:
                # text starts with a blank line: the run ends before it
                after = j
                break
            if pos >= 1:
                # blank line in mid-text: split there; this node (the part
                # before the split) stays in the run since after == j + 1
                child.splitText(pos)
                break
    else:
        # NOTE(review): read as the loop's else clause (runs only when no
        # break occurred), which matches how have_last is used below --
        # confirm against the original indentation.
        have_last = 1
    if (start + 1) > after:
        raise ConversionError(
            "build_para() could not identify content to turn into a paragraph")
    if children[after - 1].nodeType == TEXT:
        # we may need to split off trailing white space:
        child = children[after - 1]
        data = child.data
        if string.rstrip(data) != data:
            # the split-off whitespace node now follows the run, so the
            # run no longer reaches the end of the child list
            have_last = 0
            child.splitText(len(string.rstrip(data)))
    para = doc.createElement(PARA_ELEMENT)
    prev = None
    # Move the run into the paragraph back to front: each node is inserted
    # before the one moved previously, which preserves the original order
    # and keeps the remaining indexes valid while removing.
    indexes = range(start, after)
    indexes.reverse()
    for j in indexes:
        node = parent.childNodes[j]
        parent.removeChild(node)
        para.insertBefore(node, prev)
        prev = node
    if have_last:
        # nothing is left after the run: append the paragraph and a blank
        # line at the end of the parent
        parent.appendChild(para)
        parent.appendChild(doc.createTextNode("\n\n"))
        return len(parent.childNodes)
    else:
        # the node that followed the run has moved down to index `start`
        nextnode = parent.childNodes[start]
        if nextnode.nodeType == TEXT:
            # make sure the text after the paragraph begins with a newline
            if nextnode.data and nextnode.data[0] != "\n":
                nextnode.data = "\n" + nextnode.data
        else:
            # separate the paragraph from a following non-text node with a
            # newline text node of its own
            newnode = doc.createTextNode("\n")
            parent.insertBefore(newnode, nextnode)
            nextnode = newnode
            start = start + 1
        parent.insertBefore(para, nextnode)
        return start + 1
Fred Drakefcc59101999-01-06 22:50:52 +0000726
727
def skip_leading_nodes(children, start=0):
    """Return the index in `children` where paragraph building should begin,
    or where a recursive call to fixup_paras_helper() should be made (for
    subsections and the like).

    Whitespace-only text nodes and paragraph-level elements are stepped
    over.  A text node with leading whitespace is split so the whitespace
    ends up in a node of its own; the index of the non-blank remainder is
    returned.

    A result >= len(children) means there is nothing further in this list
    of children to build paragraphs from.
    """
    limit = len(children)
    index = start
    while index < limit:
        node = children[index]
        kind = node.nodeType
        if kind == TEXT:
            text = node.data
            stripped = string.lstrip(text)
            if stripped:
                if text == stripped:
                    # no leading whitespace: start right here
                    return index
                # peel the whitespace off into its own node; the content
                # now lives in the node that follows
                node.splitText(len(text) - len(stripped))
                return index + 1
            # whitespace only: keep going
        elif kind == ELEMENT:
            tag = node.get_tagName()
            if tag in RECURSE_INTO_PARA_CONTAINERS:
                return index
            if tag not in PARA_LEVEL_ELEMENTS + PARA_LEVEL_PRECEEDERS:
                return index
        index = index + 1
    return index
Fred Drakefba0ba21998-12-10 05:07:09 +0000759
760
def fixup_rfc_references(doc, fragment):
    """Give every <rfc> element in `fragment` a text child reading
    "RFC <num>", where <num> is the value of the element's "num" attribute.
    """
    for rfc in find_all_elements(fragment, "rfc"):
        label = "RFC " + rfc.getAttribute("num")
        rfc.appendChild(doc.createTextNode(label))
Fred Draked24167b1999-01-14 21:18:03 +0000765
766
def fixup_signatures(doc, fragment):
    """Flatten <optional> markup in the argument lists of `fragment`.

    For each element child of `fragment`, every <args> element and then
    every <constructor-args> element below it is run through fixup_args()
    and normalized afterwards.
    """
    for child in fragment.childNodes:
        if child.nodeType != ELEMENT:
            continue
        for tag in ("args", "constructor-args"):
            for arglist in child.getElementsByTagName(tag):
                fixup_args(doc, arglist)
                # merge the text nodes fixup_args() left side by side
                arglist.normalize()
778
779
def fixup_args(doc, arglist):
    """Replace each <optional> child of `arglist` by its own content wrapped
    in literal "[" and "]" text nodes.

    The children of a replaced <optional> become direct children of
    `arglist`, so nested <optional> elements are picked up by a later
    iteration.  Returns None once no <optional> child remains.
    """
    while 1:
        optional = None
        for child in arglist.childNodes:
            if child.get_nodeName() == "optional":
                optional = child
                break
        if optional is None:
            # nothing left to unwrap
            return
        arglist.insertBefore(doc.createTextNode("["), optional)
        # Hoist the content out in front of the <optional>; the child list
        # shrinks as nodes are removed.
        kids = optional.childNodes
        while kids:
            first = kids[0]
            optional.removeChild(first)
            arglist.insertBefore(first, optional)
        arglist.insertBefore(doc.createTextNode("]"), optional)
        arglist.removeChild(optional)
793
794
def fixup_sectionauthors(doc, fragment):
    """Convert each <sectionauthor> in `fragment` to an <author> element.

    The element is detached from its section, renamed, given its former
    "name" attribute value as text content, and re-inserted near the top
    of the section: before the section's third child when the second
    child is a <title>, otherwise before the first child.
    """
    for author in find_all_elements(fragment, "sectionauthor"):
        section = author.parentNode
        section.removeChild(author)
        # rename the element in place
        author._node.name = "author"
        # the name moves from an attribute into the element's content
        name_text = doc.createTextNode(author.getAttribute("name"))
        author.appendChild(name_text)
        author.removeAttribute("name")
        # pick the node to insert in front of
        anchor = section.childNodes[2]
        if section.childNodes[1].get_nodeName() != "title":
            anchor = section.childNodes[0]
        section.insertBefore(doc.createTextNode("\n "), anchor)
        section.insertBefore(author, anchor)
809
810
def fixup_verbatims(doc):
    """Rename <verbatim> elements that hold interactive sessions.

    A <verbatim> whose first child is a text node beginning (after any
    leading whitespace) with the ">>>" prompt is renamed to
    <interactive-session>.  Empty <verbatim> elements are left untouched.
    """
    for verbatim in find_all_elements(doc, "verbatim"):
        if not verbatim.hasChildNodes():
            # no content to inspect; indexing the first child would fail
            continue
        child = verbatim.childNodes[0]
        if child.nodeType == TEXT \
           and string.lstrip(child.data)[:3] == ">>>":
            verbatim._node.name = "interactive-session"
Fred Drake93d762f1999-02-18 16:32:21 +0000817
818
def add_node_ids(fragment, counter=0):
    """Store a numeric `node_id` on `fragment` and on every node below it.

    `fragment` itself receives `counter`.  Each child takes the next
    value; element children are numbered recursively and numbering
    continues from whatever the recursive call returns.  The return value
    is one more than the last value reached.
    """
    fragment._node.node_id = counter
    current = counter
    for child in fragment.childNodes:
        current = current + 1
        if child.nodeType != ELEMENT:
            child._node.node_id = current
        else:
            # the recursive call labels the element and its subtree
            current = add_node_ids(child, current)
    return current + 1
828
829
# Tag names of the module-index marker elements handled by
# fixup_refmodindexes() and fixup_refmodindexes_chunk().
REFMODINDEX_ELEMENTS = (
    'refmodindex',
    'refbimodindex',
    'refexmodindex',
    'refstmodindex',
    )
832
def fixup_refmodindexes(fragment):
    """Fold <ref*modindex> markers into neighbouring <module> elements.

    Collects the parent of every REFMODINDEX_ELEMENTS element found in
    `fragment` and runs fixup_refmodindexes_chunk() on each distinct
    parent.  Parents are told apart by the `node_id` values that
    add_node_ids() assigns, so that pass has to run first.
    """
    parents = {}
    # keying on node_id makes sure each parent is processed only once
    for marker in find_all_elements_from_set(fragment, REFMODINDEX_ELEMENTS):
        owner = marker.parentNode
        parents[owner._node.node_id] = owner
    for owner in parents.values():
        fixup_refmodindexes_chunk(owner)
844
845
def fixup_refmodindexes_chunk(container):
    """Process the module-index markers found in one container.

    For every childless REFMODINDEX_ELEMENTS element in `container`, each
    <module> element whose single child carries the same text as the
    marker's "module" attribute is given index="yes".  Markers that
    matched at least one <module> are then removed from `container`.
    Markers that unexpectedly have children are reported and skipped.
    """
    # The container is probably a <para>; report the cases where it isn't.
    if container.get_tagName() != PARA_ELEMENT:
        bwrite("--- fixup_refmodindexes_chunk(%s)\n" % container)
    modules = find_all_elements(container, "module")
    if not modules:
        return
    obsolete = []
    for entry in find_all_elements_from_set(container, REFMODINDEX_ELEMENTS):
        if len(entry.childNodes) != 0:
            bwrite("--- unexpected number of children for %s node:\n"
                   % entry.get_tagName())
            ewrite(entry.toxml() + "\n")
            continue
        wanted = entry.getAttribute("module")
        matched = 0
        for module in modules:
            kids = module.childNodes
            # only <module> elements with exactly one child are compared
            if len(kids) == 1 and kids[0].data == wanted:
                matched = 1
                module.setAttribute("index", "yes")
        if matched:
            obsolete.append(entry)
    # remove after the scan so the lists being walked are not disturbed
    for entry in obsolete:
        container.removeChild(entry)
875
876
def fixup_bifuncindexes(fragment):
    """Fold <bifuncindex> markers into neighbouring <function> elements.

    Collects the parent of every <bifuncindex> element in `fragment` and
    runs fixup_bifuncindexes_chunk() on each distinct parent.  Parents are
    told apart by the `node_id` values that add_node_ids() assigns.
    """
    parents = {}
    # keying on node_id makes sure that each parent is only processed once
    for marker in find_all_elements(fragment, 'bifuncindex'):
        owner = marker.parentNode
        parents[owner._node.node_id] = owner
    for owner in parents.values():
        fixup_bifuncindexes_chunk(owner)
886
887
def fixup_bifuncindexes_chunk(container):
    """Process the <bifuncindex> child elements of one container.

    A <function> child whose text is "<name>()" matches a <bifuncindex>
    whose "name" attribute is <name>.  Every matching <function> gets
    index="yes" and module="__builtin__"; a <bifuncindex> that matched at
    least once is removed from `container` afterwards.
    """
    markers = find_all_child_elements(container, "bifuncindex")
    functions = find_all_child_elements(container, "function")
    matched = []
    for marker in markers:
        wanted = marker.getAttribute("name")
        seen = 0
        for func in functions:
            label = func.childNodes[0].data
            # the function text must be the wanted name followed by "()"
            if label[-2:] == "()" and label[:-2] == wanted:
                func.setAttribute("index", "yes")
                func.setAttribute("module", "__builtin__")
                if not seen:
                    # queue each marker for removal only once
                    seen = 1
                    matched.append(marker)
    for marker in matched:
        container.removeChild(marker)
908
909
Fred Drake4db5b461998-12-01 19:03:01 +0000910_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
Fred Drakefcc59101999-01-06 22:50:52 +0000911
def write_esis(doc, ofp, knownempty):
    """Write the children of `doc` to `ofp` as ESIS records, recursively.

    `knownempty` is called with an element's tag name and returns true for
    elements that are declared empty; those get an "e" record.

    Raises ValueError when a declared-empty element has children, and
    RuntimeError for node types other than element, text and entity
    reference.
    """
    for node in doc.childNodes:
        kind = node.nodeType
        if kind == TEXT:
            ofp.write("-%s\n" % esistools.encode(node.data))
        elif kind == ENTITY_REFERENCE:
            ofp.write("&%s\n" % node.get_nodeName())
        elif kind == ELEMENT:
            gi = node.get_tagName()
            if knownempty(gi):
                if node.hasChildNodes():
                    raise ValueError(
                        "declared-empty node <%s> has children" % gi)
                ofp.write("e\n")
            # attribute records precede the element's start record
            for name, attr in node.attributes.items():
                text = attr.value
                if _token_rx.match(text):
                    dtype = "TOKEN"
                else:
                    dtype = "CDATA"
                ofp.write("A%s %s %s\n"
                          % (name, dtype, esistools.encode(text)))
            ofp.write("(%s\n" % gi)
            write_esis(node, ofp, knownempty)
            ofp.write(")%s\n" % gi)
        else:
            raise RuntimeError("unsupported node type: %s" % kind)
938
939
Fred Drake03204731998-11-23 17:02:03 +0000940def convert(ifp, ofp):
Fred Drake4db5b461998-12-01 19:03:01 +0000941 p = esistools.ExtendedEsisBuilder()
Fred Drake03204731998-11-23 17:02:03 +0000942 p.feed(ifp.read())
943 doc = p.document
Fred Drakee779d4f1999-05-10 19:36:52 +0000944 fragment = p.fragment
945 normalize(fragment)
946 simplify(doc, fragment)
Fred Drake865e9ff1999-07-29 22:23:19 +0000947 handle_labels(doc, fragment)
Fred Drakee779d4f1999-05-10 19:36:52 +0000948 handle_appendix(doc, fragment)
Fred Drake1ff6db41998-11-23 23:10:35 +0000949 fixup_trailing_whitespace(doc, {
950 "abstract": "\n",
951 "title": "",
952 "chapter": "\n\n",
953 "section": "\n\n",
954 "subsection": "\n\n",
955 "subsubsection": "\n\n",
956 "paragraph": "\n\n",
957 "subparagraph": "\n\n",
958 })
Fred Drake03204731998-11-23 17:02:03 +0000959 cleanup_root_text(doc)
Fred Drake080c1b51999-08-02 14:46:15 +0000960 cleanup_trailing_parens(fragment, ["function", "method", "cfunction"])
961 cleanup_synopses(doc, fragment)
Fred Drakee779d4f1999-05-10 19:36:52 +0000962 fixup_descriptors(doc, fragment)
963 fixup_verbatims(fragment)
964 normalize(fragment)
965 fixup_paras(doc, fragment)
966 fixup_sectionauthors(doc, fragment)
Fred Drakee779d4f1999-05-10 19:36:52 +0000967 fixup_table_structures(doc, fragment)
968 fixup_rfc_references(doc, fragment)
969 fixup_signatures(doc, fragment)
Fred Drake865e9ff1999-07-29 22:23:19 +0000970 add_node_ids(fragment)
971 fixup_refmodindexes(fragment)
972 fixup_bifuncindexes(fragment)
Fred Drake4db5b461998-12-01 19:03:01 +0000973 #
974 d = {}
975 for gi in p.get_empties():
976 d[gi] = gi
Fred Draked24167b1999-01-14 21:18:03 +0000977 if d.has_key("rfc"):
978 del d["rfc"]
Fred Drake4db5b461998-12-01 19:03:01 +0000979 knownempty = d.has_key
980 #
Fred Drake03204731998-11-23 17:02:03 +0000981 try:
Fred Drakee779d4f1999-05-10 19:36:52 +0000982 write_esis(fragment, ofp, knownempty)
Fred Drake03204731998-11-23 17:02:03 +0000983 except IOError, (err, msg):
984 # Ignore EPIPE; it just means that whoever we're writing to stopped
985 # reading. The rest of the output would be ignored. All other errors
986 # should still be reported,
987 if err != errno.EPIPE:
988 raise
989
990
def main():
    """Command-line entry point.

    With no arguments, convert standard input to standard output.  With
    one argument, read the named file and write to standard output.  With
    two, read the first file and write the second.  Any other argument
    count calls usage() and exits with status 2.

    Files opened here are closed again before returning, including when
    convert() raises; the standard streams are left open.
    """
    argc = len(sys.argv)
    if argc < 1 or argc > 3:
        usage()
        sys.exit(2)
    ifp = sys.stdin
    ofp = sys.stdout
    opened = []
    try:
        if argc >= 2:
            ifp = open(sys.argv[1])
            opened.append(ifp)
        if argc == 3:
            ofp = open(sys.argv[2], "w")
            opened.append(ofp)
        convert(ifp, ofp)
    finally:
        # only close what was opened above, never stdin/stdout
        for fp in opened:
            fp.close()
1005
1006
# Run the converter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()