blob: 9f93966077131c5eaec04fdc7c03ffc205dfc6a9 [file] [log] [blame]
Fred Drake03204731998-11-23 17:02:03 +00001#! /usr/bin/env python
2
Fred Drake7dab6af1999-01-28 23:59:58 +00003"""Perform massive transformations on a document tree created from the LaTeX
4of the Python documentation, and dump the ESIS data for the transformed tree.
Fred Drake03204731998-11-23 17:02:03 +00005"""
Fred Drake03204731998-11-23 17:02:03 +00006
7
8import errno
Fred Drake4db5b461998-12-01 19:03:01 +00009import esistools
10import re
Fred Drake03204731998-11-23 17:02:03 +000011import sys
Fred Drake3e8f9212001-03-23 17:01:47 +000012import xml.dom
13import xml.dom.minidom
Fred Drakee779d4f1999-05-10 19:36:52 +000014
# Short aliases for the DOM node-type codes that this module compares
# against ``node.nodeType``.
_NODE = xml.dom.Node
TEXT = _NODE.TEXT_NODE
ELEMENT = _NODE.ELEMENT_NODE
ENTITY_REFERENCE = _NODE.ENTITY_REFERENCE_NODE
Fred Drake03204731998-11-23 17:02:03 +000018
19
class ConversionError(Exception):
    """Raised when the document tree contains something the fixers
    cannot convert (for example unexpected content inside a table)."""
22
23
# ewrite() writes plain text to stderr; bwrite() writes the same text in
# bold when stderr is a terminal that supports it, and is simply ewrite()
# otherwise.
ewrite = sys.stderr.write
try:
    # We can only do this trick on Unix (if tput is on $PATH)!
    # os.name is the right test here: sys.platform is "linux", "darwin",
    # etc. and never "posix", so comparing it against "posix" switched the
    # bold output off on every platform.
    import os
    if os.name != "posix" or not sys.stderr.isatty():
        raise ImportError
    try:
        from subprocess import getstatusoutput
    except ImportError:
        # Python 2 keeps this helper in the commands module.
        from commands import getstatusoutput
    _bold_status, _bold_on = getstatusoutput("tput bold")
    _reset_status, _bold_off = getstatusoutput("tput sgr0")
    if _bold_status or _reset_status:
        # tput is missing or failed; its "output" is an error message,
        # not an escape sequence, so fall back to plain writes.
        raise ImportError
except ImportError:
    bwrite = ewrite
else:
    def bwrite(s, BOLDON=_bold_on, BOLDOFF=_bold_off):
        """Write *s* to stderr wrapped in the terminal's bold on/off codes."""
        ewrite("%s%s%s" % (BOLDON, s, BOLDOFF))
36
37
# Tag name used for paragraph elements.
PARA_ELEMENT = "para"

# Set to a true value to get trace output from the paragraph fixer.
DEBUG_PARA_FIXER = 0

# para_msg() is chosen once, at import time, from the flag above.
if not DEBUG_PARA_FIXER:
    def para_msg(s):
        """Debugging is off: discard the message."""
        return None
else:
    def para_msg(s):
        """Write the trace message *s* to stderr, prefixed with '*** '."""
        ewrite("*** %s\n" % s)
48
Fred Drakefcc59101999-01-06 22:50:52 +000049
def get_first_element(doc, gi):
    """Return the first direct child of *doc* whose nodeName equals *gi*.

    Only immediate children are examined.  Returns None when no child
    matches.
    """
    matches = [child for child in doc.childNodes if child.nodeName == gi]
    if matches:
        return matches[0]
    return None
54
def extract_first_element(doc, gi):
    """Detach and return the first direct child of *doc* named *gi*.

    Returns None, leaving *doc* untouched, when there is no such child.
    """
    found = get_first_element(doc, gi)
    if found is None:
        return None
    doc.removeChild(found)
    return found
60
61
def get_documentElement(node):
    """Return the document element among the children of *node*.

    Note that when *node* has several element children it is the *last*
    one that is returned; None is returned when there are none.
    """
    elements = [child for child in node.childNodes
                if child.nodeType == ELEMENT]
    if not elements:
        return None
    return elements[-1]
68
69
def set_tagName(elem, gi):
    """Rename *elem* in place: set both its nodeName and tagName to *gi*."""
    elem.nodeName = gi
    elem.tagName = gi
72
73
def find_all_elements(doc, gi):
    """Return a list of the nodes named *gi* at or below *doc*.

    *doc* itself comes first if its nodeName matches; then, for each
    element child in order, the child (if it matches) followed by whatever
    its getElementsByTagName(gi) returns.
    """
    found = []
    if doc.nodeName == gi:
        found.append(doc)
    for child in doc.childNodes:
        if child.nodeType != ELEMENT:
            continue
        if child.tagName == gi:
            found.append(child)
        found.extend(child.getElementsByTagName(gi))
    return found
85
def find_all_child_elements(doc, gi):
    """Return a list of the direct children of *doc* whose nodeName is *gi*."""
    return [child for child in doc.childNodes if child.nodeName == gi]
92
Fred Drake3e8f9212001-03-23 17:01:47 +000093
def find_all_elements_from_set(doc, gi_set):
    """Return, in document order, every node at or below *doc* whose
    nodeName is a member of *gi_set*."""
    return __find_all_elements_from_set(doc, gi_set, [])

def __find_all_elements_from_set(doc, gi_set, nodes):
    """Append the matching nodes at or below *doc* to *nodes*; return *nodes*.

    Only element children are descended into.  The walk uses an explicit
    stack; children are pushed in reverse so that they are popped, and
    therefore visited, in document (pre-order) sequence.
    """
    pending = [doc]
    while pending:
        current = pending.pop()
        if current.nodeName in gi_set:
            nodes.append(current)
        element_children = [child for child in current.childNodes
                            if child.nodeType == ELEMENT]
        element_children.reverse()
        pending.extend(element_children)
    return nodes
Fred Drake7dab6af1999-01-28 23:59:58 +0000104
105
def simplify(doc, fragment):
    """Restructure the top level of *fragment* around one document element.

    *doc* is used only as a factory for new text nodes; *fragment* is
    modified in place.  Nothing is returned.
    """
    # Try to rationalize the document a bit, since these things are simply
    # not valid SGML/XML documents as they stand, and need a little work.
    documentclass = "document"
    inputs = []
    # <documentclass classname="..."> supplies the new root tag name and is
    # dropped from the tree.
    node = extract_first_element(fragment, "documentclass")
    if node is not None:
        documentclass = node.getAttribute("classname")
    # A top-level <title> is held back and re-inserted with the <input>s.
    node = extract_first_element(fragment, "title")
    if node is not None:
        inputs.append(node)
    # update the name of the root element
    node = get_first_element(fragment, "document")
    if node is not None:
        set_tagName(node, documentclass)
        # Move everything that comes before this node into this node;
        # this will be the document element.
        nodelist = fragment.childNodes
        point = node.firstChild
        # Each insertBefore() takes nodelist[0] out of the fragment, so the
        # loop ends once the document element itself is the first child.
        while not nodelist[0].isSameNode(node):
            node.insertBefore(nodelist[0], point)
    # Collect every remaining top-level <input> element.
    while 1:
        node = extract_first_element(fragment, "input")
        if node is None:
            break
        inputs.append(node)
    if inputs:
        docelem = get_documentElement(fragment)
        # Inserting at the front in reverse order restores the original
        # order; each node is followed by a newline text node.
        inputs.reverse()
        for node in inputs:
            text = doc.createTextNode("\n")
            docelem.insertBefore(text, docelem.firstChild)
            docelem.insertBefore(node, text)
        docelem.insertBefore(doc.createTextNode("\n"), docelem.firstChild)
    # Drop any text nodes left at the very start of the fragment.
    while fragment.firstChild and fragment.firstChild.nodeType == TEXT:
        fragment.removeChild(fragment.firstChild)
Fred Drake03204731998-11-23 17:02:03 +0000142
143
def cleanup_root_text(doc):
    """Remove the text nodes that are direct children of *doc*.

    A text node that immediately follows a node named "COMMENT" is kept;
    every other top-level text node is removed.
    """
    doomed = []
    follows_comment = False
    for node in doc.childNodes:
        if node.nodeType == TEXT and not follows_comment:
            doomed.append(node)
            follows_comment = False
        else:
            follows_comment = node.nodeName == "COMMENT"
    # Remove only after the scan so the child list is not changed while it
    # is being iterated.
    for node in doomed:
        doc.removeChild(node)
156
157
# Tag names of the "descriptor" elements that find_and_fix_descriptors()
# hands to rewrite_descriptor().
DESCRIPTOR_ELEMENTS = (
    # C API
    "cfuncdesc",
    "cvardesc",
    "ctypedesc",
    # classes and their members
    "classdesc",
    "memberdesc",
    "memberdescni",
    "methoddesc",
    "methoddescni",
    # exceptions, functions, opcodes
    "excdesc",
    "funcdesc",
    "funcdescni",
    "opcodedesc",
    # data
    "datadesc",
    "datadescni",
    )
164
def fixup_descriptors(doc, fragment):
    """Rewrite the descriptor elements found in each <section> of *fragment*."""
    for section in find_all_elements(fragment, "section"):
        find_and_fix_descriptors(doc, section)
169
170
def find_and_fix_descriptors(doc, container):
    """Rewrite each descriptor element that is a direct child of *container*.

    <subsection> children are searched recursively; all other children are
    left alone.
    """
    for child in container.childNodes:
        if child.nodeType != ELEMENT:
            continue
        tag = child.tagName
        if tag in DESCRIPTOR_ELEMENTS:
            rewrite_descriptor(doc, child)
        elif tag == "subsection":
            find_and_fix_descriptors(doc, child)
180
Fred Drakecb657811999-01-29 20:55:07 +0000181
def rewrite_descriptor(doc, descriptor):
    """Rebuild *descriptor* as <signature> element(s) plus a <description>.

    *doc* is used as the node factory; *descriptor* is modified in place.
    Raises RuntimeError if a "var" attribute appears on anything other than
    an <opcodedesc>.
    """
    #
    # Do these things:
    #   1. Add an "index='no'" attribute to the element if the tagName
    #      ends in 'ni', removing the 'ni' from the name.
    #   2. Create a <signature> from the name attribute
    #   2a.Create an <args> if it appears to be available.
    #   3. Create additional <signature>s from <*line{,ni}> elements,
    #      if found.
    #   4. If a <versionadded> is found, move it to an attribute on the
    #      descriptor.
    #   5. Move remaining child nodes to a <description> element.
    #   6. Put it back together.
    #
    # 1.
    descname = descriptor.tagName
    # NOTE(review): this tests the "name" attribute, so `index` is false
    # only when the described object is literally named "no".  Step 1 above
    # suggests an "index" attribute may have been intended -- confirm
    # against the producer of these elements before changing it.
    index = descriptor.getAttribute("name") != "no"
    desctype = descname[:-4] # remove 'desc'
    linename = desctype + "line"
    if not index:
        linename = linename + "ni"
    # 2.
    signature = doc.createElement("signature")
    name = doc.createElement("name")
    signature.appendChild(doc.createTextNode("\n "))
    signature.appendChild(name)
    name.appendChild(doc.createTextNode(descriptor.getAttribute("name")))
    descriptor.removeAttribute("name")
    # 2a.
    if descriptor.hasAttribute("var"):
        if descname != "opcodedesc":
            raise RuntimeError("got 'var' attribute on descriptor other than opcodedesc")
        variable = descriptor.getAttribute("var")
        if variable:
            args = doc.createElement("args")
            args.appendChild(doc.createTextNode(variable))
            signature.appendChild(doc.createTextNode("\n "))
            signature.appendChild(args)
        descriptor.removeAttribute("var")
    newchildren = [signature]
    children = descriptor.childNodes
    # skip_leading_nodes() is defined outside this excerpt; presumably it
    # returns the index of the first significant child at or after the
    # given position -- TODO confirm.
    pos = skip_leading_nodes(children)
    if pos < len(children):
        child = children[pos]
        if child.nodeName == "args":
            # move <args> to <signature>, or remove if empty:
            child.parentNode.removeChild(child)
            if len(child.childNodes):
                signature.appendChild(doc.createTextNode("\n "))
                signature.appendChild(child)
    signature.appendChild(doc.createTextNode("\n "))
    # 3, 4.
    pos = skip_leading_nodes(children, pos)
    while pos < len(children) \
          and children[pos].nodeName in (linename, "versionadded"):
        if children[pos].tagName == linename:
            # this is really a supplemental signature, create <signature>
            # Keep an untouched copy so the element can be printed if the
            # conversion fails part-way through.
            oldchild = children[pos].cloneNode(1)
            try:
                sig = methodline_to_signature(doc, children[pos])
            except KeyError:
                print(oldchild.toxml())
                raise
            newchildren.append(sig)
        else:
            # <versionadded added=...>
            descriptor.setAttribute(
                "added", children[pos].getAttribute("version"))
        pos = skip_leading_nodes(children, pos + 1)
    # 5.
    description = doc.createElement("description")
    description.appendChild(doc.createTextNode("\n"))
    newchildren.append(description)
    move_children(descriptor, description, pos)
    # <description> always has at least the newline text node added above.
    last = description.childNodes[-1]
    if last.nodeType == TEXT:
        last.data = last.data.rstrip() + "\n "
    # 6.
    # should have nothing but whitespace and signature lines in <descriptor>;
    # discard them
    while descriptor.childNodes:
        descriptor.removeChild(descriptor.childNodes[0])
    for node in newchildren:
        descriptor.appendChild(doc.createTextNode("\n "))
        descriptor.appendChild(node)
    descriptor.appendChild(doc.createTextNode("\n"))
Fred Drake03204731998-11-23 17:02:03 +0000268
Fred Drake7dab6af1999-01-28 23:59:58 +0000269
def methodline_to_signature(doc, methodline):
    """Build and return a <signature> element from a <*line> element.

    The "name" attribute of *methodline* becomes a <name> child and is
    removed from *methodline*; any children of *methodline* are moved into
    an <args> child.
    """
    name = doc.createElement("name")
    name.appendChild(doc.createTextNode(methodline.getAttribute("name")))
    methodline.removeAttribute("name")

    signature = doc.createElement("signature")
    signature.appendChild(doc.createTextNode("\n "))
    signature.appendChild(name)
    if methodline.childNodes:
        args = doc.createElement("args")
        signature.appendChild(doc.createTextNode("\n "))
        signature.appendChild(args)
        move_children(methodline, args)
    signature.appendChild(doc.createTextNode("\n "))
    return signature
Fred Drake03204731998-11-23 17:02:03 +0000284
285
def move_children(origin, dest, start=0):
    """Move the children of *origin* from index *start* onward to the end
    of *dest*, preserving their order."""
    remaining = origin.childNodes
    # The child list shrinks with every removal, so the node to move next
    # is always the one at index *start*.
    while len(remaining) > start:
        moved = remaining[start]
        origin.removeChild(moved)
        dest.appendChild(moved)
292
293
def handle_appendix(doc, fragment):
    """Move everything after the <appendix/> marker into a <back-matter>.

    The first top-level child of the document element that contains an
    <appendix> element has that marker removed; every later top-level
    child is moved into a new <back-matter> element appended to the
    document element.  Nothing changes if no such later children exist.

    *doc* is the node factory; *fragment* is modified in place.
    """
    # must be called after simplfy() if document is multi-rooted to begin with
    docelem = get_documentElement(fragment)
    appendices = 0
    nodes = []
    for node in docelem.childNodes:
        if appendices:
            nodes.append(node)
        elif node.nodeType == ELEMENT:
            appnodes = node.getElementsByTagName("appendix")
            if appnodes:
                appendices = 1
                parent = appnodes[0].parentNode
                parent.removeChild(appnodes[0])
                parent.normalize()
    if nodes:
        # Explicit loops: map() is lazy on Python 3, so calling it only for
        # its side effects would silently move nothing.
        for node in nodes:
            docelem.removeChild(node)
        docelem.appendChild(doc.createTextNode("\n\n\n"))
        back = doc.createElement("back-matter")
        docelem.appendChild(back)
        back.appendChild(doc.createTextNode("\n"))
        # Leading whitespace-only text is dropped rather than moved.
        while nodes and nodes[0].nodeType == TEXT \
              and not nodes[0].data.strip():
            del nodes[0]
        for node in nodes:
            back.appendChild(node)
        docelem.appendChild(doc.createTextNode("\n"))
Fred Drake03204731998-11-23 17:02:03 +0000321
322
def handle_labels(doc, fragment):
    """Replace each <label id="..."/> by an "id" attribute on its container.

    The attribute is set on the label's parent, or on the grandparent when
    the parent is a <title>.  The <label> is then removed; for a <title>
    parent, trailing whitespace left at the end of the title is trimmed.
    Labels without an id are left in place.  *doc* is unused.
    """
    for label in find_all_elements(fragment, "label"):
        label_id = label.getAttribute("id")
        if not label_id:
            continue
        parent = label.parentNode
        in_title = parent.tagName == "title"
        if in_title:
            parent.parentNode.setAttribute("id", label_id)
        else:
            parent.setAttribute("id", label_id)
        # now, remove <label id="..."/> from parent:
        parent.removeChild(label)
        if in_title:
            parent.normalize()
            # The title may have contained nothing but the label, in which
            # case there is no last child to trim.
            last = parent.lastChild
            if last is not None and last.nodeType == TEXT:
                last.data = last.data.rstrip()
Fred Drake03204731998-11-23 17:02:03 +0000341
342
def fixup_trailing_whitespace(doc, fragment, wsmap):
    """Normalize the whitespace at the end of, and after, selected elements.

    *wsmap* maps a tag name to a ``(before, after)`` pair of strings.  For
    every element at or below *fragment* whose name is a key of *wsmap*:

    * if its last child is text, that text is right-stripped and *before*
      is appended (i.e. it lands just before the end tag);
    * the text following the element is made to start with *after*,
      creating a text node if there is none.

    *doc* is the node factory; the tree is modified in place.
    """
    # Breadth-first collection of the elements to fix.
    queue = [fragment]
    fixups = []
    while queue:
        node = queue.pop(0)
        if node.nodeName in wsmap:
            fixups.append(node)
        for child in node.childNodes:
            if child.nodeType == ELEMENT:
                queue.append(child)

    # reverse the list to process from the inside out
    fixups.reverse()
    for node in fixups:
        node.parentNode.normalize()
        lastchild = node.lastChild
        before, after = wsmap[node.tagName]
        # An empty element has no last child; there is nothing to trim.
        if lastchild is not None and lastchild.nodeType == TEXT:
            lastchild.data = lastchild.data.rstrip() + before
        nextnode = node.nextSibling
        if nextnode and nextnode.nodeType == TEXT:
            nextnode.data = after + nextnode.data.lstrip()
        else:
            # insertBefore() with a reference node of None appends.
            wsnode = doc.createTextNode(after)
            node.parentNode.insertBefore(wsnode, nextnode)
        # hack to get the title in place:
        if node.tagName == "title" \
           and node.parentNode.firstChild.nodeType == ELEMENT:
            node.parentNode.insertBefore(doc.createTextNode("\n "),
                                         node.parentNode.firstChild)
            node.parentNode.normalize()
378
Fred Drake1ff6db41998-11-23 23:10:35 +0000379
def normalize(doc):
    """Call normalize() on every element that is a direct child of *doc*."""
    top_level_elements = [child for child in doc.childNodes
                          if child.nodeType == ELEMENT]
    for element in top_level_elements:
        element.normalize()
384
385
def cleanup_trailing_parens(doc, element_names):
    """Strip a trailing "()" from the text of the named elements.

    Every element below *doc* is visited breadth-first.  When an element's
    tag is in *element_names* and its last child is a text node ending in
    "()", those two characters are removed; such elements are not
    descended into.  Other elements are searched recursively.
    """
    # A set gives the same O(1) membership test the old dict.has_key()
    # lookup did, and works on Python 3 where has_key() no longer exists.
    names = frozenset(element_names)
    queue = [node for node in doc.childNodes if node.nodeType == ELEMENT]
    while queue:
        node = queue.pop(0)
        if node.tagName in names:
            lastchild = node.lastChild
            if lastchild is not None and lastchild.nodeType == TEXT:
                data = lastchild.data
                if data.endswith("()"):
                    lastchild.data = data[:-2]
        else:
            for child in node.childNodes:
                if child.nodeType == ELEMENT:
                    queue.append(child)
405
406
def contents_match(left, right):
    """Return 1 if the children of *left* and *right* match, else 0.

    Children are compared pairwise: element children must have the same
    tag name and recursively matching contents, and text children must
    have equal data.  Attributes are not compared.  A child of any other
    node type makes the result 0.
    """
    left_children = left.childNodes
    right_children = right.childNodes
    if len(left_children) != len(right_children):
        return 0
    # The lengths are equal, so zip() pairs up every child; the Python 2
    # only spelling map(None, a, b) is not needed.
    for l, r in zip(left_children, right_children):
        nodeType = l.nodeType
        if nodeType != r.nodeType:
            return 0
        if nodeType == ELEMENT:
            if l.tagName != r.tagName:
                return 0
            # should check attributes, but that's not a problem here
            if not contents_match(l, r):
                return 0
        elif nodeType == TEXT:
            if l.data != r.data:
                return 0
        else:
            # not quite right, but good enough
            return 0
    return 1
429
430
def create_module_info(doc, section):
    """Gather a section's module metadata into one <moduleinfo> element.

    Does nothing unless *section* has a <modulesynopsis> child.  Otherwise
    the synopsis, author, platform, module declaration, version and
    (possibly) title information is pulled out of *section* and, when
    *section* is a <section> element, reassembled under a new <moduleinfo>
    child.  *doc* is the node factory; *section* is modified in place.
    """
    # Heavy.
    node = extract_first_element(section, "modulesynopsis")
    if node is None:
        return
    set_tagName(node, "synopsis")
    # Drop a trailing period from the synopsis text.
    lastchild = node.childNodes[-1]
    if lastchild.nodeType == TEXT \
       and lastchild.data[-1:] == ".":
        lastchild.data = lastchild.data[:-1]
    # <moduleauthor name="X"/> becomes <author>X</author>.
    modauthor = extract_first_element(section, "moduleauthor")
    if modauthor:
        set_tagName(modauthor, "author")
        modauthor.appendChild(doc.createTextNode(
            modauthor.getAttribute("name")))
        modauthor.removeAttribute("name")
    platform = extract_first_element(section, "platform")
    if section.tagName == "section":
        # Child index at which <moduleinfo> is inserted; reset to 0 below
        # when the title is taken out of the section.
        modinfo_pos = 2
        modinfo = doc.createElement("moduleinfo")
        moddecl = extract_first_element(section, "declaremodule")
        name = None
        if moddecl:
            modinfo.appendChild(doc.createTextNode("\n "))
            name = moddecl.attributes["name"].value
            namenode = doc.createElement("name")
            namenode.appendChild(doc.createTextNode(name))
            modinfo.appendChild(namenode)
            type = moddecl.attributes.get("type")
            if type:
                type = type.value
                modinfo.appendChild(doc.createTextNode("\n "))
                typenode = doc.createElement("type")
                typenode.appendChild(doc.createTextNode(type))
                modinfo.appendChild(typenode)
        versionadded = extract_first_element(section, "versionadded")
        if versionadded:
            modinfo.setAttribute("added", versionadded.getAttribute("version"))
        title = get_first_element(section, "title")
        if title:
            children = title.childNodes
            # Expect a title of the form <module>NAME</module> --- text
            if len(children) >= 2 \
               and children[0].nodeName == "module" \
               and children[0].childNodes[0].data == name:
                # this is it; morph the <title> into <short-synopsis>
                first_data = children[1]
                if first_data.data[:4] == " ---":
                    first_data.data = first_data.data[4:].lstrip()
                set_tagName(title, "short-synopsis")
                if children[-1].nodeType == TEXT \
                   and children[-1].data[-1:] == ".":
                    children[-1].data = children[-1].data[:-1]
                section.removeChild(title)
                # NOTE(review): also removes whatever is now the section's
                # first child -- presumably the whitespace text that
                # surrounded the title; confirm.
                section.removeChild(section.childNodes[0])
                title.removeChild(children[0])
                modinfo_pos = 0
            else:
                ewrite("module name in title doesn't match"
                       " <declaremodule/>; no <short-synopsis/>\n")
        else:
            ewrite("Unexpected condition: <section/> without <title/>\n")
        modinfo.appendChild(doc.createTextNode("\n "))
        modinfo.appendChild(node)
        if title and not contents_match(title, node):
            # The short synopsis is actually different,
            # and needs to be stored:
            modinfo.appendChild(doc.createTextNode("\n "))
            modinfo.appendChild(title)
        if modauthor:
            modinfo.appendChild(doc.createTextNode("\n "))
            modinfo.appendChild(modauthor)
        if platform:
            modinfo.appendChild(doc.createTextNode("\n "))
            modinfo.appendChild(platform)
        modinfo.appendChild(doc.createTextNode("\n "))
        section.insertBefore(modinfo, section.childNodes[modinfo_pos])
        section.insertBefore(doc.createTextNode("\n "), modinfo)
        #
        # The rest of this removes extra newlines from where we cut out
        # a lot of elements.  A lot of code for minimal value, but it
        # keeps the generated *ML from being too funny looking.
        #
        section.normalize()
        children = section.childNodes
        for i in range(len(children)):
            node = children[i]
            if node.nodeName == "moduleinfo":
                # NOTE(review): assumes <moduleinfo> is never the last
                # child; children[i+1] would raise IndexError otherwise.
                nextnode = children[i+1]
                if nextnode.nodeType == TEXT:
                    data = nextnode.data
                    s = data.lstrip()
                    # Only rewrite when more than four leading whitespace
                    # characters were present.
                    if len(s) < (len(data) - 4):
                        nextnode.data = "\n\n\n" + s
Fred Drakeaaed9711998-12-10 20:25:30 +0000524
525
def cleanup_synopses(doc, fragment):
    """Run create_module_info() on every <section> found in *fragment*."""
    sections = find_all_elements(fragment, "section")
    for section in sections:
        create_module_info(doc, section)
Fred Drakeaaed9711998-12-10 20:25:30 +0000529
530
def fixup_table_structures(doc, fragment):
    """Run fixup_table() on every <table> found in *fragment*."""
    tables = find_all_elements(fragment, "table")
    for table in tables:
        fixup_table(doc, table)
534
Fred Drakef8ebb551999-01-14 19:45:38 +0000535
def fixup_table(doc, table):
    """Restructure *table* into <tgroup><thead/><tbody/></tgroup>.

    The table's direct <entry> children become the single header row, its
    <row> children become the body, and an <hline> following a row sets
    rowsep="1" on that row.  Whitespace text and <hline> elements are
    discarded.

    Raises ConversionError if anything else remains in the table
    (non-whitespace text, another element, or another node type).
    """
    # create the table head
    thead = doc.createElement("thead")
    row = doc.createElement("row")
    move_elements_by_name(doc, table, row, "entry")
    thead.appendChild(doc.createTextNode("\n "))
    thead.appendChild(row)
    thead.appendChild(doc.createTextNode("\n "))
    # create the table body
    tbody = doc.createElement("tbody")
    prev_row = None
    # NOTE(review): last_was_hline is assigned here but never read in this
    # function.
    last_was_hline = 0
    children = table.childNodes
    for child in children:
        if child.nodeType == ELEMENT:
            tagName = child.tagName
            if tagName == "hline" and prev_row is not None:
                prev_row.setAttribute("rowsep", "1")
            elif tagName == "row":
                prev_row = child
    # save the rows:
    tbody.appendChild(doc.createTextNode("\n "))
    move_elements_by_name(doc, table, tbody, "row", sep="\n ")
    # and toss the rest:
    # `children` is the table's live child list, so this loop ends once
    # every remaining child has been removed.
    while children:
        child = children[0]
        nodeType = child.nodeType
        if nodeType == TEXT:
            if child.data.strip():
                raise ConversionError("unexpected free data in <%s>: %r"
                                      % (table.tagName, child.data))
            table.removeChild(child)
            continue
        if nodeType == ELEMENT:
            if child.tagName != "hline":
                raise ConversionError(
                    "unexpected <%s> in table" % child.tagName)
            table.removeChild(child)
            continue
        raise ConversionError(
            "unexpected %s node in table" % child.__class__.__name__)
    # nothing left in the <table>; add the <thead> and <tbody>
    tgroup = doc.createElement("tgroup")
    tgroup.appendChild(doc.createTextNode("\n "))
    tgroup.appendChild(thead)
    tgroup.appendChild(doc.createTextNode("\n "))
    tgroup.appendChild(tbody)
    tgroup.appendChild(doc.createTextNode("\n "))
    table.appendChild(tgroup)
    # now make the <entry>s look nice:
    for row in table.getElementsByTagName("row"):
        fixup_row(doc, row)
588
589
def fixup_row(doc, row):
    """Insert a newline-plus-indent text node before every child of *row*
    except the first.

    *doc* is the node factory; *row* is modified in place.  No whitespace
    is appended after the last child.
    """
    # Snapshot the children first: the loop inserts into the same child
    # list.  (The former map(entries.append, ...) call is lazy on Python 3
    # and never filled the list, turning this function into a no-op.)
    entries = list(row.childNodes[1:])
    for entry in entries:
        row.insertBefore(doc.createTextNode("\n "), entry)
596
597
def move_elements_by_name(doc, source, dest, name, sep=None):
    """Move the direct children of *source* named *name* to the end of *dest*.

    Order is preserved.  When *sep* is non-empty, a new text node holding
    *sep* (created through *doc*) is appended after each moved node.
    """
    # Collect first so the child list is not modified while being scanned.
    matching = [child for child in source.childNodes
                if child.nodeName == name]
    for element in matching:
        source.removeChild(element)
        dest.appendChild(element)
        if sep:
            dest.appendChild(doc.createTextNode(sep))
608
609
# Containers whose children are themselves examined for paragraph material
# (the paragraph fixer recurses into these rather than wrapping them).
RECURSE_INTO_PARA_CONTAINERS = (
    "chapter", "abstract", "enumerate",
    "section", "subsection", "subsubsection",
    "paragraph", "subparagraph", "back-matter",
    "howto", "manual",
    "item", "itemize", "fulllineitems", "enumeration", "descriptionlist",
    "definitionlist", "definition",
    )

# Elements that stand at paragraph level; together with the containers
# above they end the run of nodes collected into one paragraph.
PARA_LEVEL_ELEMENTS = (
    "moduleinfo", "title", "verbatim", "enumerate", "item",
    "interpreter-session", "back-matter", "interactive-session",
    "opcodedesc", "classdesc", "datadesc",
    "cfuncdesc", "ctypedesc", "cvardesc",
    # "memberdescni" was previously misspelled "membderdescni", so that
    # descriptor was never recognized here.
    "funcdesc", "methoddesc", "excdesc", "memberdesc", "memberdescni",
    "funcdescni", "methoddescni", "excdescni",
    "tableii", "tableiii", "tableiv", "localmoduletable",
    "sectionauthor", "seealso", "itemize",
    # include <para>, so we can just do it again to get subsequent paras:
    PARA_ELEMENT,
    )

# Elements that may precede paragraph-level content.
# NOTE(review): the consumer of this table is outside this excerpt.
PARA_LEVEL_PRECEEDERS = (
    "setindexsubitem", "author",
    "stindex", "obindex", "COMMENT", "label", "xi:include", "title",
    "versionadded", "versionchanged", "declaremodule", "modulesynopsis",
    "moduleauthor", "indexterm", "leader",
    )
638
Fred Drake7dab6af1999-01-28 23:59:58 +0000639
def fixup_paras(doc, fragment):
    """Apply the paragraph fixer to *fragment*.

    It is run on every top-level child that is a paragraph container and
    then on every <description> element found in the fragment.
    """
    for child in fragment.childNodes:
        if child.nodeName not in RECURSE_INTO_PARA_CONTAINERS:
            continue
        fixup_paras_helper(doc, child)
    for description in find_all_elements(fragment, "description"):
        fixup_paras_helper(doc, description)
Fred Drakefcc59101999-01-06 22:50:52 +0000647
648
def fixup_paras_helper(doc, container, depth=0):
    """Walk the children of *container*, recursing into nested paragraph
    containers and passing everything else to build_para().

    *depth* is consulted only by the debugging exit below.
    """
    # document is already normalized
    children = container.childNodes
    start = skip_leading_nodes(children)
    while start < len(children):
        candidate = children[start]
        if candidate.nodeName not in RECURSE_INTO_PARA_CONTAINERS:
            # Paragraph material:
            build_para(doc, container, start, len(children))
            if DEBUG_PARA_FIXER and depth == 10:
                sys.exit(1)
        else:
            # Something to recurse into:
            fixup_paras_helper(doc, candidate)
        start = skip_leading_nodes(children, start + 1)
Fred Drakefcc59101999-01-06 22:50:52 +0000663
664
def build_para(doc, parent, start, i):
    """Move a run of children of *parent* into a new paragraph element.

    Children are collected from index *start* up to (but excluding) index
    *i*.  Collection stops early at an element whose tag name is in
    PARA_LEVEL_ELEMENTS or RECURSE_INTO_PARA_CONTAINERS, or at a text node
    containing a blank line ("\\n\\n"); such a text node is split so that
    only the part before the blank line goes into the paragraph.  Trailing
    white space of a final text node is split off as well.

    Returns the index in parent.childNodes just past the inserted
    paragraph (and the newline text node that follows it).

    Raises ConversionError when no content for a paragraph can be found.
    """
    children = parent.childNodes
    after = start + 1
    have_last = 0
    BREAK_ELEMENTS = PARA_LEVEL_ELEMENTS + RECURSE_INTO_PARA_CONTAINERS
    # Collect all children until \n\n+ is found in a text node or a
    # member of BREAK_ELEMENTS is found.
    for j in range(start, i):
        after = j + 1
        child = children[j]
        nodeType = child.nodeType
        if nodeType == ELEMENT:
            if child.tagName in BREAK_ELEMENTS:
                after = j
                break
        elif nodeType == TEXT:
            pos = child.data.find("\n\n")
            if pos == 0:
                after = j
                break
            if pos >= 1:
                child.splitText(pos)
                break
    else:
        # The loop ran to the end without hitting a break point, so the
        # paragraph extends to the last child considered.
        have_last = 1
    if (start + 1) > after:
        raise ConversionError(
            "build_para() could not identify content to turn into a paragraph")
    if children[after - 1].nodeType == TEXT:
        # we may need to split off trailing white space:
        child = children[after - 1]
        data = child.data
        if data.rstrip() != data:
            have_last = 0
            child.splitText(len(data.rstrip()))
    para = doc.createElement(PARA_ELEMENT)
    prev = None
    # Move the nodes back to front so that the remaining indexes stay valid
    # while nodes are being removed.  reversed(range(...)) is used instead
    # of range(...).reverse() because range() is not a list on Python 3.
    for j in reversed(range(start, after)):
        node = parent.childNodes[j]
        parent.removeChild(node)
        para.insertBefore(node, prev)
        prev = node
    if have_last:
        parent.appendChild(para)
        parent.appendChild(doc.createTextNode("\n\n"))
        return len(parent.childNodes)
    else:
        nextnode = parent.childNodes[start]
        if nextnode.nodeType == TEXT:
            if nextnode.data and nextnode.data[0] != "\n":
                nextnode.data = "\n" + nextnode.data
        else:
            # Separate the paragraph from the following node by a newline.
            newnode = doc.createTextNode("\n")
            parent.insertBefore(newnode, nextnode)
            nextnode = newnode
            start = start + 1
        parent.insertBefore(para, nextnode)
        return start + 1
Fred Drakefcc59101999-01-06 22:50:52 +0000725
726
def skip_leading_nodes(children, start=0):
    """Return the index in *children* where paragraph building should begin,
    or where a recursive call to fixup_paras_helper() should be made (for
    subsections, etc.).

    Scanning begins at index *start*.  A result >= len(children) means no
    further paragraphs can be built from this list of children.
    """
    total = len(children)
    index = start
    while index < total:
        node = children[index]
        kind = node.nodeType
        if kind == ELEMENT:
            name = node.tagName
            if (name in RECURSE_INTO_PARA_CONTAINERS
                    or name not in PARA_LEVEL_ELEMENTS + PARA_LEVEL_PRECEEDERS):
                return index
        elif kind == TEXT:
            text = node.data
            stripped = text.lstrip()
            if stripped:
                if text == stripped:
                    return index
                # Split into a white-space node and a content node; the
                # content node is the one that follows.
                node.splitText(len(text) - len(stripped))
                return index + 1
            # white space only: fall through and skip it
        # anything else (e.g. a comment) is skipped as well
        index = index + 1
    return index
Fred Drakefba0ba21998-12-10 05:07:09 +0000758
759
def fixup_rfc_references(doc, fragment):
    """Give every <pep> and <rfc> element a text label.

    The label is the upper-cased tag name, a space, and the value of the
    element's "num" attribute; it is appended as a new text node.
    """
    for ref in find_all_elements_from_set(fragment, ("pep", "rfc")):
        label = ref.tagName.upper() + " " + ref.getAttribute("num")
        text_node = doc.createTextNode(label)
        ref.appendChild(text_node)
Fred Draked24167b1999-01-14 21:18:03 +0000764
765
def fixup_signatures(doc, fragment):
    """Rewrite the argument lists found below the children of *fragment*.

    For each element child, all <args> descendants and then all
    <constructor-args> descendants are passed to rewrite_args().
    """
    for child in fragment.childNodes:
        if child.nodeType != ELEMENT:
            continue
        for gi in ("args", "constructor-args"):
            for arglist in child.getElementsByTagName(gi):
                rewrite_args(doc, arglist)
Fred Draked24167b1999-01-14 21:18:03 +0000775
def rewrite_args(doc, arglist):
    """Flatten <optional> markup in *arglist* and tidy its white space.

    After fixup_args() and normalization, an argument list that consists of
    a single text node has every run of white space collapsed to one space
    (leading and trailing white space is dropped).
    """
    fixup_args(doc, arglist)
    arglist.normalize()
    if arglist.childNodes.length != 1:
        return
    only = arglist.firstChild
    if only.nodeType != TEXT:
        return
    words = only.data.split()
    only.data = ' '.join(words)
Fred Draked24167b1999-01-14 21:18:03 +0000782
def fixup_args(doc, arglist):
    """Replace each <optional> child of *arglist* by "[", its content, "]".

    The children of an <optional> element are moved up into *arglist*, so
    an <optional> nested inside another one is handled on a later pass.
    Always returns None.
    """
    while True:
        # Look for the first remaining <optional> among the direct children.
        target = None
        for node in arglist.childNodes:
            if node.nodeName == "optional":
                target = node
                break
        if target is None:
            return None
        arglist.insertBefore(doc.createTextNode("["), target)
        # insertBefore() re-parents the node, which shrinks this live list.
        remaining = target.childNodes
        while remaining:
            arglist.insertBefore(target.firstChild, target)
        arglist.insertBefore(doc.createTextNode("]"), target)
        arglist.removeChild(target)
794
795
def fixup_sectionauthors(doc, fragment):
    """Turn each <sectionauthor> into an <author> element near the title.

    The element is detached from its parent, renamed with set_tagName(),
    and its "name" attribute is converted into text content.  It is then
    re-inserted before the third child of the parent when the second child
    is a <title>, otherwise before the first child.
    """
    for author in find_all_elements(fragment, "sectionauthor"):
        section = author.parentNode
        section.removeChild(author)
        set_tagName(author, "author")
        name_text = doc.createTextNode(author.getAttribute("name"))
        author.appendChild(name_text)
        author.removeAttribute("name")
        siblings = section.childNodes
        anchor = siblings[2]
        if siblings[1].nodeName != "title":
            anchor = siblings[0]
        section.insertBefore(doc.createTextNode("\n "), anchor)
        section.insertBefore(author, anchor)
810
811
def fixup_verbatims(doc):
    """Rename <verbatim> elements that show an interactive session.

    A <verbatim> whose first child is a text node starting (after leading
    white space) with ">>>" is renamed to <interactive-session>.
    """
    for verbatim in find_all_elements(doc, "verbatim"):
        first = verbatim.childNodes[0]
        if first.nodeType != TEXT:
            continue
        if first.data.lstrip().startswith(">>>"):
            set_tagName(verbatim, "interactive-session")
Fred Drake93d762f1999-02-18 16:32:21 +0000818
819
def add_node_ids(fragment, counter=0):
    """Store a numeric ``node_id`` attribute on *fragment* and its subtree.

    *fragment* receives *counter*; its children are numbered from there in
    document order, with element children numbered recursively.  Returns
    the value a following sibling subtree should continue from.
    """
    fragment.node_id = counter
    for child in fragment.childNodes:
        counter += 1
        if child.nodeType != ELEMENT:
            child.node_id = counter
        else:
            counter = add_node_ids(child, counter)
    return counter + 1
829
830
def fixup_ulink(doc, fragment):
    """Collapse the two-part form of <ulink> into an element with an href.

    Each <ulink> is expected to have exactly two children: the first holds
    the link text, the second holds the URL as a single text node.  The URL
    becomes the "href" attribute, and the content of the first child is
    moved directly into the <ulink>; both wrapper children are removed.
    """
    for ulink in find_all_elements(fragment, "ulink"):
        parts = ulink.childNodes
        assert len(parts) == 2
        text = parts[0]
        href = parts[1]
        href.normalize()
        assert len(href.childNodes) == 1
        url_node = href.childNodes[0]
        assert url_node.nodeType == TEXT
        ulink.setAttribute("href", url_node.data)
        ulink.removeChild(href)
        # appendChild() re-parents each node, shrinking this live list.
        pending = text.childNodes
        while len(pending):
            ulink.appendChild(pending[0])
        ulink.removeChild(text)
847
848
# Tag names of the module index markers handled by fixup_refmodindexes().
REFMODINDEX_ELEMENTS = (
    'refmodindex',
    'refbimodindex',
    'refexmodindex',
    'refstmodindex',
)
851
def fixup_refmodindexes(fragment):
    """Fold <ref*modindex> markers into neighbouring <module> elements.

    Locate <ref*modindex>...</> co-located with <module>...</>, and
    remove the <ref*modindex>, replacing it with index=index on the
    <module> element.

    The parents of all markers are collected first, keyed by their
    ``node_id`` attribute so that each parent is processed only once, and
    then each parent is passed to fixup_refmodindexes_chunk().
    """
    parents = {}
    for node in find_all_elements_from_set(fragment, REFMODINDEX_ELEMENTS):
        parent = node.parentNode
        parents[parent.node_id] = parent
    # An explicit loop is required: map() is lazy on Python 3, so calling
    # it only for its side effects would never process any chunk.
    for parent in list(parents.values()):
        fixup_refmodindexes_chunk(parent)
863
864
def fixup_refmodindexes_chunk(container):
    """Match the module index markers in *container* against its <module>s.

    For each childless marker, every <module> element with exactly one
    child whose data equals the marker's "module" attribute gets
    index="yes".  Markers that matched at least one <module> are removed
    from *container*.  Markers that have children are reported on stderr
    and left alone.
    """
    # node is probably a <para>; let's see how often it isn't:
    if container.tagName != PARA_ELEMENT:
        bwrite("--- fixup_refmodindexes_chunk(%s)\n" % container)
    modules = find_all_elements(container, "module")
    if not modules:
        return
    doomed = []
    for entry in find_all_elements_from_set(container, REFMODINDEX_ELEMENTS):
        if len(entry.childNodes) != 0:
            bwrite("--- unexpected number of children for %s node:\n"
                   % entry.tagName)
            ewrite(entry.toxml() + "\n")
            continue
        wanted = entry.getAttribute("module")
        matched = 0
        for module in modules:
            if len(module.childNodes) != 1:
                continue
            if module.childNodes[0].data == wanted:
                module.setAttribute("index", "yes")
                matched = 1
        if matched:
            doomed.append(entry)
    # Removal is deferred until the scan above is complete.
    for entry in doomed:
        container.removeChild(entry)
894
895
def fixup_bifuncindexes(fragment):
    """Process every parent of a <bifuncindex> element exactly once.

    Parents are collected in a dictionary keyed by their ``node_id``
    attribute and each one is passed to fixup_bifuncindexes_chunk().
    """
    parents = {}
    # make sure that each parent is only processed once:
    for node in find_all_elements(fragment, 'bifuncindex'):
        parent = node.parentNode
        parents[parent.node_id] = parent
    # An explicit loop is required: map() is lazy on Python 3, so calling
    # it only for its side effects would never process any chunk.
    for parent in list(parents.values()):
        fixup_bifuncindexes_chunk(parent)
905
906
def fixup_bifuncindexes_chunk(container):
    """Fold <bifuncindex> children of *container* into <function> siblings.

    A <function> child whose first child's data is the marker's "name"
    attribute followed by "()" gets index="yes" and module="__builtin__".
    Every marker that matched at least one <function> is removed from
    *container*.
    """
    doomed = []
    markers = find_all_child_elements(container, "bifuncindex")
    functions = find_all_child_elements(container, "function")
    for marker in markers:
        wanted = marker.getAttribute("name")
        matched = 0
        for func in functions:
            label = func.childNodes[0].data
            # Only names written with a trailing "()" can match.
            if label[-2:] != "()" or label[:-2] != wanted:
                continue
            func.setAttribute("index", "yes")
            func.setAttribute("module", "__builtin__")
            if not matched:
                matched = 1
                doomed.append(marker)
    for marker in doomed:
        container.removeChild(marker)
927
928
def join_adjacent_elements(container, gi):
    """Merge directly adjacent *gi* elements anywhere below *container*.

    Whenever two neighbouring children both have the node name *gi*, the
    children of the second are moved to the end of the first and the
    second element is removed; a note is written with ewrite() for each
    merge.  The walk is iterative and visits the children of every element
    that survives merging.
    """
    queue = [container]
    while queue:
        parent = queue.pop()
        children = parent.childNodes
        i = 0
        # The length is re-read on every pass because merging removes
        # nodes from this live list; a cached count would allow
        # children[i + 1] to run past the end.
        while i < len(children):
            child = children[i]
            if (child.nodeName == gi
                    and i + 1 < len(children)
                    and children[i + 1].nodeName == gi):
                ewrite("--- merging two <%s/> elements\n" % gi)
                nextchild = children[i + 1]
                nextchildren = nextchild.childNodes
                while len(nextchildren):
                    node = nextchildren[0]
                    nextchild.removeChild(node)
                    child.appendChild(node)
                parent.removeChild(nextchild)
                # Stay on the same index: the new neighbour may also be a
                # *gi* element that has to be merged.
                continue
            if child.nodeType == ELEMENT:
                # This includes the last child, so its subtree is scanned
                # as well.
                queue.append(child)
            i = i + 1
953
954
# Attribute values matching this pattern are written with the TOKEN type
# by write_esis(); all other values are written as CDATA.
_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")


def write_esis(doc, ofp, knownempty):
    """Write the children of *doc* to *ofp* as ESIS data, recursively.

    *knownempty* is called with a tag name and returns a true value for
    elements declared empty; such elements get an "e" line.

    Raises ValueError when a declared-empty element has children and
    RuntimeError for node types other than element, text and entity
    reference.
    """
    for node in doc.childNodes:
        kind = node.nodeType
        if kind == TEXT:
            ofp.write("-%s\n" % esistools.encode(node.data))
            continue
        if kind == ENTITY_REFERENCE:
            ofp.write("&%s\n" % node.nodeName)
            continue
        if kind != ELEMENT:
            raise RuntimeError("unsupported node type: %s" % kind)
        gi = node.tagName
        if knownempty(gi):
            if node.hasChildNodes():
                raise ValueError("declared-empty node <%s> has children" % gi)
            ofp.write("e\n")
        # Attribute lines come before the line that opens the element.
        for name, value in node.attributes.items():
            dtype = "TOKEN" if _token_rx.match(value) else "CDATA"
            ofp.write("A%s %s %s\n" % (name, dtype, esistools.encode(value)))
        ofp.write("(%s\n" % gi)
        write_esis(node, ofp, knownempty)
        ofp.write(")%s\n" % gi)
Fred Drake4db5b461998-12-01 19:03:01 +0000981
982
def convert(ifp, ofp):
    """Read a document from *ifp*, transform it, and write ESIS to *ofp*.

    The input is parsed with esistools.parse() and expanded into a document
    fragment, the fixup passes below are applied in order, and the result
    is dumped with write_esis().

    An IOError with errno EPIPE raised while writing is ignored; any other
    IOError propagates to the caller.
    """
    events = esistools.parse(ifp)
    toktype, doc = events.getEvent()
    fragment = doc.createDocumentFragment()
    events.expandNode(fragment)

    normalize(fragment)
    simplify(doc, fragment)
    handle_labels(doc, fragment)
    handle_appendix(doc, fragment)
    fixup_trailing_whitespace(doc, fragment, {
        # element -> (before-end-tag, after-end-tag)
        "abstract": ("\n", "\n"),
        "title": ("", "\n"),
        "chapter": ("\n", "\n\n\n"),
        "section": ("\n", "\n\n\n"),
        "subsection": ("\n", "\n\n"),
        "subsubsection": ("\n", "\n\n"),
        "paragraph": ("\n", "\n\n"),
        "subparagraph": ("\n", "\n\n"),
        "description": ("\n", "\n\n"),
        "enumeration": ("\n", "\n\n"),
        "item": ("\n", "\n\n"),
        })
    cleanup_root_text(doc)
    cleanup_trailing_parens(fragment, ["function", "method", "cfunction"])
    cleanup_synopses(doc, fragment)
    fixup_descriptors(doc, fragment)
    fixup_verbatims(fragment)
    normalize(fragment)
    fixup_paras(doc, fragment)
    fixup_sectionauthors(doc, fragment)
    fixup_table_structures(doc, fragment)
    fixup_rfc_references(doc, fragment)
    fixup_signatures(doc, fragment)
    fixup_ulink(doc, fragment)
    add_node_ids(fragment)
    fixup_refmodindexes(fragment)
    fixup_bifuncindexes(fragment)
    # Take care of ugly hacks in the LaTeX markup to avoid LaTeX and
    # LaTeX2HTML screwing with GNU-style long options (the '--' problem).
    join_adjacent_elements(fragment, "option")
    # Attempt to avoid trailing blank lines.  Only a trailing text node can
    # be trimmed; an empty fragment or a trailing element is left alone.
    fragment.normalize()
    last = fragment.lastChild
    if last is not None and last.nodeType == TEXT and last.data[-1:] == "\n":
        last.data = last.data.rstrip() + "\n"
    #
    # Elements the parser declared empty, except for those that have been
    # given content by the fixups above.
    empties = set(events.parser.get_empties())
    empties.difference_update(("author", "pep", "rfc"))
    # dict.has_key() no longer exists on Python 3; a bound __contains__
    # gives write_esis() the same "is this tag declared empty" callable.
    knownempty = empties.__contains__
    #
    try:
        write_esis(fragment, ofp, knownempty)
    except IOError as e:
        # Ignore EPIPE; it just means that whoever we're writing to stopped
        # reading.  The rest of the output would be ignored.  All other
        # errors should still be reported.  (The errno attribute is used
        # because exceptions cannot be unpacked like tuples on Python 3.)
        if e.errno != errno.EPIPE:
            raise
1047
1048
def main():
    """Command line entry point.

    With no arguments, convert standard input to standard output.  With
    one argument, read the named file and write to standard output.  With
    two arguments, read the first file and write the second; the output is
    buffered in memory and the output file is only opened once the
    conversion has finished.  With any other argument count, usage() is
    called and the process exits with status 2.
    """
    argc = len(sys.argv)
    if argc not in (1, 2, 3):
        # NOTE(review): usage() is not defined in the visible part of the
        # file -- confirm that it exists.
        usage()
        sys.exit(2)
    if argc == 1:
        ifp = sys.stdin
    else:
        ifp = open(sys.argv[1])
    try:
        if argc == 3:
            # The StringIO module only exists on Python 2; io.StringIO is
            # the Python 3 replacement.
            try:
                from StringIO import StringIO
            except ImportError:
                from io import StringIO
            ofp = StringIO()
        else:
            ofp = sys.stdout
        convert(ifp, ofp)
        if argc == 3:
            with open(sys.argv[2], "w") as fp:
                fp.write(ofp.getvalue())
            ofp.close()
    finally:
        # Close the input file if we opened one, even when conversion fails.
        if ifp is not sys.stdin:
            ifp.close()


if __name__ == "__main__":
    main()