blob: 983601b7057283e59a50b4265357d150a056d3a0 [file] [log] [blame]
Fred Drake03204731998-11-23 17:02:03 +00001#! /usr/bin/env python
2
Fred Drake7dab6af1999-01-28 23:59:58 +00003"""Perform massive transformations on a document tree created from the LaTeX
4of the Python documentation, and dump the ESIS data for the transformed tree.
Fred Drake03204731998-11-23 17:02:03 +00005"""
6__version__ = '$Revision$'
7
8
9import errno
Fred Drake4db5b461998-12-01 19:03:01 +000010import esistools
11import re
Fred Drake03204731998-11-23 17:02:03 +000012import string
13import sys
14import xml.dom.core
Fred Drakee779d4f1999-05-10 19:36:52 +000015
16from xml.dom.core import \
17 ELEMENT, \
18 TEXT
Fred Drake03204731998-11-23 17:02:03 +000019
20
class ConversionError(Exception):
    """Raised when the document tree cannot be converted as expected."""
23
24
# Shorthand for writing diagnostics to standard error.
ewrite = sys.stderr.write
try:
    # We can only do this trick on Unix (if tput is on $PATH)!
    # os.name is the value that reads "posix" on Unix; sys.platform holds
    # strings such as "linux2" or "sunos5", so comparing it with "posix"
    # could never succeed and the bold output was never enabled.
    import os
    if os.name != "posix" or not sys.stderr.isatty():
        raise ImportError
    import curses
    import commands
except ImportError:
    # Fallback: "bold" output is just ordinary stderr output.
    bwrite = ewrite
else:
    def bwrite(s, BOLDON=commands.getoutput("tput bold"),
               BOLDOFF=commands.getoutput("tput sgr0")):
        """Write s to stderr between the terminal's bold-on/off sequences.

        Both sequences are captured once, when this function is defined,
        by running tput.
        """
        ewrite("%s%s%s" % (BOLDON, s, BOLDOFF))
38
39
# Tag name used for the paragraph elements this script creates.
PARA_ELEMENT = "para"

# Set to a true value to get trace messages from the paragraph fixer.
DEBUG_PARA_FIXER = 0


def para_msg(s):
    """Discard s; used while paragraph-fixer tracing is switched off."""
    pass

if DEBUG_PARA_FIXER:
    def para_msg(s):
        """Write the trace message s to stderr, prefixed with '*** '."""
        ewrite("*** %s\n" % s)
50
Fred Drakefcc59101999-01-06 22:50:52 +000051
# Workaround to deal with invalid documents (multiple root elements).  This
# does not indicate a bug in the DOM implementation.
#
def get_documentElement(doc):
    """Return the last element node among doc's children, or None."""
    found = None
    for candidate in doc.childNodes:
        if candidate.nodeType != ELEMENT:
            continue
        # No early exit: a later element child replaces an earlier one.
        found = candidate
    return found

# Install the workaround on the DOM Document class.
xml.dom.core.Document.get_documentElement = get_documentElement
63
64
# Replace get_childNodes for the Document class; without this, children
# accessed from the Document object via .childNodes (no matter how many
# levels of access are used) will be given an ownerDocument of None.
#
def get_childNodes(doc):
    """Return a NodeList built from doc's underlying node and its children."""
    raw = doc._node
    return xml.dom.core.NodeList(raw.children, raw)

# Install the replacement on the DOM Document class.
xml.dom.core.Document.get_childNodes = get_childNodes
73
74
def get_first_element(doc, gi):
    """Return the first child element of doc whose tag name is gi.

    Only direct children are examined.  Returns None when none matches.
    """
    for candidate in doc.childNodes:
        if candidate.nodeType != ELEMENT:
            continue
        if candidate.tagName == gi:
            return candidate
    return None
79
def extract_first_element(doc, gi):
    """Detach and return the first child element of doc named gi.

    Returns None, leaving doc untouched, when there is no such child.
    """
    found = get_first_element(doc, gi)
    if found is None:
        return None
    doc.removeChild(found)
    return found
85
86
def find_all_elements(doc, gi):
    """Return a list of the elements named gi at or below doc.

    doc itself is included when it is an element named gi.  Each element
    child is checked directly and then searched with
    getElementsByTagName().
    """
    matches = []
    if doc.nodeType == ELEMENT:
        if doc.tagName == gi:
            matches.append(doc)
    for kid in doc.childNodes:
        if kid.nodeType != ELEMENT:
            continue
        if kid.tagName == gi:
            matches.append(kid)
        for descendant in kid.getElementsByTagName(gi):
            matches.append(descendant)
    return matches
98
def find_all_child_elements(doc, gi):
    """Return a list of the direct child elements of doc named gi."""
    matches = []
    for kid in doc.childNodes:
        if kid.nodeType != ELEMENT:
            continue
        if kid.tagName != gi:
            continue
        matches.append(kid)
    return matches
106
def find_all_elements_from_set(doc, gi_set):
    """Return a list of elements at or below doc whose tag is in gi_set."""
    collected = []
    return __find_all_elements_from_set(doc, gi_set, collected)
109
def __find_all_elements_from_set(doc, gi_set, nodes):
    """Append to nodes every element at or below doc named in gi_set.

    doc is tested first, then each element child is visited recursively.
    The same nodes list is returned.
    """
    if doc.nodeType == ELEMENT:
        if doc.tagName in gi_set:
            nodes.append(doc)
    for kid in doc.childNodes:
        if kid.nodeType != ELEMENT:
            continue
        __find_all_elements_from_set(kid, gi_set, nodes)
    return nodes
Fred Drake7dab6af1999-01-28 23:59:58 +0000117
118
def simplify(doc, fragment):
    """Tidy the top level of fragment so it has one sensible root.

    The <documentclass> child is removed and its classname attribute
    becomes the name of the <document> element.  The top-level <title>
    and every top-level <input> are moved to the front of the document
    element, and leading text nodes of fragment are dropped.
    """
    # Try to rationalize the document a bit, since these things are simply
    # not valid SGML/XML documents as they stand, and need a little work.
    root_name = "document"
    hoisted = []
    class_node = extract_first_element(fragment, "documentclass")
    if class_node is not None:
        root_name = class_node.getAttribute("classname")
    title = extract_first_element(fragment, "title")
    if title is not None:
        hoisted.append(title)
    # update the name of the root element
    root = get_first_element(fragment, "document")
    if root is not None:
        root._node.name = root_name
    # pull out the top-level <input> elements, one at a time
    pending = extract_first_element(fragment, "input")
    while pending is not None:
        hoisted.append(pending)
        pending = extract_first_element(fragment, "input")
    if hoisted:
        docelem = get_documentElement(fragment)
        # Each node is inserted at the front, so walk the list backwards
        # to keep the collected order in the result.
        hoisted.reverse()
        for item in hoisted:
            newline = doc.createTextNode("\n")
            docelem.insertBefore(newline, docelem.firstChild)
            docelem.insertBefore(item, newline)
        docelem.insertBefore(doc.createTextNode("\n"), docelem.firstChild)
    while fragment.firstChild and fragment.firstChild.nodeType == TEXT:
        fragment.removeChild(fragment.firstChild)
Fred Drake03204731998-11-23 17:02:03 +0000149
150
def cleanup_root_text(doc):
    """Remove the text nodes that are direct children of doc.

    A text node that immediately follows a <COMMENT> element is kept.
    """
    doomed = []
    after_comment = 0
    for n in doc.childNodes:
        followed_comment = after_comment
        after_comment = 0
        if n.nodeType == TEXT:
            if not followed_comment:
                doomed.append(n)
        elif n.nodeType == ELEMENT and n.tagName == "COMMENT":
            after_comment = 1
    # Removal happens afterwards so the child list is not changed while
    # it is being walked.
    for n in doomed:
        doc.removeChild(n)
163
164
# Tag names of the elements that find_and_fix_descriptors() hands to
# rewrite_descriptor().
DESCRIPTOR_ELEMENTS = (
    "cfuncdesc",
    "cvardesc",
    "ctypedesc",
    "classdesc",
    "memberdesc",
    "memberdescni",
    "methoddesc",
    "methoddescni",
    "excdesc",
    "funcdesc",
    "funcdescni",
    "opcodedesc",
    "datadesc",
    "datadescni",
    )
171
def fixup_descriptors(doc, fragment):
    """Rewrite the descriptor elements in every <section> of fragment."""
    for section in find_all_elements(fragment, "section"):
        find_and_fix_descriptors(doc, section)
176
177
def find_and_fix_descriptors(doc, container):
    """Rewrite descriptor children of container, descending into subsections.

    Children named in DESCRIPTOR_ELEMENTS are passed to
    rewrite_descriptor(); <subsection> children are processed recursively.
    """
    for kid in container.childNodes:
        if kid.nodeType != ELEMENT:
            continue
        kind = kid.tagName
        if kind in DESCRIPTOR_ELEMENTS:
            rewrite_descriptor(doc, kid)
        elif kind == "subsection":
            find_and_fix_descriptors(doc, kid)
187
Fred Drakecb657811999-01-29 20:55:07 +0000188
def rewrite_descriptor(doc, descriptor):
    """Restructure one descriptor element in place.

    Do these things:
      1. Add an "index='no'" attribute to the element if the tagName
         ends in 'ni', removing the 'ni' from the name.
      2. Create a <signature> from the name attribute and <args>.
      3. Create additional <signature>s from <*line{,ni}> elements,
         if found.
      4. If a <versionadded> is found, move it to an attribute on the
         descriptor.
      5. Move remaining child nodes to a <description> element.
      6. Put it back together.
    """
    # 1.
    descname = descriptor.tagName
    indexed = descname[-2:] != "ni"
    if not indexed:
        descname = descname[:-2]
        descriptor.setAttribute("index", "no")
        descriptor._node.name = descname
    # drop the trailing 'desc' and build the matching '...line' tag name
    linename = descname[:-4] + "line"
    if not indexed:
        linename = linename + "ni"
    # 2.
    signature = doc.createElement("signature")
    namenode = doc.createElement("name")
    signature.appendChild(doc.createTextNode("\n "))
    signature.appendChild(namenode)
    namenode.appendChild(doc.createTextNode(descriptor.getAttribute("name")))
    descriptor.removeAttribute("name")
    if descriptor.attributes.has_key("var"):
        variable = descriptor.getAttribute("var")
        if variable:
            args = doc.createElement("args")
            args.appendChild(doc.createTextNode(variable))
            signature.appendChild(doc.createTextNode("\n "))
            signature.appendChild(args)
        # NOTE(review): the attribute is dropped even when it is empty --
        # confirm against the intended nesting.
        descriptor.removeAttribute("var")
    replacements = [signature]
    children = descriptor.childNodes
    pos = skip_leading_nodes(children, 0)
    if pos < len(children):
        first = children[pos]
        if first.nodeType == ELEMENT and first.tagName == "args":
            # create an <args> in <signature>:
            args = doc.createElement("args")
            # Take a snapshot first; the nodes are removed from <args>
            # one by one below.
            moving = []
            for n in first.childNodes:
                moving.append(n)
            for n in moving:
                first.removeChild(n)
                args.appendChild(n)
            signature.appendChild(doc.createTextNode("\n "))
            signature.appendChild(args)
    signature.appendChild(doc.createTextNode("\n "))
    # 3, 4.
    pos = skip_leading_nodes(children, pos + 1)
    while pos < len(children):
        current = children[pos]
        if current.nodeType != ELEMENT:
            break
        if current.tagName == linename:
            # this is really a supplemental signature, create <signature>
            replacements.append(methodline_to_signature(doc, current))
        elif current.tagName == "versionadded":
            # <versionadded added=...>
            descriptor.setAttribute(
                "added", current.getAttribute("version"))
        else:
            break
        pos = skip_leading_nodes(children, pos + 1)
    # 5.
    description = doc.createElement("description")
    description.appendChild(doc.createTextNode("\n"))
    replacements.append(description)
    move_children(descriptor, description, pos)
    tail = description.childNodes[-1]
    if tail.nodeType == TEXT:
        tail.data = string.rstrip(tail.data) + "\n "
    # 6.
    # should have nothing but whitespace and signature lines in <descriptor>;
    # discard them
    while descriptor.childNodes:
        descriptor.removeChild(descriptor.childNodes[0])
    for node in replacements:
        descriptor.appendChild(doc.createTextNode("\n "))
        descriptor.appendChild(node)
    descriptor.appendChild(doc.createTextNode("\n"))
Fred Drake03204731998-11-23 17:02:03 +0000275
Fred Drake7dab6af1999-01-28 23:59:58 +0000276
def methodline_to_signature(doc, methodline):
    """Build and return a <signature> element from a <*line> element.

    The name attribute of methodline becomes a <name> child (and is
    removed from methodline); any children of methodline are moved into
    an <args> child.
    """
    namenode = doc.createElement("name")
    namenode.appendChild(doc.createTextNode(methodline.getAttribute("name")))
    methodline.removeAttribute("name")
    signature = doc.createElement("signature")
    signature.appendChild(doc.createTextNode("\n "))
    signature.appendChild(namenode)
    if len(methodline.childNodes):
        args = doc.createElement("args")
        signature.appendChild(doc.createTextNode("\n "))
        signature.appendChild(args)
        move_children(methodline, args)
    signature.appendChild(doc.createTextNode("\n "))
    return signature
Fred Drake03204731998-11-23 17:02:03 +0000291
292
def move_children(origin, dest, start=0):
    """Move the children of origin from index start onward to dest.

    The nodes are appended to dest in their original order.
    """
    remaining = origin.childNodes
    # Each removal shortens the list, so the node to move next is always
    # the one found at index start.
    while len(remaining) > start:
        moved = remaining[start]
        origin.removeChild(moved)
        dest.appendChild(moved)
299
300
def handle_appendix(doc, fragment):
    """Move the material that follows the <appendix> marker into <back-matter>.

    The first <appendix> element found below a child of the document
    element is removed.  Every later sibling of that child is moved into
    a new <back-matter> element appended to the document element, after
    leading whitespace-only text nodes have been dropped.
    """
    # must be called after simplify() if document is multi-rooted to begin with
    docelem = get_documentElement(fragment)
    appendices = 0
    nodes = []
    for node in docelem.childNodes:
        if appendices:
            nodes.append(node)
        elif node.nodeType == ELEMENT:
            appnodes = node.getElementsByTagName("appendix")
            if appnodes:
                appendices = 1
                parent = appnodes[0].parentNode
                parent.removeChild(appnodes[0])
                parent.normalize()
    if nodes:
        # Plain loops rather than map(): these calls are made only for
        # their side effects.
        for node in nodes:
            docelem.removeChild(node)
        docelem.appendChild(doc.createTextNode("\n\n\n"))
        back = doc.createElement("back-matter")
        docelem.appendChild(back)
        back.appendChild(doc.createTextNode("\n"))
        while nodes and nodes[0].nodeType == TEXT \
              and not string.strip(nodes[0].data):
            del nodes[0]
        for node in nodes:
            back.appendChild(node)
        docelem.appendChild(doc.createTextNode("\n"))
Fred Drake03204731998-11-23 17:02:03 +0000328
329
def handle_labels(doc, fragment):
    """Turn <label id="..."/> elements into id attributes.

    The id is set on the label's parent, or on the parent's parent when
    the label sits inside a <title>.  The label is then removed; for a
    <title>, trailing whitespace left at its end is stripped.  Labels
    without an id are left alone.
    """
    for label in find_all_elements(fragment, "label"):
        label_id = label.getAttribute("id")
        if not label_id:
            continue
        parent = label.parentNode
        in_title = parent.tagName == "title"
        if in_title:
            parent.parentNode.setAttribute("id", label_id)
        else:
            parent.setAttribute("id", label_id)
        # now, remove <label id="..."/> from parent:
        parent.removeChild(label)
        if in_title:
            parent.normalize()
            children = parent.childNodes
            # The label may have been the only content of the title, in
            # which case there is no last child to trim.
            if len(children) and children[-1].nodeType == TEXT:
                children[-1].data = string.rstrip(children[-1].data)
Fred Drake03204731998-11-23 17:02:03 +0000347
348
def fixup_trailing_whitespace(doc, wsmap):
    """Normalize the whitespace that ends the elements named in wsmap.

    wsmap maps a tag name to the whitespace string that should end the
    element's last text node.  The tree below doc is walked breadth
    first.
    """
    queue = [doc]
    while queue:
        node = queue[0]
        del queue[0]
        if node.nodeType == ELEMENT \
           and wsmap.has_key(node.tagName):
            ws = wsmap[node.tagName]
            children = node.childNodes
            # Look at the last child directly; an element without
            # children has nothing to trim.
            if len(children) and children[-1].nodeType == TEXT:
                children[-1].data = string.rstrip(children[-1].data) + ws
            # hack to get the title in place:
            if node.tagName == "title" \
               and node.parentNode.firstChild.nodeType == ELEMENT:
                # createTextNode() is the DOM factory used everywhere
                # else in this file.
                node.parentNode.insertBefore(doc.createTextNode("\n "),
                                             node.parentNode.firstChild)
        for child in node.childNodes:
            if child.nodeType == ELEMENT:
                queue.append(child)
371
372
def normalize(doc):
    """Call normalize() on every element that is a direct child of doc."""
    for kid in doc.childNodes:
        if kid.nodeType != ELEMENT:
            continue
        kid.normalize()
377
378
def cleanup_trailing_parens(doc, element_names):
    """Strip a trailing "()" from the text of the named elements.

    Only elements whose single child is a text node are changed.  The
    search does not descend into an element named in element_names.
    """
    wanted = {}
    for gi in element_names:
        wanted[gi] = gi
    pending = []
    for node in doc.childNodes:
        if node.nodeType == ELEMENT:
            pending.append(node)
    while pending:
        node = pending[0]
        del pending[0]
        if not wanted.has_key(node.tagName):
            # not a target: keep looking among its element children
            for child in node.childNodes:
                if child.nodeType == ELEMENT:
                    pending.append(child)
            continue
        children = node.childNodes
        if len(children) != 1:
            continue
        only = children[0]
        if only.nodeType != TEXT:
            continue
        data = only.data
        if data[-2:] == "()":
            only.data = data[:-2]
402
403
def contents_match(left, right):
    """Return 1 if the children of left and right match, else 0.

    Children are compared pairwise: elements by tag name and,
    recursively, by content; text nodes by their data.  Attributes are
    not compared, and any other node type counts as a mismatch.
    """
    left_children = left.childNodes
    right_children = right.childNodes
    count = len(left_children)
    if count != len(right_children):
        return 0
    for i in range(count):
        l = left_children[i]
        r = right_children[i]
        kind = l.nodeType
        if kind != r.nodeType:
            return 0
        if kind == ELEMENT:
            # should check attributes, but that's not a problem here
            if l.tagName != r.tagName or not contents_match(l, r):
                return 0
        elif kind == TEXT:
            if l.data != r.data:
                return 0
        else:
            # not quite right, but good enough
            return 0
    return 1
426
427
def create_module_info(doc, section):
    """Gather a section's module metadata into a <moduleinfo> element.

    Does nothing unless section has a <modulesynopsis> child.  That
    element is renamed <synopsis>; <moduleauthor>, <platform>,
    <declaremodule> and <versionadded> are removed from the section and
    folded into the new <moduleinfo>.  When the section title starts
    with a <module> element naming the declared module, the title is
    turned into a <short-synopsis>.
    """
    # Heavy.
    node = extract_first_element(section, "modulesynopsis")
    if node is None:
        return
    node._node.name = "synopsis"
    # An empty synopsis has no last child to trim.
    if len(node.childNodes):
        lastchild = node.childNodes[-1]
        if lastchild.nodeType == TEXT \
           and lastchild.data[-1:] == ".":
            lastchild.data = lastchild.data[:-1]
    modauthor = extract_first_element(section, "moduleauthor")
    if modauthor:
        modauthor._node.name = "author"
        modauthor.appendChild(doc.createTextNode(
            modauthor.getAttribute("name")))
        modauthor.removeAttribute("name")
    platform = extract_first_element(section, "platform")
    if section.tagName == "section":
        # Index at which <moduleinfo> is inserted; reset to 0 below when
        # the title is taken out of the section.
        modinfo_pos = 2
        modinfo = doc.createElement("moduleinfo")
        moddecl = extract_first_element(section, "declaremodule")
        name = None
        if moddecl:
            modinfo.appendChild(doc.createTextNode("\n "))
            name = moddecl.attributes["name"].value
            namenode = doc.createElement("name")
            namenode.appendChild(doc.createTextNode(name))
            modinfo.appendChild(namenode)
            modtype = moddecl.attributes.get("type")
            if modtype:
                modtype = modtype.value
                modinfo.appendChild(doc.createTextNode("\n "))
                typenode = doc.createElement("type")
                typenode.appendChild(doc.createTextNode(modtype))
                modinfo.appendChild(typenode)
        versionadded = extract_first_element(section, "versionadded")
        if versionadded:
            modinfo.setAttribute("added", versionadded.getAttribute("version"))
        title = get_first_element(section, "title")
        if title:
            children = title.childNodes
            if len(children) >= 2 \
               and children[0].nodeType == ELEMENT \
               and children[0].tagName == "module" \
               and children[0].childNodes[0].data == name:
                # this is it; morph the <title> into <short-synopsis>
                first_data = children[1]
                if first_data.data[:4] == " ---":
                    first_data.data = string.lstrip(first_data.data[4:])
                title._node.name = "short-synopsis"
                if children[-1].nodeType == TEXT \
                   and children[-1].data[-1:] == ".":
                    children[-1].data = children[-1].data[:-1]
                section.removeChild(title)
                section.removeChild(section.childNodes[0])
                title.removeChild(children[0])
                modinfo_pos = 0
            else:
                ewrite("module name in title doesn't match"
                       " <declaremodule/>; no <short-synopsis/>\n")
        else:
            ewrite("Unexpected condition: <section/> without <title/>\n")
        modinfo.appendChild(doc.createTextNode("\n "))
        modinfo.appendChild(node)
        if title and not contents_match(title, node):
            # The short synopsis is actually different,
            # and needs to be stored:
            modinfo.appendChild(doc.createTextNode("\n "))
            modinfo.appendChild(title)
        if modauthor:
            modinfo.appendChild(doc.createTextNode("\n "))
            modinfo.appendChild(modauthor)
        if platform:
            modinfo.appendChild(doc.createTextNode("\n "))
            modinfo.appendChild(platform)
        modinfo.appendChild(doc.createTextNode("\n "))
        section.insertBefore(modinfo, section.childNodes[modinfo_pos])
        section.insertBefore(doc.createTextNode("\n "), modinfo)
        #
        # The rest of this removes extra newlines from where we cut out
        # a lot of elements.  A lot of code for minimal value, but it
        # keeps the generated *ML from being too funny looking.
        #
        section.normalize()
        children = section.childNodes
        for i in range(len(children)):
            node = children[i]
            # The bounds check covers a <moduleinfo> that is the last
            # child, which has no following node to adjust.
            if node.nodeType == ELEMENT \
               and node.tagName == "moduleinfo" \
               and i + 1 < len(children):
                nextnode = children[i+1]
                if nextnode.nodeType == TEXT:
                    data = nextnode.data
                    stripped = string.lstrip(data)
                    if len(stripped) < (len(data) - 4):
                        nextnode.data = "\n\n\n" + stripped
Fred Drakeaaed9711998-12-10 20:25:30 +0000522
523
def cleanup_synopses(doc, fragment):
    """Run create_module_info() on every <section> found in fragment."""
    sections = find_all_elements(fragment, "section")
    for section in sections:
        create_module_info(doc, section)
Fred Drakeaaed9711998-12-10 20:25:30 +0000527
528
def remap_element_names(root, name_map):
    """Rename the elements below root according to name_map.

    name_map maps an old tag name to a (new name, attributes) pair; the
    attributes dictionary is applied to each renamed element.  root
    itself is not renamed.
    """
    stack = []
    for kid in root.childNodes:
        if kid.nodeType == ELEMENT:
            stack.append(kid)
    while stack:
        node = stack.pop()
        if name_map.has_key(node.tagName):
            newname, attrs = name_map[node.tagName]
            node._node.name = newname
            for attr, value in attrs.items():
                node.setAttribute(attr, value)
        for kid in node.childNodes:
            if kid.nodeType == ELEMENT:
                stack.append(kid)
545
546
def fixup_table_structures(doc, fragment):
    """Apply fixup_table() to every <table> element found in fragment."""
    # must be done after remap_element_names(), or the tables won't be found
    tables = find_all_elements(fragment, "table")
    for table in tables:
        fixup_table(doc, table)
551
Fred Drakef8ebb551999-01-14 19:45:38 +0000552
def fixup_table(doc, table):
    """Rebuild table as a <tgroup> holding a <thead> and a <tbody>.

    The <entry> children of table become the single header row and the
    <row> children become the body.  A <row> followed by an <hline> gets
    rowsep="1".  Raises ConversionError when anything other than
    whitespace text or <hline> elements is left over.
    """
    # create the table head
    head_row = doc.createElement("row")
    move_elements_by_name(doc, table, head_row, "entry")
    thead = doc.createElement("thead")
    thead.appendChild(doc.createTextNode("\n "))
    thead.appendChild(head_row)
    thead.appendChild(doc.createTextNode("\n "))
    # create the table body
    tbody = doc.createElement("tbody")
    prev_row = None
    children = table.childNodes
    for child in children:
        if child.nodeType != ELEMENT:
            continue
        tagName = child.tagName
        if tagName == "hline" and prev_row is not None:
            prev_row.setAttribute("rowsep", "1")
        elif tagName == "row":
            prev_row = child
    # save the rows:
    tbody.appendChild(doc.createTextNode("\n "))
    move_elements_by_name(doc, table, tbody, "row", sep="\n ")
    # and toss the rest:
    while children:
        child = children[0]
        nodeType = child.nodeType
        if nodeType == TEXT:
            if string.strip(child.data):
                raise ConversionError("unexpected free data in table")
        elif nodeType == ELEMENT:
            if child.tagName != "hline":
                raise ConversionError(
                    "unexpected <%s> in table" % child.tagName)
        else:
            raise ConversionError(
                "unexpected %s node in table" % child.__class__.__name__)
        table.removeChild(child)
    # nothing left in the <table>; add the <thead> and <tbody>
    tgroup = doc.createElement("tgroup")
    tgroup.appendChild(doc.createTextNode("\n "))
    tgroup.appendChild(thead)
    tgroup.appendChild(doc.createTextNode("\n "))
    tgroup.appendChild(tbody)
    tgroup.appendChild(doc.createTextNode("\n "))
    table.appendChild(tgroup)
    # now make the <entry>s look nice:
    for row in table.getElementsByTagName("row"):
        fixup_row(doc, row)
604
605
def fixup_row(doc, row):
    """Insert a newline text node before each child of row but the first."""
    # Copy the children first; inserting nodes changes row's child list.
    entries = []
    for entry in row.childNodes[1:]:
        entries.append(entry)
    for entry in entries:
        row.insertBefore(doc.createTextNode("\n "), entry)
#    row.appendChild(doc.createTextNode("\n "))
612
613
def move_elements_by_name(doc, source, dest, name, sep=None):
    """Move the child elements of source named name to the end of dest.

    When sep is true, a text node holding sep is appended to dest after
    each moved element.
    """
    # Collect first so source's child list is not changed mid-walk.
    chosen = []
    for kid in source.childNodes:
        if kid.nodeType != ELEMENT:
            continue
        if kid.tagName == name:
            chosen.append(kid)
    for kid in chosen:
        source.removeChild(kid)
        dest.appendChild(kid)
        if sep:
            dest.appendChild(doc.createTextNode(sep))
624
625
# Elements whose content the paragraph fixer descends into instead of
# wrapping them in a paragraph.
RECURSE_INTO_PARA_CONTAINERS = (
    "chapter",
    "abstract",
    "enumerate",
    "section",
    "subsection",
    "subsubsection",
    "paragraph",
    "subparagraph",
    "back-matter",
    "howto",
    "manual",
    )
Fred Drakefcc59101999-01-06 22:50:52 +0000632
# Elements that end the paragraph being collected by build_para().
PARA_LEVEL_ELEMENTS = (
    "moduleinfo", "title", "verbatim", "enumerate", "item",
    "interpreter-session", "back-matter", "interactive-session",
    "opcodedesc", "classdesc", "datadesc",
    # "memberdescni" was misspelled "membderdescni" and so could never
    # match the tag name listed in DESCRIPTOR_ELEMENTS.
    "funcdesc", "methoddesc", "excdesc", "memberdesc", "memberdescni",
    "funcdescni", "methoddescni", "excdescni",
    "tableii", "tableiii", "tableiv", "localmoduletable",
    "sectionauthor", "seealso",
    # include <para>, so we can just do it again to get subsequent paras:
    PARA_ELEMENT,
    )
644
# NOTE(review): presumably the elements skip_leading_nodes() may step
# over before a paragraph starts -- confirm against that function.
PARA_LEVEL_PRECEEDERS = (
    "index",
    "indexii",
    "indexiii",
    "indexiv",
    "setindexsubitem",
    "stindex",
    "obindex",
    "COMMENT",
    "label",
    "input",
    "title",
    "versionadded",
    "versionchanged",
    "declaremodule",
    "modulesynopsis",
    "moduleauthor",
    "indexterm",
    )
651
Fred Drake7dab6af1999-01-28 23:59:58 +0000652
def fixup_paras(doc, fragment):
    """Build paragraph elements throughout fragment.

    The paragraph fixer is run on each top-level child named in
    RECURSE_INTO_PARA_CONTAINERS and then on every <description>
    element found in fragment.
    """
    for kid in fragment.childNodes:
        if kid.nodeType != ELEMENT:
            continue
        if kid.tagName in RECURSE_INTO_PARA_CONTAINERS:
            fixup_paras_helper(doc, kid)
    for description in find_all_elements(fragment, "description"):
        fixup_paras_helper(doc, description)
Fred Drakefcc59101999-01-06 22:50:52 +0000662
663
def fixup_paras_helper(doc, container, depth=0):
    """Wrap the loose content of container in paragraph elements.

    Children named in RECURSE_INTO_PARA_CONTAINERS are processed
    recursively; other content is handed to build_para().
    """
    # document is already normalized
    children = container.childNodes
    start = 0
    while start < len(children):
        start = skip_leading_nodes(children, start)
        if start >= len(children):
            break
        candidate = children[start]
        if candidate.nodeType == ELEMENT \
           and candidate.tagName in RECURSE_INTO_PARA_CONTAINERS:
            # something to recurse into:
            fixup_paras_helper(doc, candidate)
            start = skip_leading_nodes(children, start + 1)
        else:
            # paragraph material:
            build_para(doc, container, start, len(children))
            if DEBUG_PARA_FIXER and depth == 10:
                sys.exit(1)
            start = start + 1
Fred Drakefcc59101999-01-06 22:50:52 +0000687
688
def build_para(doc, parent, start, i):
    """Wrap a run of parent's children, beginning at start, in a paragraph.

    Children in the index range start..i-1 are collected until a text
    node containing a blank line ("\n\n") or an element named in
    PARA_LEVEL_ELEMENTS or RECURSE_INTO_PARA_CONTAINERS is reached.
    Trailing whitespace is split off the last text node so that it stays
    outside the paragraph.  Returns an index into parent's children that
    lies past the new paragraph.

    Raises ConversionError when the node at start already ends the
    paragraph, so there is nothing to wrap.
    """
    children = parent.childNodes
    stoppers = PARA_LEVEL_ELEMENTS + RECURSE_INTO_PARA_CONTAINERS
    after = start + 1
    have_last = 0
    # Collect all children until \n\n+ is found in a text node or a
    # member of stoppers is found.
    for j in range(start, i):
        after = j + 1
        child = children[j]
        kind = child.nodeType
        if kind == ELEMENT:
            if child.tagName in stoppers:
                after = j
                break
        elif kind == TEXT:
            pos = string.find(child.data, "\n\n")
            if pos == 0:
                after = j
                break
            if pos >= 1:
                child.splitText(pos)
                break
    else:
        # ran off the end: the paragraph takes everything up to index i
        have_last = 1
    if after < start + 1:
        raise ConversionError(
            "build_para() could not identify content to turn into a paragraph")
    tail = children[after - 1]
    if tail.nodeType == TEXT:
        # we may need to split off trailing white space:
        data = tail.data
        stripped = string.rstrip(data)
        if stripped != data:
            have_last = 0
            tail.splitText(len(stripped))
    para = doc.createElement(PARA_ELEMENT)
    prev = None
    # Move the nodes from last to first, each in front of the previous.
    for j in range(after - 1, start - 1, -1):
        node = parent.childNodes[j]
        parent.removeChild(node)
        para.insertBefore(node, prev)
        prev = node
    if have_last:
        parent.appendChild(para)
        parent.appendChild(doc.createTextNode("\n\n"))
        return len(parent.childNodes)
    nextnode = parent.childNodes[start]
    if nextnode.nodeType == TEXT:
        if nextnode.data and nextnode.data[0] != "\n":
            nextnode.data = "\n" + nextnode.data
    else:
        newnode = doc.createTextNode("\n")
        parent.insertBefore(newnode, nextnode)
        nextnode = newnode
        start = start + 1
    parent.insertBefore(para, nextnode)
    return start + 1
Fred Drakefcc59101999-01-06 22:50:52 +0000749
750
def skip_leading_nodes(children, start):
    """Find the next index in children that needs attention.

    Starting at start, step over text nodes that hold only white space
    and over elements listed in PARA_LEVEL_ELEMENTS or
    PARA_LEVEL_PRECEEDERS.  The index returned is that of the first node
    from which a paragraph should be built, or which is a member of
    RECURSE_INTO_PARA_CONTAINERS and so should be handed to
    fixup_paras_helper().

    A result >= len(children) means there is nothing left to process in
    this list of children.
    """
    limit = len(children)
    index = start
    while index < limit:
        node = children[index]
        kind = node.nodeType
        if kind == ELEMENT:
            name = node.tagName
            # Stop at containers to recurse into and at anything that is
            # not paragraph-level (or allowed to precede paragraphs).
            if name in RECURSE_INTO_PARA_CONTAINERS \
               or name not in PARA_LEVEL_ELEMENTS + PARA_LEVEL_PRECEEDERS:
                return index
        elif kind == TEXT:
            text = node.data
            stripped = string.lstrip(text)
            if stripped:
                if stripped == text:
                    # no leading white space; start right here
                    return index
                # separate the leading white space from the real content;
                # the content then lives in the following node
                node.splitText(len(text) - len(stripped))
                return index + 1
            # nothing but white space; keep going
        index = index + 1
    return index
Fred Drakefba0ba21998-12-10 05:07:09 +0000782
783
def fixup_rfc_references(doc, fragment):
    """Give every <rfc> element found in fragment a text child of the form
    "RFC <num>", where <num> is the value of the element's num attribute.
    """
    for rfc in find_all_elements(fragment, "rfc"):
        label = "RFC " + rfc.getAttribute("num")
        text = doc.createTextNode(label)
        rfc.appendChild(text)
Fred Draked24167b1999-01-14 21:18:03 +0000788
789
def fixup_signatures(doc, fragment):
    """Run fixup_args() over the argument lists below fragment's children.

    For each element child of fragment, every <args> element and then
    every <constructor-args> element it contains is passed to
    fixup_args() and normalized afterwards.
    """
    for element in fragment.childNodes:
        if element.nodeType != ELEMENT:
            continue
        for tag in ("args", "constructor-args"):
            for arglist in element.getElementsByTagName(tag):
                fixup_args(doc, arglist)
                arglist.normalize()
801
802
def fixup_args(doc, arglist):
    """Replace each <optional> child of arglist by its own children,
    enclosed in literal "[" and "]" text nodes.

    The children of arglist are rescanned from the beginning after every
    replacement, so <optional> elements that were nested directly inside
    another one are handled as well.
    """
    while 1:
        optional = None
        for node in arglist.childNodes:
            if node.nodeType == ELEMENT and node.tagName == "optional":
                optional = node
                break
        if optional is None:
            # nothing (more) to unwrap
            return
        arglist.insertBefore(doc.createTextNode("["), optional)
        # hoist the contents so they sit in front of the <optional>
        contents = optional.childNodes
        while contents:
            first = contents[0]
            optional.removeChild(first)
            arglist.insertBefore(first, optional)
        arglist.insertBefore(doc.createTextNode("]"), optional)
        arglist.removeChild(optional)
817
818
def fixup_sectionauthors(doc, fragment):
    """Turn each <sectionauthor> found in fragment into an <author>
    element placed near the start of its parent.

    The element is detached, retagged (by setting _node.name), its name
    attribute is converted into a text child, and it is re-inserted in
    front of the parent's third child -- or in front of the first child
    when the second child is an element other than <title>.
    """
    for author in find_all_elements(fragment, "sectionauthor"):
        owner = author.parentNode
        owner.removeChild(author)
        author._node.name = "author"
        name_text = doc.createTextNode(author.getAttribute("name"))
        author.appendChild(name_text)
        author.removeAttribute("name")
        # choose the insertion point from the remaining children
        kids = owner.childNodes
        anchor = kids[2]
        second = kids[1]
        if second.nodeType == ELEMENT and second.tagName != "title":
            anchor = kids[0]
        owner.insertBefore(doc.createTextNode("\n "), anchor)
        owner.insertBefore(author, anchor)
833
834
def fixup_verbatims(doc):
    """Retag <verbatim> elements that hold an interactive session.

    A <verbatim> element found in doc is renamed to
    "interactive-session" (by setting _node.name) when its first child
    is a text node whose content, ignoring leading white space, starts
    with ">>>".  Elements without any children are left unchanged.
    """
    for verbatim in find_all_elements(doc, "verbatim"):
        children = verbatim.childNodes
        if len(children) == 0:
            # an empty <verbatim> cannot be an interactive session;
            # indexing children[0] here would raise IndexError
            continue
        child = children[0]
        if child.nodeType == TEXT \
           and string.lstrip(child.data)[:3] == ">>>":
            verbatim._node.name = "interactive-session"
Fred Drake93d762f1999-02-18 16:32:21 +0000841
842
def add_node_ids(fragment, counter=0):
    """Store a number in _node.node_id for fragment and everything below it.

    fragment itself receives counter; the counter is advanced by one for
    every child, with element children numbered recursively and other
    nodes numbered directly.  Returns a value one greater than the last
    number handed out.
    """
    fragment._node.node_id = counter
    next_id = counter
    for child in fragment.childNodes:
        next_id = next_id + 1
        if child.nodeType != ELEMENT:
            child._node.node_id = next_id
        else:
            next_id = add_node_ids(child, next_id)
    return next_id + 1
852
853
# Tag names of the module index entries handled by fixup_refmodindexes().
REFMODINDEX_ELEMENTS = (
    'refmodindex',
    'refbimodindex',
    'refexmodindex',
    'refstmodindex',
    )

def fixup_refmodindexes(fragment):
    """Process every container in fragment that holds a <ref*modindex>.

    The parents of all elements named in REFMODINDEX_ELEMENTS are
    collected, keyed by their _node.node_id so that each parent is
    handled only once, and passed to fixup_refmodindexes_chunk().  That
    function removes a <ref*modindex> that is co-located with a matching
    <module> and sets index="yes" on the <module> instead.
    """
    parents = {}
    for entry in find_all_elements_from_set(fragment, REFMODINDEX_ELEMENTS):
        holder = entry.parentNode
        parents[holder._node.node_id] = holder
    for holder in parents.values():
        fixup_refmodindexes_chunk(holder)
868
869
def fixup_refmodindexes_chunk(container):
    """Fold the <ref*modindex> entries of container into its <module>s.

    Every element of container named in REFMODINDEX_ELEMENTS is compared
    against the <module> elements found in container.  Each <module>
    whose only child is text equal to the entry's name attribute gets
    index="yes"; an entry that matched at least one <module> is removed
    from the tree.  Entries that unexpectedly have children are reported
    on stderr and left alone.
    """
    # container is probably a <para>; report the cases where it isn't:
    if container.tagName != PARA_ELEMENT:
        bwrite("--- fixup_refmodindexes_chunk(%s)\n" % container)
    module_entries = find_all_elements(container, "module")
    if not module_entries:
        return
    index_entries = find_all_elements_from_set(container, REFMODINDEX_ELEMENTS)
    removes = []
    for entry in index_entries:
        children = entry.childNodes
        if len(children) != 0:
            bwrite("--- unexpected number of children for %s node:\n"
                   % entry.tagName)
            ewrite(entry.toxml() + "\n")
            continue
        found = 0
        module_name = entry.getAttribute("name")
        for node in module_entries:
            kids = node.childNodes
            # Only a <module> holding a single non-element child can be
            # compared by its data; an element child has no text to read.
            if len(kids) != 1 or kids[0].nodeType == ELEMENT:
                continue
            if kids[0].data == module_name:
                found = 1
                node.setAttribute("index", "yes")
        if found:
            removes.append(entry)
    for node in removes:
        # NOTE(review): the entries come from find_all_elements_from_set(),
        # which presumably is not limited to direct children of container;
        # detaching through the entry's own parent is correct either way.
        node.parentNode.removeChild(node)
899
900
def fixup_bifuncindexes(fragment):
    """Process every container in fragment that holds a <bifuncindex>.

    The parents of all <bifuncindex> elements are collected, keyed by
    their _node.node_id so that each parent is processed only once, and
    handed to fixup_bifuncindexes_chunk().
    """
    parents = {}
    for entry in find_all_elements(fragment, 'bifuncindex'):
        holder = entry.parentNode
        parents[holder._node.node_id] = holder
    for holder in parents.values():
        fixup_bifuncindexes_chunk(holder)
910
911
def fixup_bifuncindexes_chunk(container):
    """Fold the <bifuncindex> children of container into its <function>s.

    For every <bifuncindex> child, each <function> child whose text is
    the entry's name attribute followed by "()" gets index="yes" and
    module="__builtin__".  A <bifuncindex> that matched at least one
    <function> is removed from container.  <function> elements that are
    empty or whose first child is an element are skipped.
    """
    removes = []
    entries = find_all_child_elements(container, "bifuncindex")
    function_entries = find_all_child_elements(container, "function")
    for entry in entries:
        function_name = entry.getAttribute("name")
        found = 0
        for func_entry in function_entries:
            kids = func_entry.childNodes
            # Without this guard an empty <function>, or one starting
            # with a nested element, would abort the conversion when its
            # first child's data is read.
            if len(kids) == 0 or kids[0].nodeType == ELEMENT:
                continue
            t2 = kids[0].data
            if t2[-2:] != "()":
                continue
            t2 = t2[:-2]
            if t2 == function_name:
                func_entry.setAttribute("index", "yes")
                func_entry.setAttribute("module", "__builtin__")
                if not found:
                    # schedule the index entry for removal only once
                    found = 1
                    removes.append(entry)
    for entry in removes:
        container.removeChild(entry)
932
933
# Attribute values matching this pattern are written with type TOKEN;
# all others are written as CDATA.
_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")

def write_esis(doc, ofp, knownempty):
    """Write the children of doc to ofp as ESIS events, recursively.

    knownempty is called with an element's tag name and returns true for
    elements declared empty; these get an "e" line before their
    attributes.  Element children produce "A" lines for the attributes
    followed by "(" and ")" lines around their content; text children
    produce a "-" line.

    Raises ValueError if an element declared empty has children, and
    RuntimeError for any node that is neither an element nor text.
    """
    for child in doc.childNodes:
        kind = child.nodeType
        if kind == TEXT:
            ofp.write("-%s\n" % esistools.encode(child.data))
            continue
        if kind != ELEMENT:
            raise RuntimeError("unsupported node type: %s" % kind)
        gi = child.tagName
        if knownempty(gi):
            if child.hasChildNodes():
                raise ValueError(
                    "declared-empty node <%s> has children" % gi)
            ofp.write("e\n")
        for name, attr in child.attributes.items():
            value = attr.value
            dtype = "CDATA"
            if _token_rx.match(value):
                dtype = "TOKEN"
            ofp.write("A%s %s %s\n" % (name, dtype, esistools.encode(value)))
        ofp.write("(%s\n" % gi)
        write_esis(child, ofp, knownempty)
        ofp.write(")%s\n" % gi)
960
961
def convert(ifp, ofp):
    """Read ESIS data from ifp, transform the tree, and write it to ofp.

    The input is parsed with esistools.ExtendedEsisBuilder, the fixup
    passes below are applied in order to the resulting fragment, and the
    result is written with write_esis().

    An IOError whose first argument is errno.EPIPE while writing is
    ignored; any other IOError is propagated to the caller.
    """
    p = esistools.ExtendedEsisBuilder()
    p.feed(ifp.read())
    doc = p.document
    fragment = p.fragment
    normalize(fragment)
    simplify(doc, fragment)
    handle_labels(doc, fragment)
    handle_appendix(doc, fragment)
    fixup_trailing_whitespace(doc, {
        "abstract": "\n",
        "title": "",
        "chapter": "\n\n",
        "section": "\n\n",
        "subsection": "\n\n",
        "subsubsection": "\n\n",
        "paragraph": "\n\n",
        "subparagraph": "\n\n",
        })
    cleanup_root_text(doc)
    cleanup_trailing_parens(fragment, ["function", "method", "cfunction"])
    cleanup_synopses(doc, fragment)
    fixup_descriptors(doc, fragment)
    fixup_verbatims(fragment)
    normalize(fragment)
    fixup_paras(doc, fragment)
    fixup_sectionauthors(doc, fragment)
    remap_element_names(fragment, {
        "tableii": ("table", {"cols": "2"}),
        "tableiii": ("table", {"cols": "3"}),
        "tableiv": ("table", {"cols": "4"}),
        "lineii": ("row", {}),
        "lineiii": ("row", {}),
        "lineiv": ("row", {}),
        "refmodule": ("module", {"link": "link"}),
        })
    fixup_table_structures(doc, fragment)
    fixup_rfc_references(doc, fragment)
    fixup_signatures(doc, fragment)
    # node ids are needed by the two index fixups that follow
    add_node_ids(fragment)
    fixup_refmodindexes(fragment)
    fixup_bifuncindexes(fragment)
    #
    # Build the "declared empty" predicate; <rfc> is taken out because
    # fixup_rfc_references() has given those elements content.
    d = {}
    for gi in p.get_empties():
        d[gi] = gi
    if d.has_key("rfc"):
        del d["rfc"]
    knownempty = d.has_key
    #
    try:
        write_esis(fragment, ofp, knownempty)
    except IOError:
        # Ignore EPIPE; it just means that whoever we're writing to stopped
        # reading.  The rest of the output would be ignored.  All other
        # errors should still be reported.
        #
        # The exception is inspected here instead of being unpacked in the
        # except clause: unpacking fails for an IOError that does not carry
        # exactly two arguments and would hide the real error.
        args = sys.exc_info()[1].args
        if not args or args[0] != errno.EPIPE:
            raise
1020
1021
def main():
    """Command-line entry point.

    With no arguments, convert stdin to stdout; with one, convert the
    named file to stdout; with two, convert the first named file into
    the second.  Any other number of arguments calls usage() and exits
    with status 2.
    """
    argc = len(sys.argv)
    if argc not in (1, 2, 3):
        usage()
        sys.exit(2)
    ifp = sys.stdin
    ofp = sys.stdout
    # the input is opened first, so a bad input name never creates output
    if argc >= 2:
        ifp = open(sys.argv[1])
    if argc == 3:
        ofp = open(sys.argv[2], "w")
    convert(ifp, ofp)


if __name__ == "__main__":
    main()