blob: a56ac6e3c8eda61f93fe8454324e4dffc0675831 [file] [log] [blame]
Fred Drake03204731998-11-23 17:02:03 +00001#! /usr/bin/env python
2
"""Perform massive transformations on a document tree created from the LaTeX
of the Python documentation, and dump the ESIS data for the transformed tree.
"""
6__version__ = '$Revision$'
7
8
9import errno
Fred Drake4db5b461998-12-01 19:03:01 +000010import esistools
11import re
Fred Drake03204731998-11-23 17:02:03 +000012import string
13import sys
14import xml.dom.core
Fred Drakee779d4f1999-05-10 19:36:52 +000015
16from xml.dom.core import \
17 ELEMENT, \
18 TEXT
Fred Drake03204731998-11-23 17:02:03 +000019
20
class ConversionError(Exception):
    """Raised when the document tree contains content the fixers cannot
    convert (for example unexpected nodes inside a table)."""
23
24
# All diagnostics go to standard error.
ewrite = sys.stderr.write
try:
    # We can only do this trick on Unix (if tput is on $PATH)!
    #
    # os.name is the right test for "is this a POSIX system?".  The value
    # of sys.platform is something like "linux2" or "sunos5" and is never
    # "posix", so comparing against it made the bold branch unreachable.
    import os
    if os.name != "posix" or not sys.stderr.isatty():
        raise ImportError
    import curses
    import commands
except ImportError:
    # No terminal tricks available: emphasized output is plain output.
    bwrite = ewrite
else:
    def bwrite(s, BOLDON=commands.getoutput("tput bold"),
               BOLDOFF=commands.getoutput("tput sgr0")):
        """Write s to standard error wrapped in the terminal's bold codes.

        BOLDON and BOLDOFF are evaluated once, when this function is
        defined, so tput is not run again for every message.
        """
        ewrite("%s%s%s" % (BOLDON, s, BOLDOFF))
38
39
# Tag name of the paragraph elements created by build_para().
PARA_ELEMENT = "para"

# Set to a true value to get paragraph-fixer diagnostics on stderr.
DEBUG_PARA_FIXER = 0

if not DEBUG_PARA_FIXER:
    def para_msg(s):
        """Discard the diagnostic message s (debugging is disabled)."""
        return None
else:
    def para_msg(s):
        """Write the diagnostic message s to standard error."""
        ewrite("*** %s\n" % s)
50
Fred Drakefcc59101999-01-06 22:50:52 +000051
# Workaround to deal with invalid documents (multiple root elements).  This
# does not indicate a bug in the DOM implementation.
#
def get_documentElement(doc):
    """Return the last element child of doc, or None if it has none."""
    found = None
    for candidate in doc.childNodes:
        if candidate.nodeType != ELEMENT:
            continue
        # keep going: a later element child replaces an earlier one
        found = candidate
    return found

# Install the workaround as the Document class's accessor.
xml.dom.core.Document.get_documentElement = get_documentElement
63
64
# Replace get_childNodes for the Document class; without this, children
# accessed from the Document object via .childNodes (no matter how many
# levels of access are used) will be given an ownerDocument of None.
#
def get_childNodes(doc):
    """Return a NodeList over doc's children that carries doc's own node
    as its second constructor argument."""
    raw = doc._node
    return xml.dom.core.NodeList(raw.children, raw)

# Install the replacement as the Document class's accessor.
xml.dom.core.Document.get_childNodes = get_childNodes
73
74
def get_first_element(doc, gi):
    """Return the first direct child of doc whose nodeName is gi.

    Returns None when no child matches.
    """
    for candidate in doc.childNodes:
        if candidate.nodeName != gi:
            continue
        return candidate
    return None
79
def extract_first_element(doc, gi):
    """Detach and return the first direct child of doc named gi.

    Returns None, leaving doc untouched, when there is no such child.
    """
    found = get_first_element(doc, gi)
    if found is None:
        return found
    doc.removeChild(found)
    return found
85
86
def find_all_elements(doc, gi):
    """Return a list of the nodes named gi at or below doc.

    doc itself is included when its nodeName is gi.  Each element child
    is listed before the matches that getElementsByTagName() reports
    underneath it.
    """
    found = []
    if doc.nodeName == gi:
        found.append(doc)
    for child in doc.childNodes:
        if child.nodeType != ELEMENT:
            continue
        if child.tagName == gi:
            found.append(child)
        for descendant in child.getElementsByTagName(gi):
            found.append(descendant)
    return found
98
def find_all_child_elements(doc, gi):
    """Return a list of the direct children of doc whose nodeName is gi."""
    return [child for child in doc.childNodes if child.nodeName == gi]
105
def find_all_elements_from_set(doc, gi_set):
    """Return a list of the nodes at or below doc whose nodeName is in
    gi_set, as collected by __find_all_elements_from_set()."""
    collected = []
    return __find_all_elements_from_set(doc, gi_set, collected)
108
def __find_all_elements_from_set(doc, gi_set, nodes):
    """Append to nodes every node at or below doc named in gi_set.

    A node is recorded before its element children are visited; nodes is
    modified in place and also returned.
    """
    if doc.nodeName in gi_set:
        nodes.append(doc)
    element_children = [kid for kid in doc.childNodes
                        if kid.nodeType == ELEMENT]
    for kid in element_children:
        __find_all_elements_from_set(kid, gi_set, nodes)
    return nodes
Fred Drake7dab6af1999-01-28 23:59:58 +0000116
117
def simplify(doc, fragment):
    """Rationalize the multi-rooted fragment into something closer to a
    valid SGML/XML document.

    The <documentclass> element is removed and its classname becomes the
    name of the <document> element; the top-level <title> and <input>
    elements are moved to the front of the document element; leading text
    nodes of the fragment are then dropped.
    """
    # These things are simply not valid SGML/XML documents as they stand,
    # and need a little work.
    rootname = "document"
    hoisted = []
    docclass = extract_first_element(fragment, "documentclass")
    if docclass is not None:
        rootname = docclass.getAttribute("classname")
    title = extract_first_element(fragment, "title")
    if title is not None:
        hoisted.append(title)
    # update the name of the root element
    root = get_first_element(fragment, "document")
    if root is not None:
        root._node.name = rootname
    # pull out every top-level <input>, in document order
    item = extract_first_element(fragment, "input")
    while item is not None:
        hoisted.append(item)
        item = extract_first_element(fragment, "input")
    if hoisted:
        docelem = get_documentElement(fragment)
        # Insert back to front so the hoisted nodes end up in their
        # original order, each followed by a newline.
        hoisted.reverse()
        for item in hoisted:
            newline = doc.createTextNode("\n")
            docelem.insertBefore(newline, docelem.firstChild)
            docelem.insertBefore(item, newline)
        docelem.insertBefore(doc.createTextNode("\n"), docelem.firstChild)
    while fragment.firstChild and fragment.firstChild.nodeType == TEXT:
        fragment.removeChild(fragment.firstChild)
Fred Drake03204731998-11-23 17:02:03 +0000148
149
def cleanup_root_text(doc):
    """Remove the text nodes that are direct children of doc, except a
    text node that immediately follows a COMMENT node."""
    doomed = []
    after_comment = 0
    for child in doc.childNodes:
        previous_was_comment = after_comment
        after_comment = 0
        if child.nodeType == TEXT and not previous_was_comment:
            doomed.append(child)
        elif child.nodeName == "COMMENT":
            after_comment = 1
    # remove only after the scan, so the child list is not changed while
    # it is being iterated
    for child in doomed:
        doc.removeChild(child)
162
163
# Tag names that find_and_fix_descriptors() hands to rewrite_descriptor().
DESCRIPTOR_ELEMENTS = (
    "cfuncdesc",
    "cvardesc",
    "ctypedesc",
    "classdesc",
    "memberdesc",
    "memberdescni",
    "methoddesc",
    "methoddescni",
    "excdesc",
    "funcdesc",
    "funcdescni",
    "opcodedesc",
    "datadesc",
    "datadescni",
    )
170
def fixup_descriptors(doc, fragment):
    """Rewrite the descriptor elements found in every <section> of
    fragment (see find_and_fix_descriptors())."""
    for section in find_all_elements(fragment, "section"):
        find_and_fix_descriptors(doc, section)
175
176
def find_and_fix_descriptors(doc, container):
    """Rewrite each descriptor element that is a direct child of
    container, descending into <subsection> children."""
    for child in container.childNodes:
        if child.nodeType != ELEMENT:
            continue
        gi = child.tagName
        if gi in DESCRIPTOR_ELEMENTS:
            rewrite_descriptor(doc, child)
        elif gi == "subsection":
            find_and_fix_descriptors(doc, child)
186
Fred Drakecb657811999-01-29 20:55:07 +0000187
def rewrite_descriptor(doc, descriptor):
    """Restructure one descriptor element in place.

    Do these things:
      1. Add an "index='no'" attribute to the element if the tagName
         ends in 'ni', removing the 'ni' from the name.
      2. Create a <signature> from the name attribute.
      2a. Create an <args> if it appears to be available.
      3. Create additional <signature>s from <*line{,ni}> elements,
         if found.
      4. If a <versionadded> is found, move it to an attribute on the
         descriptor.
      5. Move remaining child nodes to a <description> element.
      6. Put it back together.

    Raises RuntimeError when a 'var' attribute is found on anything other
    than an opcodedesc.
    """
    # 1.
    descname = descriptor.tagName
    index = 1
    if descname[-2:] == "ni":
        descname = descname[:-2]
        descriptor.setAttribute("index", "no")
        descriptor._node.name = descname
        index = 0
    desctype = descname[:-4]            # remove 'desc'
    linename = desctype + "line"
    if not index:
        linename = linename + "ni"
    # 2.
    signature = doc.createElement("signature")
    name = doc.createElement("name")
    signature.appendChild(doc.createTextNode("\n "))
    signature.appendChild(name)
    name.appendChild(doc.createTextNode(descriptor.getAttribute("name")))
    descriptor.removeAttribute("name")
    # 2a.
    if descriptor.attributes.has_key("var"):
        if descname != "opcodedesc":
            raise RuntimeError(
                "got 'var' attribute on descriptor other than opcodedesc")
        variable = descriptor.getAttribute("var")
        if variable:
            args = doc.createElement("args")
            args.appendChild(doc.createTextNode(variable))
            signature.appendChild(doc.createTextNode("\n "))
            signature.appendChild(args)
        descriptor.removeAttribute("var")
    newchildren = [signature]
    children = descriptor.childNodes
    pos = skip_leading_nodes(children)
    if pos < len(children):
        child = children[pos]
        if child.nodeName == "args":
            # create an <args> in <signature>:
            args = doc.createElement("args")
            # copy the child list first; it changes as nodes are removed
            argchildren = []
            for n in child.childNodes:
                argchildren.append(n)
            for n in argchildren:
                child.removeChild(n)
                args.appendChild(n)
            signature.appendChild(doc.createTextNode("\n "))
            signature.appendChild(args)
            # Drop the now-empty original <args>.  Left in place it sits
            # at children[pos], which keeps the loop below from seeing
            # any <*line>/<versionadded> nodes that follow it and makes
            # it end up inside <description>.
            descriptor.removeChild(child)
    signature.appendChild(doc.createTextNode("\n "))
    # 3, 4.
    pos = skip_leading_nodes(children, pos)
    while pos < len(children) \
          and children[pos].nodeName in (linename, "versionadded"):
        if children[pos].tagName == linename:
            # this is really a supplemental signature, create <signature>
            sig = methodline_to_signature(doc, children[pos])
            newchildren.append(sig)
        else:
            # <versionadded added=...>
            descriptor.setAttribute(
                "added", children[pos].getAttribute("version"))
        pos = skip_leading_nodes(children, pos + 1)
    # 5.
    description = doc.createElement("description")
    description.appendChild(doc.createTextNode("\n"))
    newchildren.append(description)
    move_children(descriptor, description, pos)
    # never empty: <description> always holds the "\n" added above
    last = description.childNodes[-1]
    if last.nodeType == TEXT:
        last.data = last.data.rstrip() + "\n "
    # 6.
    # should have nothing but whitespace and signature lines in <descriptor>;
    # discard them
    while descriptor.childNodes:
        descriptor.removeChild(descriptor.childNodes[0])
    for node in newchildren:
        descriptor.appendChild(doc.createTextNode("\n "))
        descriptor.appendChild(node)
    descriptor.appendChild(doc.createTextNode("\n"))
Fred Drake03204731998-11-23 17:02:03 +0000281
Fred Drake7dab6af1999-01-28 23:59:58 +0000282
def methodline_to_signature(doc, methodline):
    """Build and return a <signature> element from a <*line> element.

    The line's name attribute becomes a <name> child (and is removed from
    methodline); any children of methodline are moved into an <args>
    child.
    """
    signature = doc.createElement("signature")
    signature.appendChild(doc.createTextNode("\n "))
    namenode = doc.createElement("name")
    label = methodline.getAttribute("name")
    namenode.appendChild(doc.createTextNode(label))
    methodline.removeAttribute("name")
    signature.appendChild(namenode)
    if len(methodline.childNodes):
        argsnode = doc.createElement("args")
        signature.appendChild(doc.createTextNode("\n "))
        signature.appendChild(argsnode)
        move_children(methodline, argsnode)
    signature.appendChild(doc.createTextNode("\n "))
    return signature
Fred Drake03204731998-11-23 17:02:03 +0000297
298
def move_children(origin, dest, start=0):
    """Move the children of origin from index start onward to the end of
    dest, keeping their order."""
    remaining = origin.childNodes
    # Each removal shortens the child list, so the next node to move is
    # always found at index start.
    while len(remaining) > start:
        moved = remaining[start]
        origin.removeChild(moved)
        dest.appendChild(moved)
305
306
def handle_appendix(doc, fragment):
    """Move everything after the <appendix> marker into <back-matter>.

    The first <appendix> element found below a child of the document
    element is removed; every following child of the document element is
    moved into a new <back-matter> element appended to the document
    element.
    """
    # must be called after simplfy() if document is multi-rooted to begin with
    docelem = get_documentElement(fragment)
    appendices = 0
    nodes = []
    for node in docelem.childNodes:
        if appendices:
            nodes.append(node)
        elif node.nodeType == ELEMENT:
            appnodes = node.getElementsByTagName("appendix")
            if appnodes:
                appendices = 1
                parent = appnodes[0].parentNode
                parent.removeChild(appnodes[0])
                parent.normalize()
    if nodes:
        # explicit loops: these calls are made for their side effects
        for node in nodes:
            docelem.removeChild(node)
        docelem.appendChild(doc.createTextNode("\n\n\n"))
        back = doc.createElement("back-matter")
        docelem.appendChild(back)
        back.appendChild(doc.createTextNode("\n"))
        # drop leading whitespace-only text from the moved material
        while nodes and nodes[0].nodeType == TEXT \
              and not nodes[0].data.strip():
            del nodes[0]
        for node in nodes:
            back.appendChild(node)
        docelem.appendChild(doc.createTextNode("\n"))
Fred Drake03204731998-11-23 17:02:03 +0000334
335
def handle_labels(doc, fragment):
    """Turn each <label id="..."/> into an id attribute and remove it.

    The id is set on the label's parent, or on the parent's parent when
    the label sits inside a <title>.  Labels without an id are left
    alone.
    """
    for label in find_all_elements(fragment, "label"):
        label_id = label.getAttribute("id")
        if not label_id:
            continue
        parent = label.parentNode
        in_title = parent.tagName == "title"
        if in_title:
            parent.parentNode.setAttribute("id", label_id)
        else:
            parent.setAttribute("id", label_id)
        # now, remove <label id="..."/> from parent:
        parent.removeChild(label)
        if in_title:
            parent.normalize()
            children = parent.childNodes
            # the title may have held nothing but the label
            if len(children) and children[-1].nodeType == TEXT:
                children[-1].data = children[-1].data.rstrip()
Fred Drake03204731998-11-23 17:02:03 +0000353
354
def fixup_trailing_whitespace(doc, wsmap):
    """Normalize the whitespace that ends selected elements.

    doc is walked breadth-first.  For every node whose name is a key of
    wsmap, a trailing text child has its trailing whitespace replaced by
    the mapped string.
    """
    queue = [doc]
    while queue:
        node = queue[0]
        del queue[0]
        if node.nodeName in wsmap:
            ws = wsmap[node.nodeName]
            children = node.childNodes
            # an element without children has nothing to adjust
            if len(children) and children[-1].nodeType == TEXT:
                children[-1].data = children[-1].data.rstrip() + ws
            # hack to get the title in place:
            if node.nodeName == "title" \
               and node.parentNode.firstChild.nodeType == ELEMENT:
                node.parentNode.insertBefore(doc.createTextNode("\n "),
                                             node.parentNode.firstChild)
        for child in node.childNodes:
            if child.nodeType == ELEMENT:
                queue.append(child)
376
377
def normalize(doc):
    """Call normalize() on every element that is a direct child of doc."""
    elements = [child for child in doc.childNodes
                if child.nodeType == ELEMENT]
    for element in elements:
        element.normalize()
382
383
def cleanup_trailing_parens(doc, element_names):
    """Strip a trailing "()" from the text of the named elements.

    The tree below doc is walked breadth-first.  An element listed in
    element_names is only changed when its sole child is a text node
    ending in "()", and it is never descended into.
    """
    wanted = {}
    for gi in element_names:
        wanted[gi] = gi
    pending = [child for child in doc.childNodes
               if child.nodeType == ELEMENT]
    while pending:
        node = pending.pop(0)
        if node.tagName not in wanted:
            # not a target: look further down instead
            for child in node.childNodes:
                if child.nodeType == ELEMENT:
                    pending.append(child)
            continue
        children = node.childNodes
        if len(children) != 1 or children[0].nodeType != TEXT:
            continue
        text = children[0].data
        if text[-2:] == "()":
            children[0].data = text[:-2]
407
408
def contents_match(left, right):
    """Return 1 if left and right have equivalent children, else 0.

    Children are compared pairwise: elements must have the same tag name
    and matching contents, text nodes the same data.  Attributes are not
    compared, and any other node type counts as a mismatch.
    """
    left_children = left.childNodes
    right_children = right.childNodes
    count = len(left_children)
    if count != len(right_children):
        return 0
    for i in range(count):
        l = left_children[i]
        r = right_children[i]
        kind = l.nodeType
        if kind != r.nodeType:
            return 0
        if kind == ELEMENT:
            if l.tagName != r.tagName:
                return 0
            # should check attributes, but that's not a problem here
            if not contents_match(l, r):
                return 0
        elif kind == TEXT:
            if l.data != r.data:
                return 0
        else:
            # not quite right, but good enough
            return 0
    return 1
431
432
def create_module_info(doc, section):
    """Collect a section's module metadata into a <moduleinfo> element.

    Nothing happens unless section has a <modulesynopsis> child.  That
    element is renamed <synopsis>; <moduleauthor> becomes <author> with
    its name attribute turned into text.  For a <section>, these are
    gathered in a new <moduleinfo> together with the name and type from
    <declaremodule>, an "added" attribute from <versionadded>, the
    <platform> element, and the title reworked into a <short-synopsis>
    when it starts with the module's name.
    """
    # Heavy.
    synopsis = extract_first_element(section, "modulesynopsis")
    if synopsis is None:
        return
    synopsis._node.name = "synopsis"
    tail = synopsis.childNodes[-1]
    if tail.nodeType == TEXT and tail.data[-1:] == ".":
        # drop the final period
        tail.data = tail.data[:-1]
    modauthor = extract_first_element(section, "moduleauthor")
    if modauthor:
        modauthor._node.name = "author"
        author_name = modauthor.getAttribute("name")
        modauthor.appendChild(doc.createTextNode(author_name))
        modauthor.removeAttribute("name")
    platform = extract_first_element(section, "platform")
    if section.tagName != "section":
        return
    modinfo_pos = 2
    modinfo = doc.createElement("moduleinfo")
    moddecl = extract_first_element(section, "declaremodule")
    name = None
    if moddecl:
        modinfo.appendChild(doc.createTextNode("\n "))
        name = moddecl.attributes["name"].value
        namenode = doc.createElement("name")
        namenode.appendChild(doc.createTextNode(name))
        modinfo.appendChild(namenode)
        typeattr = moddecl.attributes.get("type")
        if typeattr:
            typename = typeattr.value
            modinfo.appendChild(doc.createTextNode("\n "))
            typenode = doc.createElement("type")
            typenode.appendChild(doc.createTextNode(typename))
            modinfo.appendChild(typenode)
    versionadded = extract_first_element(section, "versionadded")
    if versionadded:
        modinfo.setAttribute("added", versionadded.getAttribute("version"))
    title = get_first_element(section, "title")
    if not title:
        ewrite("Unexpected condition: <section/> without <title/>\n")
    else:
        parts = title.childNodes
        if len(parts) >= 2 \
           and parts[0].nodeName == "module" \
           and parts[0].childNodes[0].data == name:
            # this is it; morph the <title> into <short-synopsis>
            first_data = parts[1]
            if first_data.data[:4] == " ---":
                first_data.data = first_data.data[4:].lstrip()
            title._node.name = "short-synopsis"
            if parts[-1].nodeType == TEXT \
               and parts[-1].data[-1:] == ".":
                parts[-1].data = parts[-1].data[:-1]
            section.removeChild(title)
            section.removeChild(section.childNodes[0])
            title.removeChild(parts[0])
            modinfo_pos = 0
        else:
            ewrite("module name in title doesn't match"
                   " <declaremodule/>; no <short-synopsis/>\n")
    modinfo.appendChild(doc.createTextNode("\n "))
    modinfo.appendChild(synopsis)
    if title and not contents_match(title, synopsis):
        # The short synopsis is actually different,
        # and needs to be stored:
        modinfo.appendChild(doc.createTextNode("\n "))
        modinfo.appendChild(title)
    if modauthor:
        modinfo.appendChild(doc.createTextNode("\n "))
        modinfo.appendChild(modauthor)
    if platform:
        modinfo.appendChild(doc.createTextNode("\n "))
        modinfo.appendChild(platform)
    modinfo.appendChild(doc.createTextNode("\n "))
    section.insertBefore(modinfo, section.childNodes[modinfo_pos])
    section.insertBefore(doc.createTextNode("\n "), modinfo)
    #
    # The rest of this removes extra newlines from where we cut out
    # a lot of elements.  A lot of code for minimal value, but it keeps
    # the generated *ML from being too funny looking.
    #
    section.normalize()
    siblings = section.childNodes
    for i in range(len(siblings)):
        current = siblings[i]
        if current.nodeName == "moduleinfo":
            nextnode = siblings[i+1]
            if nextnode.nodeType == TEXT:
                data = nextnode.data
                stripped = data.lstrip()
                if len(stripped) < (len(data) - 4):
                    nextnode.data = "\n\n\n" + stripped
Fred Drakeaaed9711998-12-10 20:25:30 +0000525
526
def cleanup_synopses(doc, fragment):
    """Run create_module_info() on every <section> found in fragment."""
    sections = find_all_elements(fragment, "section")
    for section in sections:
        create_module_info(doc, section)
Fred Drakeaaed9711998-12-10 20:25:30 +0000530
531
def fixup_table_structures(doc, fragment):
    """Run fixup_table() on every <table> found in fragment."""
    tables = find_all_elements(fragment, "table")
    for table in tables:
        fixup_table(doc, table)
535
Fred Drakef8ebb551999-01-14 19:45:38 +0000536
def fixup_table(doc, table):
    """Rebuild table as <tgroup> containing a <thead> and a <tbody>.

    The <entry> children of table become the single header row and the
    <row> children become the body.  An <hline> sets rowsep="1" on the
    row before it.  Raises ConversionError if anything other than
    whitespace text and <hline> elements is left over.
    """
    # create the table head
    thead = doc.createElement("thead")
    head_row = doc.createElement("row")
    move_elements_by_name(doc, table, head_row, "entry")
    thead.appendChild(doc.createTextNode("\n "))
    thead.appendChild(head_row)
    thead.appendChild(doc.createTextNode("\n "))
    # create the table body
    tbody = doc.createElement("tbody")
    prev_row = None
    children = table.childNodes
    for child in children:
        if child.nodeType == ELEMENT:
            tagName = child.tagName
            if tagName == "hline" and prev_row is not None:
                prev_row.setAttribute("rowsep", "1")
            elif tagName == "row":
                prev_row = child
    # save the rows:
    tbody.appendChild(doc.createTextNode("\n "))
    move_elements_by_name(doc, table, tbody, "row", sep="\n ")
    # and toss the rest:
    while children:
        child = children[0]
        nodeType = child.nodeType
        if nodeType == TEXT:
            if child.data.strip():
                raise ConversionError("unexpected free data in table")
        elif nodeType == ELEMENT:
            if child.tagName != "hline":
                raise ConversionError(
                    "unexpected <%s> in table" % child.tagName)
        else:
            raise ConversionError(
                "unexpected %s node in table" % child.__class__.__name__)
        table.removeChild(child)
    # nothing left in the <table>; add the <thead> and <tbody>
    tgroup = doc.createElement("tgroup")
    tgroup.appendChild(doc.createTextNode("\n "))
    tgroup.appendChild(thead)
    tgroup.appendChild(doc.createTextNode("\n "))
    tgroup.appendChild(tbody)
    tgroup.appendChild(doc.createTextNode("\n "))
    table.appendChild(tgroup)
    # now make the <entry>s look nice:
    for row in table.getElementsByTagName("row"):
        fixup_row(doc, row)
588
589
def fixup_row(doc, row):
    """Insert a "\\n " text node before every child of row except the
    first."""
    # snapshot first: the child list grows while we insert
    later_entries = list(row.childNodes[1:])
    for entry in later_entries:
        separator = doc.createTextNode("\n ")
        row.insertBefore(separator, entry)
596
597
def move_elements_by_name(doc, source, dest, name, sep=None):
    """Move the direct children of source named name to the end of dest.

    When sep is true, a text node holding sep is appended to dest after
    each moved node.
    """
    # collect first, so source's child list is not changed mid-iteration
    matches = [child for child in source.childNodes
               if child.nodeName == name]
    for match in matches:
        source.removeChild(match)
        dest.appendChild(match)
        if sep:
            dest.appendChild(doc.createTextNode(sep))
608
609
# Elements whose children fixup_paras() and fixup_paras_helper() process
# recursively instead of wrapping them in a paragraph.
RECURSE_INTO_PARA_CONTAINERS = (
    "chapter",
    "abstract",
    "enumerate",
    "section",
    "subsection",
    "subsubsection",
    "paragraph",
    "subparagraph",
    "back-matter",
    "howto",
    "manual",
    "item",
    "itemize",
    "fulllineitems",
    "enumeration",
    "descriptionlist",
    "definitionlist",
    "definition",
    )
Fred Drakefcc59101999-01-06 22:50:52 +0000618
# Elements that end the run of nodes build_para() collects into a
# paragraph (build_para() combines this tuple with
# RECURSE_INTO_PARA_CONTAINERS).
PARA_LEVEL_ELEMENTS = (
    "moduleinfo", "title", "verbatim", "enumerate", "item",
    "interpreter-session", "back-matter", "interactive-session",
    "opcodedesc", "classdesc", "datadesc",
    # "memberdescni" was misspelled "membderdescni" here, so that tag
    # could never match.
    "funcdesc", "methoddesc", "excdesc", "memberdesc", "memberdescni",
    "funcdescni", "methoddescni", "excdescni",
    "tableii", "tableiii", "tableiv", "localmoduletable",
    "sectionauthor", "seealso", "itemize",
    # include <para>, so we can just do it again to get subsequent paras:
    PARA_ELEMENT,
    )
630
# NOTE(review): presumably the elements skip_leading_nodes() may step over
# before paragraph material starts; no use is visible in this part of the
# file, so confirm against that function.
PARA_LEVEL_PRECEEDERS = (
    "setindexsubitem",
    "stindex",
    "obindex",
    "COMMENT",
    "label",
    "input",
    "title",
    "versionadded",
    "versionchanged",
    "declaremodule",
    "modulesynopsis",
    "moduleauthor",
    "indexterm",
    "leader",
    )
637
Fred Drake7dab6af1999-01-28 23:59:58 +0000638
def fixup_paras(doc, fragment):
    """Build paragraphs inside the paragraph containers of fragment.

    The top-level children named in RECURSE_INTO_PARA_CONTAINERS are
    processed first, then every <description> element in the fragment.
    """
    for child in fragment.childNodes:
        if child.nodeName not in RECURSE_INTO_PARA_CONTAINERS:
            continue
        fixup_paras_helper(doc, child)
    for description in find_all_elements(fragment, "description"):
        fixup_paras_helper(doc, description)
Fred Drakefcc59101999-01-06 22:50:52 +0000646
647
def fixup_paras_helper(doc, container, depth=0):
    """Wrap the paragraph material among container's children in
    paragraph elements, recursing into nested containers.

    With DEBUG_PARA_FIXER set, the process exits with status 1 after a
    paragraph is built when depth is 10.
    """
    # document is already normalized
    children = container.childNodes
    start = skip_leading_nodes(children)
    while start < len(children):
        candidate = children[start]
        if candidate.nodeName in RECURSE_INTO_PARA_CONTAINERS:
            # Something to recurse into:
            fixup_paras_helper(doc, candidate)
        else:
            # Paragraph material:
            build_para(doc, container, start, len(children))
            if DEBUG_PARA_FIXER and depth == 10:
                sys.exit(1)
        start = skip_leading_nodes(children, start + 1)
Fred Drakefcc59101999-01-06 22:50:52 +0000662
663
def build_para(doc, parent, start, i):
    """Wrap a run of parent's children, beginning at index start, in a new
    paragraph element.

    Children are collected up to (but not including) index i until a
    blank line ("\\n\\n") in a text node or a break element is met.
    Returns an index into parent's children past the new paragraph.
    Raises ConversionError when no content could be collected.
    """
    children = parent.childNodes
    after = start + 1
    have_last = 0
    break_elements = PARA_LEVEL_ELEMENTS + RECURSE_INTO_PARA_CONTAINERS
    # Collect all children until \n\n+ is found in a text node or a
    # member of break_elements is found.
    for j in range(start, i):
        after = j + 1
        child = children[j]
        kind = child.nodeType
        if kind == ELEMENT:
            if child.tagName in break_elements:
                after = j
                break
        elif kind == TEXT:
            pos = child.data.find("\n\n")
            if pos == 0:
                after = j
                break
            if pos >= 1:
                # the paragraph ends inside this text node
                child.splitText(pos)
                break
    else:
        # ran off the end of the range without hitting a break
        have_last = 1
    if after <= start:
        raise ConversionError(
            "build_para() could not identify content to turn into a paragraph")
    if children[after - 1].nodeType == TEXT:
        # we may need to split off trailing white space:
        child = children[after - 1]
        data = child.data
        trimmed = data.rstrip()
        if trimmed != data:
            have_last = 0
            child.splitText(len(trimmed))
    para = doc.createElement(PARA_ELEMENT)
    # Move the collected nodes last to first, inserting each one in front
    # of the node moved just before it.
    prev = None
    for j in range(after - 1, start - 1, -1):
        node = parent.childNodes[j]
        parent.removeChild(node)
        para.insertBefore(node, prev)
        prev = node
    if have_last:
        parent.appendChild(para)
        parent.appendChild(doc.createTextNode("\n\n"))
        return len(parent.childNodes)
    nextnode = parent.childNodes[start]
    if nextnode.nodeType == TEXT:
        if nextnode.data and nextnode.data[0] != "\n":
            nextnode.data = "\n" + nextnode.data
    else:
        newnode = doc.createTextNode("\n")
        parent.insertBefore(newnode, nextnode)
        nextnode = newnode
        start = start + 1
    parent.insertBefore(para, nextnode)
    return start + 1
Fred Drakefcc59101999-01-06 22:50:52 +0000724
725
def skip_leading_nodes(children, start=0):
    """Find where the next paragraph (or recursion point) begins.

    Scans children from index start, stepping over whitespace-only text
    nodes and over elements listed in PARA_LEVEL_ELEMENTS or
    PARA_LEVEL_PRECEEDERS.  The returned index names either a node where
    paragraph building should begin or an element in
    RECURSE_INTO_PARA_CONTAINERS for fixup_paras_helper() to descend into.

    A text node with leading whitespace is split so that the whitespace
    stays behind in its own node.

    A result >= len(children) means nothing further can be built from
    this list of children.
    """
    limit = len(children)
    index = start
    while index < limit:
        node = children[index]
        kind = node.nodeType
        if kind == TEXT:
            text = node.data
            remainder = string.lstrip(text)
            if remainder:
                if remainder == text:
                    return index
                # break into two nodes: whitespace and non-whitespace;
                # the non-whitespace part becomes the next sibling.
                node.splitText(len(text) - len(remainder))
                return index + 1
            # all whitespace, just skip
        elif kind == ELEMENT:
            name = node.tagName
            if (name in RECURSE_INTO_PARA_CONTAINERS
                or name not in PARA_LEVEL_ELEMENTS + PARA_LEVEL_PRECEEDERS):
                return index
        # Nodes of any other type are skipped as well.
        index = index + 1
    return index
Fred Drakefba0ba21998-12-10 05:07:09 +0000757
758
def fixup_rfc_references(doc, fragment):
    """Give every <rfc> element found in fragment a text child.

    The text is "RFC " followed by the value of the element's "num"
    attribute.
    """
    for rfcnode in find_all_elements(fragment, "rfc"):
        label = "RFC " + rfcnode.getAttribute("num")
        rfcnode.appendChild(doc.createTextNode(label))
Fred Draked24167b1999-01-14 21:18:03 +0000763
764
def fixup_signatures(doc, fragment):
    """Rewrite optional-argument markup in signature argument lists.

    For each element child of fragment, every <args> descendant and then
    every <constructor-args> descendant is passed through fixup_args()
    and normalized afterwards.
    """
    for child in fragment.childNodes:
        if child.nodeType != ELEMENT:
            continue
        # Same treatment for both kinds of argument list; all <args> are
        # handled before <constructor-args> is looked up.
        for tag in ("args", "constructor-args"):
            for arglist in child.getElementsByTagName(tag):
                fixup_args(doc, arglist)
                arglist.normalize()
776
777
def fixup_args(doc, arglist):
    """Replace <optional> children of arglist with bracketed content.

    Each direct <optional> child is removed; its children are moved into
    arglist at the same position, surrounded by "[" and "]" text nodes.
    <optional> elements nested inside another become direct children once
    their wrapper is unwrapped, and are then handled the same way.

    Always returns None.
    """
    while 1:
        # Look for the first remaining <optional> child.
        wrapper = None
        for candidate in arglist.childNodes:
            if candidate.nodeName == "optional":
                wrapper = candidate
                break
        if wrapper is None:
            return
        # found it; fix and look again
        arglist.insertBefore(doc.createTextNode("["), wrapper)
        contents = wrapper.childNodes
        # Relies on childNodes shrinking as children are removed.
        while contents:
            first = contents[0]
            wrapper.removeChild(first)
            arglist.insertBefore(first, wrapper)
        arglist.insertBefore(doc.createTextNode("]"), wrapper)
        arglist.removeChild(wrapper)
791
792
def fixup_sectionauthors(doc, fragment):
    """Turn each <sectionauthor> in fragment into an <author> element.

    The element is detached from its parent, renamed to "author", given
    a text child holding the value of its "name" attribute (which is then
    removed), and re-inserted into the same parent: in front of the
    parent's third child when the second child is a <title>, otherwise
    in front of the first child.  A whitespace text node is inserted
    ahead of it.
    """
    for author in find_all_elements(fragment, "sectionauthor"):
        section = author.parentNode
        section.removeChild(author)
        author._node.name = "author"
        name_text = doc.createTextNode(author.getAttribute("name"))
        author.appendChild(name_text)
        author.removeAttribute("name")
        # Choose the insertion point among the remaining children.
        siblings = section.childNodes
        anchor = siblings[2]
        if siblings[1].nodeName != "title":
            anchor = siblings[0]
        section.insertBefore(doc.createTextNode("\n "), anchor)
        section.insertBefore(author, anchor)
807
808
def fixup_verbatims(doc):
    """Rename <verbatim> elements that look like interpreter sessions.

    A <verbatim> whose first child is a text node beginning (after any
    leading whitespace) with ">>>" is renamed "interactive-session".
    """
    for verbatim in find_all_elements(doc, "verbatim"):
        first = verbatim.childNodes[0]
        if first.nodeType != TEXT:
            continue
        if string.lstrip(first.data)[:3] == ">>>":
            verbatim._node.name = "interactive-session"
Fred Drake93d762f1999-02-18 16:32:21 +0000815
816
def add_node_ids(fragment, counter=0):
    """Store a distinct integer node_id on every node below fragment.

    fragment itself receives counter; its descendants receive larger
    values assigned in document order.  The id is stored as the node_id
    attribute of each node's _node object.

    Returns a value greater than every id assigned by this call.
    """
    fragment._node.node_id = counter
    next_id = counter
    for child in fragment.childNodes:
        next_id = next_id + 1
        if child.nodeType != ELEMENT:
            child._node.node_id = next_id
        else:
            # The recursive call labels the child and its whole subtree.
            next_id = add_node_ids(child, next_id)
    return next_id + 1
826
827
# Tag names of the module index markers handled by fixup_refmodindexes().
REFMODINDEX_ELEMENTS = ('refmodindex', 'refbimodindex',
                        'refexmodindex', 'refstmodindex')

def fixup_refmodindexes(fragment):
    """Process every container that holds a <ref*modindex> element.

    Locates <ref*modindex>...</> co-located with <module>...</>, so that
    the <ref*modindex> can be removed and replaced with an index
    attribute on the <module> element; the per-container work is done by
    fixup_refmodindexes_chunk().
    """
    # Key the parents by node_id so that each one is processed only once.
    parents = {}
    for entry in find_all_elements_from_set(fragment, REFMODINDEX_ELEMENTS):
        container = entry.parentNode
        parents[container._node.node_id] = container
    for container in parents.values():
        fixup_refmodindexes_chunk(container)
842
843
def fixup_refmodindexes_chunk(container):
    """Fold <ref*modindex> entries in container into matching <module>s.

    For every childless index entry whose "module" attribute equals the
    text of a <module> element found in container, that <module> gets
    index="yes" and the index entry is removed.  Index entries that have
    children are reported on stderr and left alone.  Nothing is done if
    container holds no <module> elements.
    """
    # node is probably a <para>; let's see how often it isn't:
    if container.tagName != PARA_ELEMENT:
        bwrite("--- fixup_refmodindexes_chunk(%s)\n" % container)
    module_entries = find_all_elements(container, "module")
    if not module_entries:
        return
    index_entries = find_all_elements_from_set(container, REFMODINDEX_ELEMENTS)
    removes = []
    for entry in index_entries:
        children = entry.childNodes
        if len(children) != 0:
            bwrite("--- unexpected number of children for %s node:\n"
                   % entry.tagName)
            ewrite(entry.toxml() + "\n")
            continue
        found = 0
        module_name = entry.getAttribute("module")
        for node in module_entries:
            # Only <module> elements with exactly one child are compared.
            if len(node.childNodes) != 1:
                continue
            this_name = node.childNodes[0].data
            if this_name == module_name:
                found = 1
                node.setAttribute("index", "yes")
        if found:
            removes.append(entry)
    for node in removes:
        # Remove through the entry's actual parent.
        # NOTE(review): if find_all_elements_from_set() also returns
        # descendants that are not direct children of container (it is
        # not visible here -- confirm), container.removeChild() would be
        # handed a node that is not its child.
        node.parentNode.removeChild(node)
873
874
def fixup_bifuncindexes(fragment):
    """Process every container that holds a <bifuncindex> element.

    Each distinct parent of a <bifuncindex> found in fragment is handed
    to fixup_bifuncindexes_chunk() exactly once.
    """
    # make sure that each parent is only processed once:
    parents = {}
    for entry in find_all_elements(fragment, 'bifuncindex'):
        container = entry.parentNode
        parents[container._node.node_id] = container
    for container in parents.values():
        fixup_bifuncindexes_chunk(container)
884
885
def fixup_bifuncindexes_chunk(container):
    """Fold <bifuncindex> entries in container into matching <function>s.

    A <function> matches an entry when its leading text is the entry's
    "name" attribute followed by "()".  Every matching <function> gets
    index="yes" and module="__builtin__"; an entry with at least one
    match is removed from container.

    <function> elements that are empty, or whose first child is not a
    text node, cannot match and are skipped rather than raising.
    """
    removes = []
    entries = find_all_child_elements(container, "bifuncindex")
    function_entries = find_all_child_elements(container, "function")
    for entry in entries:
        function_name = entry.getAttribute("name")
        found = 0
        for func_entry in function_entries:
            children = func_entry.childNodes
            # Guard against <function/> and non-text first children,
            # which have no usable .data to compare.
            if len(children) == 0 or children[0].nodeType != TEXT:
                continue
            t2 = children[0].data
            if t2[-2:] != "()":
                continue
            t2 = t2[:-2]
            if t2 == function_name:
                func_entry.setAttribute("index", "yes")
                func_entry.setAttribute("module", "__builtin__")
                # Schedule the entry for removal once, however many
                # <function> elements match it.
                if not found:
                    found = 1
                    removes.append(entry)
    for entry in removes:
        container.removeChild(entry)
906
907
# Attribute values matching this pattern are written with type TOKEN,
# everything else as CDATA.  \Z (rather than $) is used so that a value
# ending in a newline is not accepted as a token.
_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*\Z")

def write_esis(doc, ofp, knownempty):
    """Write the children of doc to ofp as ESIS data, recursively.

    doc        -- node whose childNodes are written
    ofp        -- output object; only its write() method is used
    knownempty -- callable taking a tag name and returning true if that
                  element type was declared empty

    For each element: an "e" line if it is declared empty, one "A" line
    per attribute, then "(tag", the element's content, and ")tag".  Text
    nodes are written as "-" lines, encoded by esistools.encode().

    Raises ValueError if a declared-empty element has children, and
    RuntimeError for any node that is neither an element nor text.
    """
    for node in doc.childNodes:
        nodeType = node.nodeType
        if nodeType == ELEMENT:
            gi = node.tagName
            if knownempty(gi):
                if node.hasChildNodes():
                    raise ValueError(
                        "declared-empty node <%s> has children" % gi)
                ofp.write("e\n")
            for k, v in node.attributes.items():
                value = v.value
                if _token_rx.match(value):
                    dtype = "TOKEN"
                else:
                    dtype = "CDATA"
                ofp.write("A%s %s %s\n" % (k, dtype, esistools.encode(value)))
            ofp.write("(%s\n" % gi)
            write_esis(node, ofp, knownempty)
            ofp.write(")%s\n" % gi)
        elif nodeType == TEXT:
            ofp.write("-%s\n" % esistools.encode(node.data))
        else:
            raise RuntimeError("unsupported node type: %s" % nodeType)
934
935
def convert(ifp, ofp):
    """Read a document from ifp, transform it, and write ESIS to ofp.

    The whole of ifp is fed to an esistools.ExtendedEsisBuilder; the
    resulting document and fragment are passed through the cleanup and
    fixup functions below, in this order, and the fragment is then
    written with write_esis().

    An IOError whose errno is EPIPE during output is ignored; any other
    exception propagates.
    """
    p = esistools.ExtendedEsisBuilder()
    p.feed(ifp.read())
    doc = p.document
    fragment = p.fragment
    normalize(fragment)
    simplify(doc, fragment)
    handle_labels(doc, fragment)
    handle_appendix(doc, fragment)
    fixup_trailing_whitespace(doc, {
        "abstract": "\n",
        "title": "",
        "chapter": "\n\n",
        "section": "\n\n",
        "subsection": "\n\n",
        "subsubsection": "\n\n",
        "paragraph": "\n\n",
        "subparagraph": "\n\n",
        })
    cleanup_root_text(doc)
    cleanup_trailing_parens(fragment, ["function", "method", "cfunction"])
    cleanup_synopses(doc, fragment)
    fixup_descriptors(doc, fragment)
    fixup_verbatims(fragment)
    normalize(fragment)
    fixup_paras(doc, fragment)
    fixup_sectionauthors(doc, fragment)
    fixup_table_structures(doc, fragment)
    fixup_rfc_references(doc, fragment)
    fixup_signatures(doc, fragment)
    add_node_ids(fragment)
    fixup_refmodindexes(fragment)
    fixup_bifuncindexes(fragment)
    #
    # Build the "declared empty" lookup used by write_esis().
    d = {}
    for gi in p.get_empties():
        d[gi] = gi
    # <rfc> elements were given a text child by fixup_rfc_references(),
    # so they must not be reported as empty (write_esis() would raise).
    if d.has_key("rfc"):
        del d["rfc"]
    knownempty = d.has_key
    #
    try:
        write_esis(fragment, ofp, knownempty)
    except IOError:
        # Ignore EPIPE; it just means that whoever we're writing to stopped
        # reading.  The rest of the output would be ignored.  All other
        # errors should still be reported.
        #
        # The exception is inspected through its errno attribute instead
        # of being unpacked as a 2-tuple, so an IOError carrying a
        # different number of arguments is re-raised as itself rather
        # than failing during unpacking.
        err = sys.exc_info()[1]
        if getattr(err, "errno", None) != errno.EPIPE:
            raise
985
986
def main():
    """Command-line entry point.

    With no arguments, converts standard input to standard output.  One
    argument names the input file; a second names the output file.  With
    any other number of arguments, usage() is called and the process
    exits with status 2.
    """
    argc = len(sys.argv)
    if argc < 1 or argc > 3:
        usage()
        sys.exit(2)
    # Default to the standard streams, then override from the arguments.
    ifp = sys.stdin
    ofp = sys.stdout
    if argc >= 2:
        ifp = open(sys.argv[1])
    if argc == 3:
        ofp = open(sys.argv[2], "w")
    convert(ifp, ofp)


if __name__ == "__main__":
    main()