blob: b180d78ceec9c7be98a4061d5d343f71847f03ca [file] [log] [blame]
Fred Drake03204731998-11-23 17:02:03 +00001#! /usr/bin/env python
2
Fred Drake7dab6af1999-01-28 23:59:58 +00003"""Perform massive transformations on a document tree created from the LaTeX
4of the Python documentation, and dump the ESIS data for the transformed tree.
Fred Drake03204731998-11-23 17:02:03 +00005"""
6__version__ = '$Revision$'
7
8
9import errno
Fred Drake4db5b461998-12-01 19:03:01 +000010import esistools
11import re
Fred Drake03204731998-11-23 17:02:03 +000012import string
13import sys
14import xml.dom.core
15import xml.dom.esis_builder
16
17
class ConversionError(Exception):
    """Raised when the tree contains content the fixer cannot convert."""
20
21
# Set to a true value to get paragraph-fixer trace output on stderr.
DEBUG_PARA_FIXER = 0

if not DEBUG_PARA_FIXER:
    def para_msg(s):
        """Discard the message; paragraph-fixer tracing is switched off."""
        pass
else:
    def para_msg(s):
        """Write the trace message to stderr, prefixed with '*** '."""
        sys.stderr.write("*** %s\n" % s)
30
Fred Drakefcc59101999-01-06 22:50:52 +000031
# Workaround to deal with invalid documents (multiple root elements).  This
# does not indicate a bug in the DOM implementation.
#
def get_documentElement(self):
    """Return the last element-type child of the document, or None.

    Every child of the underlying node is examined, so when there are
    several root elements the final one is the one returned.
    """
    found = None
    for raw in self._node.children:
        if raw.type != xml.dom.core.ELEMENT:
            continue
        found = xml.dom.core.Element(raw, self, self)
    return found

# Install the replacement accessor on the DOM Document class.
xml.dom.core.Document.get_documentElement = get_documentElement
43
44
# Replace get_childNodes for the Document class; without this, children
# accessed from the Document object via .childNodes (no matter how many
# levels of access are used) will be given an ownerDocument of None.
#
def get_childNodes(self):
    """Return the document's children as a NodeList owned by the document."""
    raw_children = self._node.children
    return xml.dom.core.NodeList(raw_children, self, self)

# Install the replacement accessor on the DOM Document class.
xml.dom.core.Document.get_childNodes = get_childNodes
53
54
def get_first_element(doc, gi):
    """Return the first direct child element of doc named gi, or None."""
    for candidate in doc.childNodes:
        if candidate.nodeType != xml.dom.core.ELEMENT:
            continue
        if candidate.tagName == gi:
            return candidate
    return None
59
def extract_first_element(doc, gi):
    """Detach and return the first direct child element of doc named gi.

    Returns None (and leaves doc untouched) when there is no such child.
    """
    found = get_first_element(doc, gi)
    if found is None:
        return None
    doc.removeChild(found)
    return found
65
66
def find_all_elements(doc, gi):
    """Return a list of every element named gi at or below doc.

    doc itself is included when it is an element with that name; each
    element child contributes itself (if it matches) followed by whatever
    its getElementsByTagName(gi) returns.
    """
    element = xml.dom.core.ELEMENT
    matches = []
    if doc.nodeType == element and doc.tagName == gi:
        matches.append(doc)
    for kid in doc.childNodes:
        if kid.nodeType != element:
            continue
        if kid.tagName == gi:
            matches.append(kid)
        for descendant in kid.getElementsByTagName(gi):
            matches.append(descendant)
    return matches
78
79
def simplify(doc):
    """Pull stray top-level elements into the root element.

    The top-level <documentclass> is removed and its "classname" attribute
    becomes the new name of the top-level <document> element.  A top-level
    <title> and every top-level <input> are moved to the front of the
    document element, each followed by a newline text node.  Finally any
    leading text nodes of the document are dropped.
    """
    # Try to rationalize the document a bit, since these things are simply
    # not valid SGML/XML documents as they stand, and need a little work.
    documentclass = "document"
    inputs = []
    node = extract_first_element(doc, "documentclass")
    if node is not None:
        documentclass = node.getAttribute("classname")
    node = extract_first_element(doc, "title")
    if node is not None:
        inputs.append(node)
    # update the name of the root element
    node = get_first_element(doc, "document")
    if node is not None:
        node._node.name = documentclass
    while 1:
        node = extract_first_element(doc, "input")
        if node is None:
            break
        inputs.append(node)
    if inputs:
        docelem = doc.documentElement
        # Insert back to front so the original order is kept at the head
        # of the document element.
        inputs.reverse()
        for node in inputs:
            text = doc.createTextNode("\n")
            docelem.insertBefore(text, docelem.firstChild)
            docelem.insertBefore(node, text)
        docelem.insertBefore(doc.createTextNode("\n"), docelem.firstChild)
    # Stop when the document runs out of children instead of dereferencing
    # a missing first child.
    while doc.firstChild is not None \
          and doc.firstChild.nodeType == xml.dom.core.TEXT:
        doc.removeChild(doc.firstChild)
110
111
def cleanup_root_text(doc):
    """Remove top-level text nodes, except one directly after a <COMMENT>.

    Only the text node immediately following a COMMENT element is kept;
    all other direct text children of doc are removed.
    """
    doomed = []
    after_comment = 0
    for child in doc.childNodes:
        kind = child.nodeType
        if kind == xml.dom.core.TEXT:
            if not after_comment:
                doomed.append(child)
            after_comment = 0
        else:
            after_comment = (kind == xml.dom.core.ELEMENT
                             and child.tagName == "COMMENT")
    # Remove only after the scan so the child list is not changed while
    # it is being iterated.
    for child in doomed:
        doc.removeChild(child)
124
125
def handle_args(doc):
    """Rewrite every parent of an <args> element as signature + description.

    For each <args> element found, the parent's other children are moved
    into a new <description>, and the <args> is replaced by a <signature>
    holding a <name> built from the parent's "name" attribute (which is
    removed).  The <args> element is kept inside the signature only when
    it has children.
    """
    for node in find_all_elements(doc, "args"):
        parent = node.parentNode
        # Everything in the parent except <args> becomes the description.
        nodes = []
        for n in parent.childNodes:
            if n.nodeType != xml.dom.core.ELEMENT or n.tagName != "args":
                nodes.append(n)
        signature = doc.createElement("signature")
        signature.appendChild(doc.createTextNode("\n "))
        name = doc.createElement("name")
        name.appendChild(doc.createTextNode(parent.getAttribute("name")))
        parent.removeAttribute("name")
        signature.appendChild(name)
        desc = doc.createElement("description")
        for n in nodes:
            parent.removeChild(n)
            desc.appendChild(n)
        desc.appendChild(doc.createTextNode("\n "))
        parent.replaceChild(signature, node)
        parent.insertBefore(doc.createTextNode("\n "), signature)
        if node.childNodes:
            # keep the <args>...</args>, newline & indent
            signature.appendChild(doc.createTextNode("\n "))
            signature.appendChild(node)
        # createTextNode is the factory used everywhere else in this file.
        parent.appendChild(doc.createTextNode("\n "))
        parent.appendChild(desc)
        parent.appendChild(doc.createTextNode("\n"))
        signature.appendChild(doc.createTextNode("\n "))
Fred Drake03204731998-11-23 17:02:03 +0000154
Fred Drake7dab6af1999-01-28 23:59:58 +0000155
def methodline_to_signature(doc, methodline):
    """Detach methodline from its parent and return a <signature> for it.

    The signature holds a <name> taken from the methodline's "name"
    attribute.  When the methodline has children it is renamed to "args",
    loses its "name" attribute and is placed inside the signature.
    """
    sig = doc.createElement("signature")
    sig.appendChild(doc.createTextNode("\n "))
    name_elem = doc.createElement("name")
    method_name = methodline.getAttribute("name")
    name_elem.appendChild(doc.createTextNode(method_name))
    sig.appendChild(name_elem)
    owner = methodline.parentNode
    owner.removeChild(methodline)
    if len(methodline.childNodes) > 0:
        # Reuse the methodline node itself as the <args> element.
        methodline._node.name = "args"
        methodline.removeAttribute("name")
        sig.appendChild(doc.createTextNode("\n "))
        sig.appendChild(methodline)
    sig.appendChild(doc.createTextNode("\n "))
    return sig
Fred Drake03204731998-11-23 17:02:03 +0000170
171
def handle_appendix(doc):
    """Move everything after the appendix marker into <back-matter>.

    The first child of the document element that contains an <appendix>
    descendant has that marker removed (and its parent normalized).  All
    later siblings are then moved into a new <back-matter> element that is
    appended to the document element; leading whitespace-only text nodes
    among them are dropped.
    """
    # must be called after simplify() if document is multi-rooted to begin with
    docelem = doc.documentElement
    appendices = 0
    nodes = []
    for node in docelem.childNodes:
        if appendices:
            nodes.append(node)
        elif node.nodeType == xml.dom.core.ELEMENT:
            appnodes = node.getElementsByTagName("appendix")
            if appnodes:
                appendices = 1
                parent = appnodes[0].parentNode
                parent.removeChild(appnodes[0])
                parent.normalize()
    if not nodes:
        return
    for node in nodes:
        docelem.removeChild(node)
    docelem.appendChild(doc.createTextNode("\n\n\n"))
    back = doc.createElement("back-matter")
    docelem.appendChild(back)
    back.appendChild(doc.createTextNode("\n"))
    # Drop whitespace-only text that would otherwise lead the back matter.
    while nodes and nodes[0].nodeType == xml.dom.core.TEXT \
          and not string.strip(nodes[0].data):
        del nodes[0]
    for node in nodes:
        back.appendChild(node)
    docelem.appendChild(doc.createTextNode("\n"))
Fred Drake03204731998-11-23 17:02:03 +0000199
200
def handle_labels(doc):
    """Turn <label id="..."/> elements into "id" attributes.

    The id goes on the label's parent, or on the grandparent when the
    parent is a <title>.  Labels with an empty id are left in place.
    """
    for label in find_all_elements(doc, "label"):
        label_id = label.getAttribute("id")
        if label_id:
            holder = label.parentNode
            target = holder
            if holder.tagName == "title":
                target = holder.parentNode
            target.setAttribute("id", label_id)
            # now, remove <label id="..."/> from parent:
            holder.removeChild(label)
Fred Drake03204731998-11-23 17:02:03 +0000213
214
def fixup_trailing_whitespace(doc, wsmap):
    """Normalize the trailing whitespace of selected elements.

    The tree is walked breadth-first from doc.  For each element whose tag
    is a key of wsmap, a trailing text child has its trailing whitespace
    replaced by the mapped string.  A <title> whose parent starts with an
    element also gets a newline-and-indent text node inserted at the start
    of that parent.
    """
    queue = [doc]
    while queue:
        node = queue[0]
        del queue[0]
        if node.nodeType == xml.dom.core.ELEMENT \
           and wsmap.has_key(node.tagName):
            ws = wsmap[node.tagName]
            children = node.childNodes
            # An element without children has no trailing text to adjust.
            if len(children) \
               and children[-1].nodeType == xml.dom.core.TEXT:
                last = children[-1]
                last.data = string.rstrip(last.data) + ws
            # hack to get the title in place:
            if node.tagName == "title" \
               and node.parentNode.firstChild.nodeType == xml.dom.core.ELEMENT:
                node.parentNode.insertBefore(doc.createTextNode("\n "),
                                             node.parentNode.firstChild)
        for child in node.childNodes:
            if child.nodeType == xml.dom.core.ELEMENT:
                queue.append(child)
237
238
def normalize(doc):
    """Call normalize() on every element that is a direct child of doc."""
    for top in doc.childNodes:
        if top.nodeType != xml.dom.core.ELEMENT:
            continue
        top.normalize()
243
244
def cleanup_trailing_parens(doc, element_names):
    """Strip a trailing "()" from the text of the named elements.

    Only elements whose sole child is a text node are changed.  Elements
    with one of the given names are not searched any deeper; all others
    have their element children examined in turn.
    """
    wanted = {}
    for gi in element_names:
        wanted[gi] = gi
    pending = []
    for top in doc.childNodes:
        if top.nodeType == xml.dom.core.ELEMENT:
            pending.append(top)
    while pending:
        elem = pending.pop(0)
        if not wanted.has_key(elem.tagName):
            for kid in elem.childNodes:
                if kid.nodeType == xml.dom.core.ELEMENT:
                    pending.append(kid)
            continue
        kids = elem.childNodes
        if len(kids) != 1:
            continue
        only = kids[0]
        if only.nodeType == xml.dom.core.TEXT and only.data[-2:] == "()":
            only.data = only.data[:-2]
268
269
def contents_match(left, right):
    """Return 1 if left and right have equivalent children, else 0.

    Children are compared pairwise: elements must share a tag name and
    match recursively, text nodes must carry equal data.  Attributes are
    not compared, and any other node type counts as a mismatch.
    """
    lkids = left.childNodes
    rkids = right.childNodes
    count = len(lkids)
    if count != len(rkids):
        return 0
    for i in range(count):
        l = lkids[i]
        r = rkids[i]
        kind = l.nodeType
        if kind != r.nodeType:
            return 0
        if kind == xml.dom.core.ELEMENT:
            # should check attributes, but that's not a problem here
            if l.tagName != r.tagName or not contents_match(l, r):
                return 0
        elif kind == xml.dom.core.TEXT:
            if l.data != r.data:
                return 0
        else:
            # not quite right, but good enough
            return 0
    return 1
292
293
def create_module_info(doc, section):
    """Gather a section's module metadata into a <moduleinfo> element.

    Does nothing unless section has a direct <modulesynopsis> child.  That
    child is renamed to "synopsis"; a <moduleauthor> child, if present, is
    renamed to "author" with its "name" attribute turned into text content.
    When section is a <section>, a <moduleinfo> element is built from the
    <declaremodule> name/type, the synopsis, optionally the retitled
    <title>, and the author, and is inserted into the section.
    """
    # Heavy.
    node = extract_first_element(section, "modulesynopsis")
    if node is None:
        return
    node._node.name = "synopsis"
    # Drop a single trailing period from the synopsis text.
    lastchild = node.childNodes[-1]
    if lastchild.nodeType == xml.dom.core.TEXT \
       and lastchild.data[-1:] == ".":
        lastchild.data = lastchild.data[:-1]
    modauthor = extract_first_element(section, "moduleauthor")
    if modauthor:
        modauthor._node.name = "author"
        modauthor.appendChild(doc.createTextNode(
            modauthor.getAttribute("name")))
        modauthor.removeAttribute("name")
    if section.tagName == "section":
        # Index in section.childNodes before which <moduleinfo> is inserted;
        # reset to 0 below when the <title> is taken out of the section.
        modinfo_pos = 2
        modinfo = doc.createElement("moduleinfo")
        moddecl = extract_first_element(section, "declaremodule")
        name = None
        if moddecl:
            modinfo.appendChild(doc.createTextNode("\n "))
            name = moddecl.attributes["name"].value
            namenode = doc.createElement("name")
            namenode.appendChild(doc.createTextNode(name))
            modinfo.appendChild(namenode)
            type = moddecl.attributes.get("type")
            if type:
                type = type.value
                modinfo.appendChild(doc.createTextNode("\n "))
                typenode = doc.createElement("type")
                typenode.appendChild(doc.createTextNode(type))
                modinfo.appendChild(typenode)
        title = get_first_element(section, "title")
        if title:
            children = title.childNodes
            # The title qualifies when it starts with <module> whose first
            # child's data equals the declared module name.
            if len(children) >= 2 \
               and children[0].nodeType == xml.dom.core.ELEMENT \
               and children[0].tagName == "module" \
               and children[0].childNodes[0].data == name:
                # this is it; morph the <title> into <short-synopsis>
                first_data = children[1]
                if first_data.data[:4] == " ---":
                    first_data.data = string.lstrip(first_data.data[4:])
                title._node.name = "short-synopsis"
                if children[-1].nodeType == xml.dom.core.TEXT \
                   and children[-1].data[-1:] == ".":
                    children[-1].data = children[-1].data[:-1]
                section.removeChild(title)
                # NOTE(review): presumably this drops the whitespace text
                # node left at the head of the section -- confirm.
                section.removeChild(section.childNodes[0])
                # Remove the leading <module> element from the old title.
                title.removeChild(children[0])
                modinfo_pos = 0
            else:
                sys.stderr.write(
                    "module name in title doesn't match"
                    " <declaremodule>; no <short-synopsis>\n")
        else:
            sys.stderr.write(
                "Unexpected condition: <section> without <title>\n")
        modinfo.appendChild(doc.createTextNode("\n "))
        modinfo.appendChild(node)
        if title and not contents_match(title, node):
            # The short synopsis is actually different,
            # and needs to be stored:
            modinfo.appendChild(doc.createTextNode("\n "))
            modinfo.appendChild(title)
        if modauthor:
            modinfo.appendChild(doc.createTextNode("\n "))
            modinfo.appendChild(modauthor)
        modinfo.appendChild(doc.createTextNode("\n "))
        section.insertBefore(modinfo, section.childNodes[modinfo_pos])
        section.insertBefore(doc.createTextNode("\n "), modinfo)
367
368
def cleanup_synopses(doc):
    """Run create_module_info() on every <section> found in doc."""
    sections = find_all_elements(doc, "section")
    for section in sections:
        create_module_info(doc, section)
Fred Drakeaaed9711998-12-10 20:25:30 +0000372
373
def remap_element_names(root, name_map):
    """Rename elements below root according to name_map.

    name_map maps an old tag name to a (new_name, attributes) pair; the
    attributes dictionary is applied to each renamed element.  Every
    element below root is visited, renamed or not.
    """
    pending = []
    for kid in root.childNodes:
        if kid.nodeType == xml.dom.core.ELEMENT:
            pending.append(kid)
    while pending:
        elem = pending.pop()
        try:
            new_name, extra = name_map[elem.tagName]
        except KeyError:
            pass
        else:
            elem._node.name = new_name
            for key, value in extra.items():
                elem.setAttribute(key, value)
        for kid in elem.childNodes:
            if kid.nodeType == xml.dom.core.ELEMENT:
                pending.append(kid)
390
391
def fixup_table_structures(doc):
    """Run fixup_table() on every <table> element found in doc."""
    # must be done after remap_element_names(), or the tables won't be found
    tables = find_all_elements(doc, "table")
    for table in tables:
        fixup_table(doc, table)
396
Fred Drakef8ebb551999-01-14 19:45:38 +0000397
def fixup_table(doc, table):
    """Rebuild a flat <table> as <tgroup> holding <thead> and <tbody>.

    <entry> children of the table become a header row inside <thead>;
    <row> children move into <tbody>.  An <hline> that follows a row sets
    rowsep="1" on that row.  Remaining whitespace text and <hline>
    elements are discarded; anything else raises ConversionError.
    """
    # create the table head
    thead = doc.createElement("thead")
    head_row = doc.createElement("row")
    move_elements_by_name(doc, table, head_row, "entry")
    thead.appendChild(doc.createTextNode("\n "))
    thead.appendChild(head_row)
    thead.appendChild(doc.createTextNode("\n "))
    # create the table body
    tbody = doc.createElement("tbody")
    prev_row = None
    for child in table.childNodes:
        if child.nodeType == xml.dom.core.ELEMENT:
            tagName = child.tagName
            if tagName == "hline" and prev_row is not None:
                prev_row.setAttribute("rowsep", "1")
            elif tagName == "row":
                prev_row = child
    # save the rows:
    tbody.appendChild(doc.createTextNode("\n "))
    move_elements_by_name(doc, table, tbody, "row", sep="\n ")
    # and toss the rest; the child list is re-read on every pass so the
    # loop ends once the table is really empty:
    while table.childNodes:
        child = table.childNodes[0]
        nodeType = child.nodeType
        if nodeType == xml.dom.core.TEXT:
            if string.strip(child.data):
                raise ConversionError("unexpected free data in table")
            table.removeChild(child)
            continue
        if nodeType == xml.dom.core.ELEMENT:
            if child.tagName != "hline":
                raise ConversionError(
                    "unexpected <%s> in table" % child.tagName)
            table.removeChild(child)
            continue
        raise ConversionError(
            "unexpected %s node in table" % child.__class__.__name__)
    # nothing left in the <table>; add the <thead> and <tbody>
    tgroup = doc.createElement("tgroup")
    tgroup.appendChild(doc.createTextNode("\n "))
    tgroup.appendChild(thead)
    tgroup.appendChild(doc.createTextNode("\n "))
    tgroup.appendChild(tbody)
    tgroup.appendChild(doc.createTextNode("\n "))
    table.appendChild(tgroup)
    # now make the <entry>s look nice:
    for row in table.getElementsByTagName("row"):
        fixup_row(doc, row)
449
450
def fixup_row(doc, row):
    """Insert a newline-and-indent text node before each child but the first.

    Nothing is added after the last child, and a row with fewer than two
    children is left unchanged.
    """
    # Copy first: inserting nodes changes the row's child list.
    entries = []
    for entry in row.childNodes[1:]:
        entries.append(entry)
    for entry in entries:
        row.insertBefore(doc.createTextNode("\n "), entry)
457
458
def move_elements_by_name(doc, source, dest, name, sep=None):
    """Move the direct child elements of source named name into dest.

    When sep is true, a text node holding sep is appended to dest after
    each moved element.
    """
    movers = []
    for kid in source.childNodes:
        if kid.nodeType != xml.dom.core.ELEMENT:
            continue
        if kid.tagName == name:
            movers.append(kid)
    # Moved in a second pass so source is not changed while being scanned.
    for elem in movers:
        source.removeChild(elem)
        dest.appendChild(elem)
        if sep:
            dest.appendChild(doc.createTextNode(sep))
469
470
# Sectioning elements shared by the first two tables below.
_SECTIONING_ELEMENTS = (
    "chapter",
    "section",
    "subsection",
    "subsubsection",
    "paragraph",
    "subparagraph",
    )

# Together with PARA_LEVEL_ELEMENTS, these end the paragraph being
# collected by build_para().
FIXUP_PARA_ELEMENTS = _SECTIONING_ELEMENTS + (
    "excdesc",
    "datadesc",
    "excdescni",
    "datadescni",
    )

# Elements whose children are themselves searched for paragraph material.
RECURSE_INTO_PARA_CONTAINERS = _SECTIONING_ELEMENTS + (
    "abstract",
    "memberdesc",
    "memberdescni",
    "datadesc",
    "datadescni",
    )

# Elements that sit at paragraph level and are never wrapped in <para>.
PARA_LEVEL_ELEMENTS = (
    "moduleinfo",
    "title",
    "verbatim",
    "opcodedesc",
    "classdesc",
    "funcdesc",
    "methoddesc",
    "excdesc",
    "funcdescni",
    "methoddescni",
    "excdescni",
    "tableii",
    "tableiii",
    "tableiv",
    "localmoduletable",
    "sectionauthor",
    "seealso",
    # include <para>, so we can just do it again to get subsequent paras:
    "para",
    )

# Elements skipped over when looking for the start of a paragraph.
PARA_LEVEL_PRECEEDERS = (
    "index",
    "indexii",
    "indexiii",
    "indexiv",
    "stindex",
    "obindex",
    "COMMENT",
    "label",
    "input",
    "memberline",
    "memberlineni",
    "methodline",
    "methodlineni",
    )
504
Fred Drake7dab6af1999-01-28 23:59:58 +0000505
def fixup_paras(doc):
    """Wrap loose paragraph material in <para> elements.

    Each top-level element named in RECURSE_INTO_PARA_CONTAINERS is
    processed, followed by every <description> element in the document.
    """
    for child in doc.childNodes:
        if child.nodeType == xml.dom.core.ELEMENT \
           and child.tagName in RECURSE_INTO_PARA_CONTAINERS:
            fixup_paras_helper(doc, child)
    # Search the whole document, so the result does not depend on which
    # node the loop above happened to finish on (or on there being one).
    for description in find_all_elements(doc, "description"):
        fixup_paras_helper(doc, description)
515
516
def fixup_paras_helper(doc, container, depth=0):
    """Build <para> elements from the children of container.

    Children named in RECURSE_INTO_PARA_CONTAINERS are processed
    recursively; other material is handed to build_para().  depth is only
    consulted by the DEBUG_PARA_FIXER escape hatch and is not passed on to
    the recursive calls.
    """
    # document is already normalized
    kids = container.childNodes
    pos = 0
    while pos < len(kids):
        pos = skip_leading_nodes(kids, pos)
        if pos >= len(kids):
            break
        candidate = kids[pos]
        if candidate.nodeType == xml.dom.core.ELEMENT \
           and candidate.tagName in RECURSE_INTO_PARA_CONTAINERS:
            # something to recurse into:
            fixup_paras_helper(doc, candidate)
            pos = skip_leading_nodes(kids, pos + 1)
        else:
            # paragraph material:
            build_para(doc, container, pos, len(kids))
            if DEBUG_PARA_FIXER and depth == 10:
                sys.exit(1)
            pos = pos + 1
Fred Drakefcc59101999-01-06 22:50:52 +0000540
541
def build_para(doc, parent, start, i):
    """Wrap a run of parent's children, beginning at start, in a <para>.

    Children from index start up to (not including) i are scanned; the run
    ends at a BREAK_ELEMENTS member, at a text node containing "\\n\\n", or
    at i.  Returns len(parent.childNodes) when the new <para> was appended
    at the end of parent, otherwise start + 1.

    Raises ConversionError when no child can be placed in the paragraph.
    """
    children = parent.childNodes
    # Index one past the last child that goes into the paragraph.
    after = start + 1
    # Set when the scan ran to i without hitting a break condition.
    have_last = 0
    BREAK_ELEMENTS = PARA_LEVEL_ELEMENTS + FIXUP_PARA_ELEMENTS
    # Collect all children until \n\n+ is found in a text node or a
    # member of BREAK_ELEMENTS is found.
    for j in range(start, i):
        after = j + 1
        child = children[j]
        nodeType = child.nodeType
        if nodeType == xml.dom.core.ELEMENT:
            if child.tagName in BREAK_ELEMENTS:
                after = j
                break
        elif nodeType == xml.dom.core.TEXT:
            pos = string.find(child.data, "\n\n")
            if pos == 0:
                # The break starts this text node; exclude the node.
                after = j
                break
            if pos >= 1:
                # Split at the break; this node is the paragraph's last.
                child.splitText(pos)
                break
    else:
        have_last = 1
    if (start + 1) > after:
        raise ConversionError(
            "build_para() could not identify content to turn into a paragraph")
    if children[after - 1].nodeType == xml.dom.core.TEXT:
        # we may need to split off trailing white space:
        child = children[after - 1]
        data = child.data
        if string.rstrip(data) != data:
            have_last = 0
            child.splitText(len(string.rstrip(data)))
    para = doc.createElement("para")
    prev = None
    # Move the nodes back to front, inserting each before the one moved
    # previously, so their order is preserved inside the <para>.
    indexes = range(start, after)
    indexes.reverse()
    for j in indexes:
        node = parent.childNodes[j]
        parent.removeChild(node)
        para.insertBefore(node, prev)
        prev = node
    if have_last:
        parent.appendChild(para)
        return len(parent.childNodes)
    else:
        parent.insertBefore(para, parent.childNodes[start])
        return start + 1
Fred Drakefcc59101999-01-06 22:50:52 +0000592
593
def skip_leading_nodes(children, start):
    """Return the index in children where paragraph building should begin,
    or where a recursive call to fixup_paras_helper() should be made (for
    subsections, etc.).

    A return value >= len(children) means that no more paragraphs can be
    built from this list of children.
    """
    total = len(children)
    skippable = PARA_LEVEL_ELEMENTS + PARA_LEVEL_PRECEEDERS
    while start < total:
        # skip over leading comments and whitespace:
        node = children[start]
        kind = node.nodeType
        if kind == xml.dom.core.TEXT:
            text = node.data
            stripped = string.lstrip(text)
            if stripped:
                if text == stripped:
                    return start
                # break into two nodes: whitespace and non-whitespace
                node.splitText(len(text) - len(stripped))
                return start + 1
            # all whitespace, just skip
        elif kind == xml.dom.core.ELEMENT:
            tag = node.tagName
            if tag in RECURSE_INTO_PARA_CONTAINERS or tag not in skippable:
                return start
        start = start + 1
    return start
Fred Drakefba0ba21998-12-10 05:07:09 +0000625
626
def fixup_rfc_references(doc):
    """Append the text "RFC <num>" to every <rfc> element found in doc."""
    for rfc in find_all_elements(doc, "rfc"):
        label = "RFC " + rfc.getAttribute("num")
        rfc.appendChild(doc.createTextNode(label))
Fred Draked24167b1999-01-14 21:18:03 +0000631
632
def fixup_signatures(doc):
    """Apply fixup_args() to all <args> and <constructor-args> elements.

    Each argument list is normalized right after it has been rewritten.
    """
    for top in doc.childNodes:
        if top.nodeType != xml.dom.core.ELEMENT:
            continue
        for gi in ("args", "constructor-args"):
            for arglist in top.getElementsByTagName(gi):
                fixup_args(doc, arglist)
                arglist.normalize()
644
645
def fixup_args(doc, arglist):
    """Replace each <optional> child of arglist by its bracketed contents.

    The children of an <optional> element are moved into arglist between
    "[" and "]" text nodes and the emptied element is removed.  The scan
    restarts after every replacement until no <optional> child is left.
    """
    while 1:
        target = None
        for candidate in arglist.childNodes:
            if candidate.nodeType == xml.dom.core.ELEMENT \
               and candidate.tagName == "optional":
                target = candidate
                break
        if target is None:
            return
        arglist.insertBefore(doc.createTextNode("["), target)
        inner = target.childNodes
        # Each removal shrinks the list, so keep taking the first node.
        while inner:
            first = inner[0]
            target.removeChild(first)
            arglist.insertBefore(first, target)
        arglist.insertBefore(doc.createTextNode("]"), target)
        arglist.removeChild(target)
660
661
def fixup_sectionauthors(doc):
    """Turn each <sectionauthor> into an <author> near the section start.

    The element is renamed, its "name" attribute becomes its text content,
    and it is re-inserted before the parent's third child -- or before
    the first child when the second child is an element other than
    <title>.
    """
    for author in find_all_elements(doc, "sectionauthor"):
        owner = author.parentNode
        owner.removeChild(author)
        author._node.name = "author"
        who = author.getAttribute("name")
        author.appendChild(doc.createTextNode(who))
        author.removeAttribute("name")
        siblings = owner.childNodes
        anchor = siblings[2]
        second = siblings[1]
        if second.nodeType == xml.dom.core.ELEMENT \
           and second.tagName != "title":
            anchor = siblings[0]
        owner.insertBefore(doc.createTextNode("\n "), anchor)
        owner.insertBefore(author, anchor)
676
677
# Attribute values matching this pattern are written as TOKEN, all
# others as CDATA.
_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")

def write_esis(doc, ofp, knownempty):
    """Write the children of doc to ofp as ESIS records, recursively.

    knownempty is called with a tag name and returns true for elements
    declared empty; these get an "e" record.  Raises ValueError when such
    an element has children, and RuntimeError for any node that is
    neither an element nor text.
    """
    for child in doc.childNodes:
        kind = child.nodeType
        if kind == xml.dom.core.TEXT:
            ofp.write("-%s\n" % esistools.encode(child.data))
        elif kind == xml.dom.core.ELEMENT:
            gi = child.tagName
            if knownempty(gi):
                if child.hasChildNodes():
                    raise ValueError("declared-empty node has children")
                ofp.write("e\n")
            for attr_name, attr in child.attributes.items():
                value = attr.value
                dtype = "CDATA"
                if _token_rx.match(value):
                    dtype = "TOKEN"
                ofp.write("A%s %s %s\n"
                          % (attr_name, dtype, esistools.encode(value)))
            ofp.write("(%s\n" % gi)
            write_esis(child, ofp, knownempty)
            ofp.write(")%s\n" % gi)
        else:
            raise RuntimeError("unsupported node type: %s" % kind)
703
704
def convert(ifp, ofp):
    """Read ESIS data from ifp, fix up the tree and write ESIS to ofp.

    The whole input is read and parsed into a document, the fix-up passes
    run in a fixed order (later passes rely on earlier ones), and the
    result is written with write_esis().  An EPIPE error while writing is
    ignored; every other IOError propagates.
    """
    p = esistools.ExtendedEsisBuilder()
    p.feed(ifp.read())
    doc = p.document
    normalize(doc)
    handle_args(doc)
    simplify(doc)
    handle_labels(doc)
    handle_appendix(doc)
    fixup_trailing_whitespace(doc, {
        "abstract": "\n",
        "title": "",
        "chapter": "\n\n",
        "section": "\n\n",
        "subsection": "\n\n",
        "subsubsection": "\n\n",
        "paragraph": "\n\n",
        "subparagraph": "\n\n",
        })
    cleanup_root_text(doc)
    cleanup_trailing_parens(doc, ["function", "method", "cfunction"])
    cleanup_synopses(doc)
    normalize(doc)
    fixup_paras(doc)
    fixup_sectionauthors(doc)
    remap_element_names(doc, {
        "tableii": ("table", {"cols": "2"}),
        "tableiii": ("table", {"cols": "3"}),
        "tableiv": ("table", {"cols": "4"}),
        "lineii": ("row", {}),
        "lineiii": ("row", {}),
        "lineiv": ("row", {}),
        "refmodule": ("module", {"link": "link"}),
        })
    fixup_table_structures(doc)
    fixup_rfc_references(doc)
    fixup_signatures(doc)
    #
    # Build the "declared empty" lookup.  <rfc> is taken out because
    # fixup_rfc_references() has just given those elements content.
    d = {}
    for gi in p.get_empties():
        d[gi] = gi
    if d.has_key("rfc"):
        del d["rfc"]
    knownempty = d.has_key
    #
    try:
        write_esis(doc, ofp, knownempty)
    except IOError:
        # Ignore EPIPE; it just means that whoever we're writing to stopped
        # reading.  The rest of the output would be ignored.  All other
        # errors should still be reported.  The error number is read from
        # the exception object, so an IOError without exactly two
        # arguments is re-raised as-is instead of failing to unpack.
        if sys.exc_info()[1].errno != errno.EPIPE:
            raise
758
759
def usage():
    """Write a one-line summary of the accepted command line to stderr."""
    sys.stderr.write("usage: %s [infile [outfile]]\n" % sys.argv[0])


def main():
    """Command-line entry point.

    With no arguments, converts stdin to stdout; the first argument names
    the input file and the second the output file.  Any further argument
    prints a usage message and exits with status 2.
    """
    argc = len(sys.argv)
    if argc > 3:
        usage()
        sys.exit(2)
    ifp = sys.stdin
    ofp = sys.stdout
    if argc >= 2:
        ifp = open(sys.argv[1])
    if argc == 3:
        ofp = open(sys.argv[2], "w")
    try:
        convert(ifp, ofp)
    finally:
        # Close only the files opened here; leave the standard streams.
        if ofp is not sys.stdout:
            ofp.close()
        if ifp is not sys.stdin:
            ifp.close()


if __name__ == "__main__":
    main()