blob: 68fd4a3731178c750a9f21b3f61a7527674bfce8 [file] [log] [blame]
Fred Drake03204731998-11-23 17:02:03 +00001#! /usr/bin/env python
2
Fred Drake7dab6af1999-01-28 23:59:58 +00003"""Perform massive transformations on a document tree created from the LaTeX
4of the Python documentation, and dump the ESIS data for the transformed tree.
Fred Drake03204731998-11-23 17:02:03 +00005"""
6__version__ = '$Revision$'
7
8
9import errno
Fred Drake4db5b461998-12-01 19:03:01 +000010import esistools
11import re
Fred Drake03204731998-11-23 17:02:03 +000012import string
13import sys
14import xml.dom.core
15import xml.dom.esis_builder
16
17
class ConversionError(Exception):
    """Raised when the document tree cannot be converted as expected."""
20
21
# Set to a true value to have para_msg() report on stderr.
DEBUG_PARA_FIXER = 0

if not DEBUG_PARA_FIXER:
    def para_msg(s):
        """Discard the diagnostic message s (debugging is switched off)."""
        return None
else:
    def para_msg(s):
        """Write the diagnostic message s to stderr, prefixed with '***'."""
        sys.stderr.write("*** %s\n" % s)
30
Fred Drakefcc59101999-01-06 22:50:52 +000031
# Workaround to deal with invalid documents (multiple root elements).  This
# does not indicate a bug in the DOM implementation.
#
def get_documentElement(self):
    """Return an Element wrapper for the last element-typed child, or None.

    Every raw child of the document is examined; when several of them are
    elements, the wrapper built for the final one is the result.
    """
    make_element = xml.dom.core.Element
    found = None
    for raw in self._node.children:
        if raw.type != xml.dom.core.ELEMENT:
            continue
        found = make_element(raw, self, self)
    return found

xml.dom.core.Document.get_documentElement = get_documentElement
43
44
# Replace get_childNodes for the Document class; without this, children
# accessed from the Document object via .childNodes (no matter how many
# levels of access are used) will be given an ownerDocument of None.
#
def get_childNodes(self):
    """Return the document's children as a NodeList built with this
    document passed for both trailing constructor arguments."""
    raw_children = self._node.children
    return xml.dom.core.NodeList(raw_children, self, self)

xml.dom.core.Document.get_childNodes = get_childNodes
53
54
def get_first_element(doc, gi):
    """Return the first direct child element of doc whose tag name is gi.

    Returns None when no direct child matches.
    """
    element_type = xml.dom.core.ELEMENT
    for candidate in doc.childNodes:
        if candidate.nodeType != element_type:
            continue
        if candidate.tagName == gi:
            return candidate
    return None
59
def extract_first_element(doc, gi):
    """Detach and return the first direct child element of doc named gi.

    Returns None (and leaves doc untouched) when there is no such child.
    """
    found = get_first_element(doc, gi)
    if found is None:
        return None
    doc.removeChild(found)
    return found
65
66
def find_all_elements(doc, gi):
    """Return a list of every element named gi at or below doc.

    doc itself is included when it is an element with that name; each
    element child contributes itself (if it matches) followed by whatever
    its getElementsByTagName(gi) reports.
    """
    element_type = xml.dom.core.ELEMENT
    matches = []
    collect = matches.append
    if doc.nodeType == element_type and doc.tagName == gi:
        collect(doc)
    for kid in doc.childNodes:
        if kid.nodeType != element_type:
            continue
        if kid.tagName == gi:
            collect(kid)
        for descendant in kid.getElementsByTagName(gi):
            collect(descendant)
    return matches
78
79
def simplify(doc):
    """Rationalize the top level of a multi-rooted document tree.

    These trees are simply not valid SGML/XML documents as they stand:

    * a root-level <documentclass> is removed and its "classname"
      attribute becomes the name of the root-level <document> element;
    * root-level <title> and <input> elements are moved to the front of
      the document element, each followed by a newline text node;
    * text nodes leading the root-level child list are dropped.
    """
    documentclass = "document"
    inputs = []
    node = extract_first_element(doc, "documentclass")
    if node is not None:
        documentclass = node.getAttribute("classname")
    node = extract_first_element(doc, "title")
    if node is not None:
        inputs.append(node)
    # update the name of the root element
    node = get_first_element(doc, "document")
    if node is not None:
        node._node.name = documentclass
    while 1:
        node = extract_first_element(doc, "input")
        if node is None:
            break
        inputs.append(node)
    if inputs:
        docelem = doc.documentElement
        # Insert back-to-front so the nodes end up in their original order.
        inputs.reverse()
        for node in inputs:
            text = doc.createTextNode("\n")
            docelem.insertBefore(text, docelem.firstChild)
            docelem.insertBefore(node, text)
        docelem.insertBefore(doc.createTextNode("\n"), docelem.firstChild)
    # Stop when the child list runs out: a document holding nothing but
    # text would otherwise end with firstChild being None.
    while doc.firstChild is not None \
          and doc.firstChild.nodeType == xml.dom.core.TEXT:
        doc.removeChild(doc.firstChild)
110
111
def cleanup_root_text(doc):
    """Remove root-level text nodes, except one directly after a COMMENT.

    A text node is kept only when the node immediately before it is an
    element named "COMMENT".
    """
    doomed = []
    after_comment = 0
    for item in doc.childNodes:
        keep_text = after_comment
        after_comment = 0
        kind = item.nodeType
        if kind == xml.dom.core.TEXT:
            if not keep_text:
                doomed.append(item)
        elif kind == xml.dom.core.ELEMENT and item.tagName == "COMMENT":
            after_comment = 1
    # Removal is deferred so the child list is not changed while iterating.
    for item in doomed:
        doc.removeChild(item)
124
125
# Tag names that find_and_fix_descriptors() hands to rewrite_descriptor().
DESCRIPTOR_ELEMENTS = (
    "cfuncdesc",
    "cvardesc",
    "ctypedesc",
    "classdesc",
    "memberdesc",
    "memberdescni",
    "methoddesc",
    "methoddescni",
    "excdesc",
    "funcdesc",
    "funcdescni",
    "opcodedesc",
    "datadesc",
    "datadescni",
    )
132
def fixup_descriptors(doc):
    """Rewrite the descriptor elements found in every <section> of doc."""
    for section in find_all_elements(doc, "section"):
        find_and_fix_descriptors(doc, section)
137
138
def find_and_fix_descriptors(doc, container):
    """Rewrite descriptor children of container, descending into
    <subsection> children.

    Only direct children are examined; elements that are neither a
    descriptor nor a <subsection> are left alone.
    """
    for kid in container.childNodes:
        if kid.nodeType != xml.dom.core.ELEMENT:
            continue
        gi = kid.tagName
        if gi == "subsection":
            find_and_fix_descriptors(doc, kid)
        elif gi in DESCRIPTOR_ELEMENTS:
            rewrite_descriptor(doc, kid)
148
Fred Drakecb657811999-01-29 20:55:07 +0000149
def rewrite_descriptor(doc, descriptor):
    """Restructure a descriptor element into <signature>s + <description>.

    Do these things:
      1. Add an "index=noindex" attribute to the element if the tagName
         ends in 'ni', removing the 'ni' from the name.
      2. Create a <signature> from the name attribute and <args>.
      3. Create additional <signature>s from <*line{,ni}> elements,
         if found.
      4. Move remaining child nodes to a <description> element.
      5. Put it back together.
    """
    # 1.
    descname = descriptor.tagName
    index = 1
    if descname[-2:] == "ni":
        descname = descname[:-2]
        descriptor.setAttribute("index", "noindex")
        descriptor._node.name = descname
        index = 0
    desctype = descname[:-4]  # remove 'desc'
    linename = desctype + "line"
    if not index:
        linename = linename + "ni"
    # 2.
    signature = doc.createElement("signature")
    name = doc.createElement("name")
    signature.appendChild(doc.createTextNode("\n "))
    signature.appendChild(name)
    name.appendChild(doc.createTextNode(descriptor.getAttribute("name")))
    descriptor.removeAttribute("name")
    if descriptor.attributes.has_key("var"):
        variable = descriptor.getAttribute("var")
        if variable:
            args = doc.createElement("args")
            args.appendChild(doc.createTextNode(variable))
            signature.appendChild(doc.createTextNode("\n "))
            signature.appendChild(args)
        descriptor.removeAttribute("var")
    newchildren = [signature]
    children = descriptor.childNodes
    pos = skip_leading_nodes(children, 0)
    if pos < len(children):
        child = children[pos]
        if child.nodeType == xml.dom.core.ELEMENT and child.tagName == "args":
            # create an <args> in <signature>:
            args = doc.createElement("args")
            move_children(child, args)
            signature.appendChild(doc.createTextNode("\n "))
            signature.appendChild(args)
            # Step past the now-empty <args>.  Only do so in this case:
            # any other node found here is real content, and skipping it
            # would leave it in the descriptor to be discarded by step 5.
            pos = pos + 1
    signature.appendChild(doc.createTextNode("\n "))
    # 3.
    pos = skip_leading_nodes(children, pos)
    while pos < len(children) \
          and children[pos].nodeType == xml.dom.core.ELEMENT \
          and children[pos].tagName == linename:
        # this is really a supplemental signature, create <signature>
        sig = methodline_to_signature(doc, children[pos])
        newchildren.append(sig)
        pos = skip_leading_nodes(children, pos + 1)
    # 4.
    description = doc.createElement("description")
    description.appendChild(doc.createTextNode("\n"))
    newchildren.append(description)
    move_children(descriptor, description, pos)
    last = description.childNodes[-1]
    if last.nodeType == xml.dom.core.TEXT:
        last.data = string.rstrip(last.data) + "\n "
    # 5.
    # should have nothing but whitespace and signature lines in <descriptor>;
    # discard them
    while descriptor.childNodes:
        descriptor.removeChild(descriptor.childNodes[0])
    for node in newchildren:
        descriptor.appendChild(doc.createTextNode("\n "))
        descriptor.appendChild(node)
    descriptor.appendChild(doc.createTextNode("\n"))
Fred Drake03204731998-11-23 17:02:03 +0000229
Fred Drake7dab6af1999-01-28 23:59:58 +0000230
def methodline_to_signature(doc, methodline):
    """Build and return a <signature> element from a *line element.

    The "name" attribute of methodline becomes the text of a <name>
    child (and is removed from methodline); any children of methodline
    are moved into an <args> child.
    """
    new_text = doc.createTextNode
    sig = doc.createElement("signature")
    sig.appendChild(new_text("\n "))
    name_elem = doc.createElement("name")
    name_elem.appendChild(new_text(methodline.getAttribute("name")))
    methodline.removeAttribute("name")
    sig.appendChild(name_elem)
    if len(methodline.childNodes):
        arg_elem = doc.createElement("args")
        sig.appendChild(new_text("\n "))
        sig.appendChild(arg_elem)
        move_children(methodline, arg_elem)
    sig.appendChild(new_text("\n "))
    return sig
Fred Drake03204731998-11-23 17:02:03 +0000245
246
def move_children(origin, dest, start=0):
    """Move the children of origin from index start onward to the end of dest.

    Relies on origin.childNodes shrinking as nodes are removed, so the
    node at index start is always the next one to move.
    """
    remaining = origin.childNodes
    while len(remaining) > start:
        moved = remaining[start]
        origin.removeChild(moved)
        dest.appendChild(moved)
253
254
def handle_appendix(doc):
    """Move everything after the <appendix> marker into <back-matter>.

    The children of the document element are scanned for the first
    element that contains an <appendix>; that marker is removed from its
    parent, and every following child of the document element is moved
    into a new <back-matter> element appended to the document element.
    Nothing is added when no nodes follow the marker.
    """
    # must be called after simplify() if document is multi-rooted to begin with
    docelem = doc.documentElement
    appendices = 0
    nodes = []
    for node in docelem.childNodes:
        if appendices:
            nodes.append(node)
        elif node.nodeType == xml.dom.core.ELEMENT:
            appnodes = node.getElementsByTagName("appendix")
            if appnodes:
                appendices = 1
                parent = appnodes[0].parentNode
                parent.removeChild(appnodes[0])
                parent.normalize()
    if not nodes:
        return
    for node in nodes:
        docelem.removeChild(node)
    docelem.appendChild(doc.createTextNode("\n\n\n"))
    back = doc.createElement("back-matter")
    docelem.appendChild(back)
    back.appendChild(doc.createTextNode("\n"))
    # drop whitespace-only text nodes leading the back matter
    while nodes and nodes[0].nodeType == xml.dom.core.TEXT \
          and not string.strip(nodes[0].data):
        del nodes[0]
    for node in nodes:
        back.appendChild(node)
    docelem.appendChild(doc.createTextNode("\n"))
Fred Drake03204731998-11-23 17:02:03 +0000282
283
def handle_labels(doc):
    """Turn <label id="..."/> elements into "id" attributes.

    The id is set on the label's parent, or on the grandparent when the
    parent is a <title>; the <label> is then removed.  Labels with an
    empty id are left in place.
    """
    for label in find_all_elements(doc, "label"):
        label_id = label.getAttribute("id")
        if label_id:
            holder = label.parentNode
            target = holder
            if holder.tagName == "title":
                target = holder.parentNode
            target.setAttribute("id", label_id)
            # now, remove <label id="..."/> from parent:
            holder.removeChild(label)
Fred Drake03204731998-11-23 17:02:03 +0000296
297
def fixup_trailing_whitespace(doc, wsmap):
    """Normalize the whitespace that ends selected elements.

    wsmap maps a tag name to the whitespace that should end elements of
    that name.  For each such element whose last child is a text node,
    that node's trailing whitespace is replaced by the mapped string.
    The tree is walked breadth-first from doc.
    """
    queue = [doc]
    while queue:
        node = queue[0]
        del queue[0]
        if node.nodeType == xml.dom.core.ELEMENT \
           and wsmap.has_key(node.tagName):
            ws = wsmap[node.tagName]
            children = node.childNodes
            # An element without children has no trailing text to adjust.
            if len(children):
                last = children[-1]
                if last.nodeType == xml.dom.core.TEXT:
                    last.data = string.rstrip(last.data) + ws
            # hack to get the title in place:
            if node.tagName == "title" \
               and node.parentNode.firstChild.nodeType == xml.dom.core.ELEMENT:
                node.parentNode.insertBefore(doc.createTextNode("\n "),
                                             node.parentNode.firstChild)
        for child in node.childNodes:
            if child.nodeType == xml.dom.core.ELEMENT:
                queue.append(child)
320
321
def normalize(doc):
    """Call normalize() on every element that is a direct child of doc."""
    element_type = xml.dom.core.ELEMENT
    for top in doc.childNodes:
        if top.nodeType != element_type:
            continue
        top.normalize()
326
327
def cleanup_trailing_parens(doc, element_names):
    """Strip a trailing "()" from the text of the named elements.

    Only elements whose sole child is a text node are changed.  Elements
    listed in element_names are not searched any deeper; all other
    elements are walked breadth-first.
    """
    targets = {}
    for gi in element_names:
        targets[gi] = gi
    pending = []
    for top in doc.childNodes:
        if top.nodeType == xml.dom.core.ELEMENT:
            pending.append(top)
    while pending:
        current = pending[0]
        del pending[0]
        if not targets.has_key(current.tagName):
            for kid in current.childNodes:
                if kid.nodeType == xml.dom.core.ELEMENT:
                    pending.append(kid)
            continue
        kids = current.childNodes
        if len(kids) != 1:
            continue
        only = kids[0]
        if only.nodeType != xml.dom.core.TEXT:
            continue
        text = only.data
        if text[-2:] == "()":
            only.data = text[:-2]
351
352
def contents_match(left, right):
    """Return 1 if left and right have matching children, else 0.

    Children are compared pairwise: elements must share a tag name and
    have matching contents (attributes are not compared), text nodes must
    carry equal data, and any other node type counts as a mismatch.
    """
    lkids = left.childNodes
    rkids = right.childNodes
    count = len(lkids)
    if count != len(rkids):
        return 0
    for i in range(count):
        a = lkids[i]
        b = rkids[i]
        kind = a.nodeType
        if kind != b.nodeType:
            return 0
        if kind == xml.dom.core.TEXT:
            if a.data != b.data:
                return 0
        elif kind == xml.dom.core.ELEMENT:
            if a.tagName != b.tagName:
                return 0
            # should check attributes, but that's not a problem here
            if not contents_match(a, b):
                return 0
        else:
            # not quite right, but good enough
            return 0
    return 1
375
376
def create_module_info(doc, section):
    """Gather a section's module markup into one <moduleinfo> element.

    Does nothing unless section has a <modulesynopsis> child.  That child
    is renamed <synopsis> and a <moduleauthor> child is renamed <author>.
    For a real <section>, these are combined with <name>/<type> taken
    from <declaremodule> and, when the title starts with the module name,
    a <short-synopsis> derived from the <title>.
    """
    # Heavy.
    synopsis = extract_first_element(section, "modulesynopsis")
    if synopsis is None:
        return
    synopsis._node.name = "synopsis"
    tail = synopsis.childNodes[-1]
    if tail.nodeType == xml.dom.core.TEXT \
       and tail.data[-1:] == ".":
        # drop the full stop ending the synopsis
        tail.data = tail.data[:-1]
    modauthor = extract_first_element(section, "moduleauthor")
    if modauthor:
        modauthor._node.name = "author"
        author_name = modauthor.getAttribute("name")
        modauthor.appendChild(doc.createTextNode(author_name))
        modauthor.removeAttribute("name")
    if section.tagName != "section":
        return
    modinfo_pos = 2
    modinfo = doc.createElement("moduleinfo")
    moddecl = extract_first_element(section, "declaremodule")
    name = None
    if moddecl:
        modinfo.appendChild(doc.createTextNode("\n "))
        name = moddecl.attributes["name"].value
        namenode = doc.createElement("name")
        namenode.appendChild(doc.createTextNode(name))
        modinfo.appendChild(namenode)
        type_attr = moddecl.attributes.get("type")
        if type_attr:
            type_text = type_attr.value
            modinfo.appendChild(doc.createTextNode("\n "))
            typenode = doc.createElement("type")
            typenode.appendChild(doc.createTextNode(type_text))
            modinfo.appendChild(typenode)
    title = get_first_element(section, "title")
    if not title:
        sys.stderr.write(
            "Unexpected condition: <section> without <title>\n")
    else:
        children = title.childNodes
        if len(children) >= 2 \
           and children[0].nodeType == xml.dom.core.ELEMENT \
           and children[0].tagName == "module" \
           and children[0].childNodes[0].data == name:
            # this is it; morph the <title> into <short-synopsis>
            first_data = children[1]
            if first_data.data[:4] == " ---":
                first_data.data = string.lstrip(first_data.data[4:])
            title._node.name = "short-synopsis"
            if children[-1].nodeType == xml.dom.core.TEXT \
               and children[-1].data[-1:] == ".":
                children[-1].data = children[-1].data[:-1]
            section.removeChild(title)
            section.removeChild(section.childNodes[0])
            title.removeChild(children[0])
            modinfo_pos = 0
        else:
            sys.stderr.write(
                "module name in title doesn't match <declaremodule>;"
                " no <short-synopsis>\n")
    modinfo.appendChild(doc.createTextNode("\n "))
    modinfo.appendChild(synopsis)
    if title and not contents_match(title, synopsis):
        # The short synopsis is actually different,
        # and needs to be stored:
        modinfo.appendChild(doc.createTextNode("\n "))
        modinfo.appendChild(title)
    if modauthor:
        modinfo.appendChild(doc.createTextNode("\n "))
        modinfo.appendChild(modauthor)
    modinfo.appendChild(doc.createTextNode("\n "))
    section.insertBefore(modinfo, section.childNodes[modinfo_pos])
    section.insertBefore(doc.createTextNode("\n "), modinfo)
450
451
def cleanup_synopses(doc):
    """Run create_module_info() over every <section> element in doc."""
    sections = find_all_elements(doc, "section")
    for section in sections:
        create_module_info(doc, section)
Fred Drakeaaed9711998-12-10 20:25:30 +0000455
456
def remap_element_names(root, name_map):
    """Rename elements below root according to name_map.

    name_map maps an old tag name to a (new name, attributes) pair; each
    matching element is renamed and given the listed attributes.  root
    itself is never renamed.
    """
    element_type = xml.dom.core.ELEMENT
    stack = []
    for top in root.childNodes:
        if top.nodeType == element_type:
            stack.append(top)
    while stack:
        current = stack.pop()
        old_name = current.tagName
        if name_map.has_key(old_name):
            new_name, extra_attrs = name_map[old_name]
            current._node.name = new_name
            for key, val in extra_attrs.items():
                current.setAttribute(key, val)
        for kid in current.childNodes:
            if kid.nodeType == element_type:
                stack.append(kid)
473
474
def fixup_table_structures(doc):
    """Restructure every <table> element found in doc."""
    # must be done after remap_element_names(), or the tables won't be found
    tables = find_all_elements(doc, "table")
    for table in tables:
        fixup_table(doc, table)
479
Fred Drakef8ebb551999-01-14 19:45:38 +0000480
def fixup_table(doc, table):
    """Rebuild table as <tgroup> holding a <thead> and a <tbody>.

    Direct <entry> children become the single header row and direct <row>
    children become the body; a row followed by an <hline> gets
    rowsep="1".  Raises ConversionError if anything other than
    whitespace and <hline> elements is left over.
    """
    new_text = doc.createTextNode
    # create the table head
    thead = doc.createElement("thead")
    head_row = doc.createElement("row")
    move_elements_by_name(doc, table, head_row, "entry")
    thead.appendChild(new_text("\n "))
    thead.appendChild(head_row)
    thead.appendChild(new_text("\n "))
    # create the table body
    tbody = doc.createElement("tbody")
    prev_row = None
    children = table.childNodes
    for item in children:
        if item.nodeType != xml.dom.core.ELEMENT:
            continue
        gi = item.tagName
        if gi == "hline" and prev_row is not None:
            prev_row.setAttribute("rowsep", "1")
        elif gi == "row":
            prev_row = item
    # save the rows:
    tbody.appendChild(new_text("\n "))
    move_elements_by_name(doc, table, tbody, "row", sep="\n ")
    # and toss the rest:
    while children:
        leftover = children[0]
        kind = leftover.nodeType
        if kind == xml.dom.core.TEXT:
            if string.strip(leftover.data):
                raise ConversionError("unexpected free data in table")
        elif kind == xml.dom.core.ELEMENT:
            if leftover.tagName != "hline":
                raise ConversionError(
                    "unexpected <%s> in table" % leftover.tagName)
        else:
            raise ConversionError(
                "unexpected %s node in table" % leftover.__class__.__name__)
        table.removeChild(leftover)
    # nothing left in the <table>; add the <thead> and <tbody>
    tgroup = doc.createElement("tgroup")
    tgroup.appendChild(new_text("\n "))
    tgroup.appendChild(thead)
    tgroup.appendChild(new_text("\n "))
    tgroup.appendChild(tbody)
    tgroup.appendChild(new_text("\n "))
    table.appendChild(tgroup)
    # now make the <entry>s look nice:
    for body_row in table.getElementsByTagName("row"):
        fixup_row(doc, body_row)
532
533
def fixup_row(doc, row):
    """Insert a "\\n " text node before every child of row but the first.

    The children are copied into a list first because the inserts change
    row's child list while we work.
    """
    entries = []
    for entry in row.childNodes[1:]:
        entries.append(entry)
    for entry in entries:
        row.insertBefore(doc.createTextNode("\n "), entry)
    # NOTE: no text node is appended after the last entry.
540
541
def move_elements_by_name(doc, source, dest, name, sep=None):
    """Move the direct child elements of source named name to dest.

    When sep is given (and non-empty), a text node holding sep is
    appended to dest after each moved element.
    """
    chosen = []
    for kid in source.childNodes:
        if kid.nodeType != xml.dom.core.ELEMENT:
            continue
        if kid.tagName == name:
            chosen.append(kid)
    for kid in chosen:
        source.removeChild(kid)
        dest.appendChild(kid)
        if sep:
            dest.appendChild(doc.createTextNode(sep))
552
553
# Elements whose children are themselves searched for paragraph material.
RECURSE_INTO_PARA_CONTAINERS = (
    "chapter",
    "abstract",
    "enumerate",
    "section",
    "subsection",
    "subsubsection",
    "paragraph",
    "subparagraph",
    "howto",
    "manual",
    )

# Elements that end a paragraph being collected and are skipped, not
# wrapped, when looking for the start of the next one.
PARA_LEVEL_ELEMENTS = (
    "moduleinfo",
    "title",
    "verbatim",
    "enumerate",
    "item",
    "opcodedesc",
    "classdesc",
    "datadesc",
    "funcdesc",
    "methoddesc",
    "excdesc",
    "funcdescni",
    "methoddescni",
    "excdescni",
    "tableii",
    "tableiii",
    "tableiv",
    "localmoduletable",
    "sectionauthor",
    "seealso",
    # include <para>, so we can just do it again to get subsequent paras:
    "para",
    )

# Elements skipped over when looking for the start of a paragraph.
PARA_LEVEL_PRECEEDERS = (
    "index",
    "indexii",
    "indexiii",
    "indexiv",
    "setindexsubitem",
    "stindex",
    "obindex",
    "COMMENT",
    "label",
    "input",
    "title",
    )
576
Fred Drake7dab6af1999-01-28 23:59:58 +0000577
def fixup_paras(doc):
    """Wrap loose paragraph material in <para> elements.

    Processes every root-level paragraph container first, then every
    <description> element found anywhere in doc.
    """
    for top in doc.childNodes:
        if top.nodeType != xml.dom.core.ELEMENT:
            continue
        if top.tagName in RECURSE_INTO_PARA_CONTAINERS:
            fixup_paras_helper(doc, top)
    for description in find_all_elements(doc, "description"):
        fixup_paras_helper(doc, description)
Fred Drakefcc59101999-01-06 22:50:52 +0000587
588
def fixup_paras_helper(doc, container, depth=0):
    """Build <para> elements from the children of container.

    Children that are paragraph containers are handled recursively; any
    other material that skip_leading_nodes() stops at is handed to
    build_para().  depth only matters when DEBUG_PARA_FIXER is set: the
    process then exits once a paragraph is built at depth 10.
    """
    # document is already normalized
    kids = container.childNodes
    start = 0
    while start < len(kids):
        start = skip_leading_nodes(kids, start)
        if start >= len(kids):
            break
        candidate = kids[start]
        if candidate.nodeType == xml.dom.core.ELEMENT \
           and candidate.tagName in RECURSE_INTO_PARA_CONTAINERS:
            # something to recurse into:
            fixup_paras_helper(doc, candidate)
            start = skip_leading_nodes(kids, start + 1)
        else:
            # paragraph material:
            build_para(doc, container, start, len(kids))
            if DEBUG_PARA_FIXER and depth == 10:
                sys.exit(1)
            start = start + 1
Fred Drakefcc59101999-01-06 22:50:52 +0000612
613
def build_para(doc, parent, start, i):
    """Wrap a run of parent's children, beginning at start, in a <para>.

    Children are collected up to (not including) index i, stopping early
    at a blank line ("\\n\\n") inside a text node or at a paragraph-level
    or container element.  Trailing whitespace is split off so it stays
    outside the paragraph.  Returns the index just past the new <para>.
    Raises ConversionError when nothing can be collected.
    """
    kids = parent.childNodes
    stop_tags = PARA_LEVEL_ELEMENTS + RECURSE_INTO_PARA_CONTAINERS
    after = start + 1
    have_last = 0
    # Collect all children until \n\n+ is found in a text node or a
    # member of stop_tags is found.
    for j in range(start, i):
        after = j + 1
        item = kids[j]
        kind = item.nodeType
        if kind == xml.dom.core.ELEMENT:
            if item.tagName in stop_tags:
                after = j
                break
        elif kind == xml.dom.core.TEXT:
            gap = string.find(item.data, "\n\n")
            if gap >= 0:
                if gap == 0:
                    # the break starts this node; leave it out entirely
                    after = j
                else:
                    # keep the text before the break, split off the rest
                    item.splitText(gap)
                break
    else:
        # ran through index i - 1 without hitting a break
        have_last = 1
    if after <= start:
        raise ConversionError(
            "build_para() could not identify content to turn into a paragraph")
    tail = kids[after - 1]
    if tail.nodeType == xml.dom.core.TEXT:
        # we may need to split off trailing white space:
        text = tail.data
        stripped = string.rstrip(text)
        if stripped != text:
            have_last = 0
            tail.splitText(len(stripped))
    para = doc.createElement("para")
    # Move the nodes back-to-front, inserting each before the previous one.
    prev = None
    j = after - 1
    while j >= start:
        moved = parent.childNodes[j]
        parent.removeChild(moved)
        para.insertBefore(moved, prev)
        prev = moved
        j = j - 1
    if not have_last:
        parent.insertBefore(para, parent.childNodes[start])
        return start + 1
    parent.appendChild(para)
    return len(parent.childNodes)
Fred Drakefcc59101999-01-06 22:50:52 +0000664
665
def skip_leading_nodes(children, start):
    """Return index into children of a node at which paragraph building should
    begin or a recursive call to fixup_paras_helper() should be made (for
    subsections, etc.).

    When the return value >= len(children), we've built all the paras we can
    from this list of children.
    """
    skippable = PARA_LEVEL_ELEMENTS + PARA_LEVEL_PRECEEDERS
    limit = len(children)
    while start < limit:
        # skip over leading comments and whitespace:
        item = children[start]
        kind = item.nodeType
        if kind == xml.dom.core.TEXT:
            text = item.data
            body = string.lstrip(text)
            if body:
                if text == body:
                    return start
                # break into two nodes: whitespace and non-whitespace
                item.splitText(len(text) - len(body))
                return start + 1
            # all whitespace, just skip
        elif kind == xml.dom.core.ELEMENT:
            gi = item.tagName
            if gi in RECURSE_INTO_PARA_CONTAINERS or gi not in skippable:
                return start
        start = start + 1
    return start
Fred Drakefba0ba21998-12-10 05:07:09 +0000697
698
def fixup_rfc_references(doc):
    """Give every <rfc> element the text "RFC <num>" from its num attribute."""
    for rfcnode in find_all_elements(doc, "rfc"):
        label = "RFC " + rfcnode.getAttribute("num")
        rfcnode.appendChild(doc.createTextNode(label))
Fred Draked24167b1999-01-14 21:18:03 +0000703
704
def fixup_signatures(doc):
    """Flatten <optional> markup in all <args> and <constructor-args>.

    Each matching element below a root-level element is passed to
    fixup_args() and then normalized.
    """
    for top in doc.childNodes:
        if top.nodeType != xml.dom.core.ELEMENT:
            continue
        for gi in ("args", "constructor-args"):
            for arglist in top.getElementsByTagName(gi):
                fixup_args(doc, arglist)
                arglist.normalize()
716
717
def fixup_args(doc, arglist):
    """Replace each <optional> child of arglist by "[" + contents + "]".

    The contents are moved up into arglist, so an <optional> nested in
    another one is picked up on a later pass.  Returns None.
    """
    while 1:
        optional = None
        for kid in arglist.childNodes:
            if kid.nodeType == xml.dom.core.ELEMENT \
               and kid.tagName == "optional":
                optional = kid
                break
        if optional is None:
            return None
        # found it; fix and look again
        arglist.insertBefore(doc.createTextNode("["), optional)
        inner = optional.childNodes
        while inner:
            moved = inner[0]
            optional.removeChild(moved)
            arglist.insertBefore(moved, optional)
        arglist.insertBefore(doc.createTextNode("]"), optional)
        arglist.removeChild(optional)
732
733
def fixup_sectionauthors(doc):
    """Turn each <sectionauthor> into an <author> near the section start.

    The element is renamed, its "name" attribute becomes its text, and it
    is re-inserted before the section's third child, or before the first
    child when the second child is an element other than <title>.
    """
    for author in find_all_elements(doc, "sectionauthor"):
        section = author.parentNode
        section.removeChild(author)
        author._node.name = "author"
        author_name = author.getAttribute("name")
        author.appendChild(doc.createTextNode(author_name))
        author.removeAttribute("name")
        anchor = section.childNodes[2]
        second = section.childNodes[1]
        if second.nodeType == xml.dom.core.ELEMENT \
           and second.tagName != "title":
            anchor = section.childNodes[0]
        section.insertBefore(doc.createTextNode("\n "), anchor)
        section.insertBefore(author, anchor)
748
749
# Attribute values matching this pattern are written as TOKEN, not CDATA.
_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")

def write_esis(doc, ofp, knownempty):
    """Write the children of doc to ofp as ESIS events, recursively.

    knownempty is called with a tag name and tells whether that element
    is declared empty; such elements get an "e" line before their
    attributes.  Raises ValueError if a declared-empty element has
    children and RuntimeError for nodes that are neither element nor
    text.
    """
    emit = ofp.write
    for node in doc.childNodes:
        kind = node.nodeType
        if kind == xml.dom.core.TEXT:
            emit("-%s\n" % esistools.encode(node.data))
            continue
        if kind != xml.dom.core.ELEMENT:
            raise RuntimeError("unsupported node type: %s" % kind)
        gi = node.tagName
        if knownempty(gi):
            if node.hasChildNodes():
                raise ValueError("declared-empty node has children")
            emit("e\n")
        for k, v in node.attributes.items():
            value = v.value
            dtype = "CDATA"
            if _token_rx.match(value):
                dtype = "TOKEN"
            emit("A%s %s %s\n" % (k, dtype, esistools.encode(value)))
        emit("(%s\n" % gi)
        write_esis(node, ofp, knownempty)
        emit(")%s\n" % gi)
775
776
def convert(ifp, ofp):
    """Read ESIS data from ifp, fix up the tree, and write ESIS to ofp.

    The transformations are order-sensitive (see the notes on the
    individual steps).  An IOError with errno EPIPE while writing is
    ignored; any other IOError is re-raised.
    """
    p = esistools.ExtendedEsisBuilder()
    p.feed(ifp.read())
    doc = p.document
    normalize(doc)
    simplify(doc)
    handle_labels(doc)
    # must come after simplify() for multi-rooted documents
    handle_appendix(doc)
    fixup_trailing_whitespace(doc, {
        "abstract": "\n",
        "title": "",
        "chapter": "\n\n",
        "section": "\n\n",
        "subsection": "\n\n",
        "subsubsection": "\n\n",
        "paragraph": "\n\n",
        "subparagraph": "\n\n",
        })
    cleanup_root_text(doc)
    cleanup_trailing_parens(doc, ["function", "method", "cfunction"])
    cleanup_synopses(doc)
    fixup_descriptors(doc)
    normalize(doc)
    fixup_paras(doc)
    fixup_sectionauthors(doc)
    remap_element_names(doc, {
        "tableii": ("table", {"cols": "2"}),
        "tableiii": ("table", {"cols": "3"}),
        "tableiv": ("table", {"cols": "4"}),
        "lineii": ("row", {}),
        "lineiii": ("row", {}),
        "lineiv": ("row", {}),
        "refmodule": ("module", {"link": "link"}),
        })
    # must come after remap_element_names(), or the tables won't be found
    fixup_table_structures(doc)
    fixup_rfc_references(doc)
    fixup_signatures(doc)
    #
    d = {}
    for gi in p.get_empties():
        d[gi] = gi
    # fixup_rfc_references() gave every <rfc> a text child, so it can no
    # longer be written as a declared-empty element.
    if d.has_key("rfc"):
        del d["rfc"]
    knownempty = d.has_key
    #
    try:
        write_esis(doc, ofp, knownempty)
    except IOError:
        # Ignore EPIPE; it just means that whoever we're writing to stopped
        # reading.  The rest of the output would be ignored.  All other
        # errors should still be reported.
        #
        # Look the error code up on the exception object instead of
        # unpacking it as a pair: an IOError that does not carry exactly
        # two arguments would otherwise fail here and hide the real error.
        exc = sys.exc_info()[1]
        if getattr(exc, "errno", None) != errno.EPIPE:
            raise
830
831
def main():
    """Command-line entry point: docfixer [infile [outfile]].

    Input defaults to stdin and output to stdout.  With more than two
    arguments a usage message is written to stderr and the process exits
    with status 2.
    """
    argc = len(sys.argv)
    if argc == 1:
        ifp = sys.stdin
        ofp = sys.stdout
    elif argc == 2:
        ifp = open(sys.argv[1])
        ofp = sys.stdout
    elif argc == 3:
        ifp = open(sys.argv[1])
        ofp = open(sys.argv[2], "w")
    else:
        # No usage() helper exists in this file, so report it inline.
        sys.stderr.write("usage: %s [infile [outfile]]\n" % sys.argv[0])
        sys.exit(2)
    convert(ifp, ofp)


if __name__ == "__main__":
    main()