blob: 0b73126551600d08ca8bd837529a2c519524d330 [file] [log] [blame]
Fred Drake03204731998-11-23 17:02:03 +00001#! /usr/bin/env python
2
Fred Drake7dab6af1999-01-28 23:59:58 +00003"""Perform massive transformations on a document tree created from the LaTeX
4of the Python documentation, and dump the ESIS data for the transformed tree.
Fred Drake03204731998-11-23 17:02:03 +00005"""
6__version__ = '$Revision$'
7
8
9import errno
Fred Drake4db5b461998-12-01 19:03:01 +000010import esistools
11import re
Fred Drake03204731998-11-23 17:02:03 +000012import string
13import sys
14import xml.dom.core
15import xml.dom.esis_builder
16
17
class ConversionError(Exception):
    """Signals that the tree has a shape the fixer cannot convert."""
20
21
# Debugging switch for the paragraph fixer: it selects the para_msg()
# implementation below and is also consulted by fixup_paras_helper().
DEBUG_PARA_FIXER = 0


def _para_msg_verbose(s):
    """Write the trace message `s` to stderr, prefixed with '*** '."""
    sys.stderr.write("*** %s\n" % s)


def _para_msg_silent(s):
    """Discard the trace message `s`."""
    return None


# The implementation is chosen once, when the module is loaded.
if DEBUG_PARA_FIXER:
    para_msg = _para_msg_verbose
else:
    para_msg = _para_msg_silent
30
Fred Drakefcc59101999-01-06 22:50:52 +000031
# Workaround to deal with invalid documents (multiple root elements).  This
# does not indicate a bug in the DOM implementation.
#
def get_documentElement(self):
    """Return the last element child of the document, or None.

    The raw children are scanned in order and the final one whose type is
    ELEMENT wins, so a document with several root-level elements still
    yields a usable document element.  Only that final node is wrapped in
    an Element; earlier candidates are never wrapped.
    """
    last = None
    for raw in self._node.children:
        if raw.type == xml.dom.core.ELEMENT:
            last = raw
    if last is None:
        return None
    return xml.dom.core.Element(last, self, self)

xml.dom.core.Document.get_documentElement = get_documentElement
43
44
# Replace get_childNodes for the Document class; without this, children
# accessed from the Document object via .childNodes (no matter how many
# levels of access are used) will be given an ownerDocument of None.
#
def get_childNodes(self):
    """Wrap the document's raw children in a NodeList.

    The document itself is passed for both trailing NodeList arguments.
    """
    owner = self
    return xml.dom.core.NodeList(self._node.children, owner, owner)

xml.dom.core.Document.get_childNodes = get_childNodes
53
54
def get_first_element(doc, gi):
    """Return the first direct child element of `doc` named `gi`, or None."""
    for child in doc.childNodes:
        if child.nodeType != xml.dom.core.ELEMENT:
            continue
        if child.tagName == gi:
            return child
    return None
59
def extract_first_element(doc, gi):
    """Detach and return the first direct child element named `gi`.

    Returns None, leaving `doc` untouched, when there is no such child.
    """
    found = get_first_element(doc, gi)
    if found is None:
        return None
    doc.removeChild(found)
    return found
65
66
def find_all_elements(doc, gi):
    """Return a list of the elements named `gi` at or below `doc`.

    `doc` itself is included when it is an element with that name.  Each
    element child contributes itself (if it matches) followed by whatever
    its getElementsByTagName(gi) reports.
    """
    found = []
    if doc.nodeType == xml.dom.core.ELEMENT and doc.tagName == gi:
        found.append(doc)
    for child in doc.childNodes:
        if child.nodeType != xml.dom.core.ELEMENT:
            continue
        if child.tagName == gi:
            found.append(child)
        for descendant in child.getElementsByTagName(gi):
            found.append(descendant)
    return found
78
79
def simplify(doc):
    """Rationalize a multi-rooted document into a single usable root.

    Try to rationalize the document a bit, since these things are simply
    not valid SGML/XML documents as they stand, and need a little work:

      * a root-level <documentclass> is removed and its "classname"
        attribute becomes the name of the root-level <document> element;
      * a root-level <title> and every root-level <input> are removed and
        re-inserted, in their original order, at the front of the document
        element, separated by newline text nodes;
      * text nodes at the very start of the document are dropped.

    doc -- the document node; it is modified in place.
    """
    documentclass = "document"
    inputs = []
    node = extract_first_element(doc, "documentclass")
    if node is not None:
        documentclass = node.getAttribute("classname")
    node = extract_first_element(doc, "title")
    if node is not None:
        inputs.append(node)
    # update the name of the root element
    node = get_first_element(doc, "document")
    if node is not None:
        node._node.name = documentclass
    while 1:
        node = extract_first_element(doc, "input")
        if node is None:
            break
        inputs.append(node)
    if inputs:
        docelem = doc.documentElement
        # Each node is inserted at the front, so walk the list backwards
        # to end up with the original order.
        inputs.reverse()
        for node in inputs:
            text = doc.createTextNode("\n")
            docelem.insertBefore(text, docelem.firstChild)
            docelem.insertBefore(node, text)
        docelem.insertBefore(doc.createTextNode("\n"), docelem.firstChild)
    # Stop once the document has no children left instead of dereferencing
    # a missing first child.
    while doc.childNodes and doc.firstChild.nodeType == xml.dom.core.TEXT:
        doc.removeChild(doc.firstChild)
110
111
def cleanup_root_text(doc):
    """Remove root-level text nodes, except one directly after a COMMENT.

    A text node is kept only when the node immediately before it is a
    <COMMENT> element; every other direct text child of `doc` is removed.
    """
    doomed = []
    follows_comment = 0
    for child in doc.childNodes:
        kind = child.nodeType
        if kind == xml.dom.core.TEXT:
            if not follows_comment:
                doomed.append(child)
            follows_comment = 0
        elif kind == xml.dom.core.ELEMENT and child.tagName == "COMMENT":
            follows_comment = 1
        else:
            follows_comment = 0
    # Removal is deferred so the child list is not changed while scanning.
    for child in doomed:
        doc.removeChild(child)
124
125
# Tag names that fixup_descriptors() hands to rewrite_descriptor().
DESCRIPTOR_ELEMENTS = (
    "cfuncdesc",
    "cvardesc",
    "ctypedesc",
    "classdesc",
    "memberdesc",
    "memberdescni",
    "methoddesc",
    "methoddescni",
    "excdesc",
    "funcdesc",
    "funcdescni",
    "opcodedesc",
    "datadesc",
    "datadescni",
    )
132
def fixup_descriptors(doc):
    """Rewrite every element whose tag name is in DESCRIPTOR_ELEMENTS."""
    for tagName in DESCRIPTOR_ELEMENTS:
        for descriptor in find_all_elements(doc, tagName):
            rewrite_descriptor(doc, descriptor)
138
def rewrite_descriptor(doc, descriptor):
    """Restructure one descriptor element in place.

    Do these things:
      1. Add an "index=noindex" attribute to the element if the tagName
         ends in 'ni', removing the 'ni' from the name.
      2. Create a <signature> from the name attribute and <args>.
      3. Create additional <signature>s from <*line{,ni}> elements,
         if found.
      4. Move remaining child nodes to a <description> element.
      5. Put it back together.

    doc        -- document node used to create new elements and text
    descriptor -- the descriptor element to rewrite
    """
    # 1.
    descname = descriptor.tagName
    index = 1
    if descname[-2:] == "ni":
        descname = descname[:-2]
        descriptor.setAttribute("index", "noindex")
        descriptor._node.name = descname
        index = 0
    desctype = descname[:-4]            # remove 'desc'
    linename = desctype + "line"
    if not index:
        linename = linename + "ni"
    # 2.
    signature = doc.createElement("signature")
    name = doc.createElement("name")
    signature.appendChild(doc.createTextNode("\n "))
    signature.appendChild(name)
    name.appendChild(doc.createTextNode(descriptor.getAttribute("name")))
    descriptor.removeAttribute("name")
    if descriptor.attributes.has_key("var"):
        variable = descriptor.getAttribute("var")
        if variable:
            args = doc.createElement("args")
            args.appendChild(doc.createTextNode(variable))
            signature.appendChild(doc.createTextNode("\n "))
            signature.appendChild(args)
        descriptor.removeAttribute("var")
    newchildren = [signature]
    children = descriptor.childNodes
    pos = skip_leading_nodes(children, 0)
    if pos < len(children):
        child = children[pos]
        if child.nodeType == xml.dom.core.ELEMENT and child.tagName == "args":
            # create an <args> in <signature>:
            args = doc.createElement("args")
            # Snapshot the children first; they are removed while moving.
            argchildren = []
            for n in child.childNodes:
                argchildren.append(n)
            for n in argchildren:
                child.removeChild(n)
                args.appendChild(n)
            signature.appendChild(doc.createTextNode("\n "))
            signature.appendChild(args)
            # Step past the now-empty <args>.  This happens only when it
            # was actually consumed, so that a descriptor without <args>
            # keeps its first content node for the <description>.
            pos = pos + 1
    signature.appendChild(doc.createTextNode("\n "))
    # 3.
    pos = skip_leading_nodes(children, pos)
    while pos < len(children) \
          and children[pos].nodeType == xml.dom.core.ELEMENT \
          and children[pos].tagName == linename:
        # this is really a supplemental signature, create <signature>
        sig = methodline_to_signature(doc, children[pos])
        newchildren.append(sig)
        pos = skip_leading_nodes(children, pos + 1)
    # 4.
    description = doc.createElement("description")
    description.appendChild(doc.createTextNode("\n"))
    newchildren.append(description)
    move_children(descriptor, description, pos)
    last = description.childNodes[-1]
    if last.nodeType == xml.dom.core.TEXT:
        last.data = string.rstrip(last.data) + "\n "
    # 5.
    # should have nothing but whitespace and signature lines in <descriptor>;
    # discard them
    while descriptor.childNodes:
        descriptor.removeChild(descriptor.childNodes[0])
    for node in newchildren:
        descriptor.appendChild(doc.createTextNode("\n "))
        descriptor.appendChild(node)
    descriptor.appendChild(doc.createTextNode("\n"))
Fred Drake03204731998-11-23 17:02:03 +0000218
Fred Drake7dab6af1999-01-28 23:59:58 +0000219
def methodline_to_signature(doc, methodline):
    """Build and return a <signature> element from a *line element.

    The "name" attribute of `methodline` becomes the text of a <name>
    child (and is removed from `methodline`); any children of
    `methodline` are moved into an <args> child.
    """
    name = doc.createElement("name")
    name.appendChild(doc.createTextNode(methodline.getAttribute("name")))
    methodline.removeAttribute("name")
    signature = doc.createElement("signature")
    signature.appendChild(doc.createTextNode("\n "))
    signature.appendChild(name)
    if len(methodline.childNodes) > 0:
        args = doc.createElement("args")
        signature.appendChild(doc.createTextNode("\n "))
        signature.appendChild(args)
        move_children(methodline, args)
    signature.appendChild(doc.createTextNode("\n "))
    return signature
Fred Drake03204731998-11-23 17:02:03 +0000234
235
def move_children(origin, dest, start=0):
    """Move the children of `origin` from index `start` on to `dest`.

    The nodes are appended to `dest` in their original order; children of
    `origin` before `start` are left where they are.
    """
    remaining = len(origin.childNodes) - start
    while remaining > 0:
        # Each removal shifts the next node down into position `start`.
        node = origin.childNodes[start]
        origin.removeChild(node)
        dest.appendChild(node)
        remaining = remaining - 1
242
243
def handle_appendix(doc):
    """Move everything after the appendix marker into <back-matter>.

    The children of the document element are scanned for the first one
    containing an <appendix> element.  That marker is removed (and its
    parent normalized); every later child of the document element is then
    moved into a new <back-matter> element appended to the document
    element.  Leading whitespace-only text among the moved nodes is
    dropped.  Nothing is added when no nodes follow the marker.
    """
    # must be called after simplify() if document is multi-rooted to begin with
    docelem = doc.documentElement
    appendices = 0
    nodes = []
    for node in docelem.childNodes:
        if appendices:
            nodes.append(node)
        elif node.nodeType == xml.dom.core.ELEMENT:
            appnodes = node.getElementsByTagName("appendix")
            if appnodes:
                appendices = 1
                parent = appnodes[0].parentNode
                parent.removeChild(appnodes[0])
                parent.normalize()
    if nodes:
        for node in nodes:
            docelem.removeChild(node)
        docelem.appendChild(doc.createTextNode("\n\n\n"))
        back = doc.createElement("back-matter")
        docelem.appendChild(back)
        back.appendChild(doc.createTextNode("\n"))
        while nodes and nodes[0].nodeType == xml.dom.core.TEXT \
              and not string.strip(nodes[0].data):
            del nodes[0]
        for node in nodes:
            back.appendChild(node)
        docelem.appendChild(doc.createTextNode("\n"))
Fred Drake03204731998-11-23 17:02:03 +0000271
272
def handle_labels(doc):
    """Turn each <label id="..."/> into an "id" attribute and remove it.

    The attribute is set on the label's parent, or on the grandparent
    when the parent is a <title>.  Labels without an id are left alone.
    """
    for label in find_all_elements(doc, "label"):
        label_id = label.getAttribute("id")
        if label_id:
            parent = label.parentNode
            target = parent
            if parent.tagName == "title":
                target = parent.parentNode
            target.setAttribute("id", label_id)
            # now, remove <label id="..."/> from parent:
            parent.removeChild(label)
Fred Drake03204731998-11-23 17:02:03 +0000285
286
def fixup_trailing_whitespace(doc, wsmap):
    """Normalize the whitespace that ends selected elements.

    The tree is walked breadth-first starting at `doc`.  For every element
    whose tag name is a key of `wsmap`, a text node in last position has
    its trailing whitespace replaced by the mapped string.

    doc   -- document node; also used to create new text nodes
    wsmap -- dictionary mapping tag names to the whitespace that should
             end the element's content
    """
    queue = [doc]
    while queue:
        node = queue[0]
        del queue[0]
        if node.nodeType == xml.dom.core.ELEMENT \
           and wsmap.has_key(node.tagName):
            ws = wsmap[node.tagName]
            children = node.childNodes
            # Inspect the last child directly; an element without
            # children has nothing to trim.
            if len(children) \
               and children[-1].nodeType == xml.dom.core.TEXT:
                last = children[-1]
                last.data = string.rstrip(last.data) + ws
            # hack to get the title in place:
            if node.tagName == "title" \
               and node.parentNode.firstChild.nodeType == xml.dom.core.ELEMENT:
                node.parentNode.insertBefore(doc.createTextNode("\n "),
                                             node.parentNode.firstChild)
        for child in node.childNodes:
            if child.nodeType == xml.dom.core.ELEMENT:
                queue.append(child)
309
310
def normalize(doc):
    """Call normalize() on every element that is a direct child of `doc`."""
    for child in doc.childNodes:
        if child.nodeType != xml.dom.core.ELEMENT:
            continue
        child.normalize()
315
316
def cleanup_trailing_parens(doc, element_names):
    """Strip a trailing "()" from the text of the named elements.

    Only elements whose tag name is in `element_names` and whose sole
    child is a text node are changed.  The search is breadth-first and
    does not descend into an element that is itself in `element_names`.
    """
    targets = {}
    for gi in element_names:
        targets[gi] = 1
    pending = []
    for node in doc.childNodes:
        if node.nodeType == xml.dom.core.ELEMENT:
            pending.append(node)
    while pending:
        node = pending[0]
        del pending[0]
        if not targets.has_key(node.tagName):
            for child in node.childNodes:
                if child.nodeType == xml.dom.core.ELEMENT:
                    pending.append(child)
            continue
        children = node.childNodes
        if len(children) != 1:
            continue
        text = children[0]
        if text.nodeType != xml.dom.core.TEXT:
            continue
        data = text.data
        if data[-2:] == "()":
            text.data = data[:-2]
340
341
def contents_match(left, right):
    """Return 1 if the children of `left` and `right` match, else 0.

    Children are compared pairwise: elements must have equal tag names
    and recursively matching contents, text nodes must have equal data.
    Attributes are not compared, and any other node type counts as a
    mismatch.
    """
    left_children = left.childNodes
    right_children = right.childNodes
    count = len(left_children)
    if count != len(right_children):
        return 0
    for i in range(count):
        l = left_children[i]
        r = right_children[i]
        kind = l.nodeType
        if kind != r.nodeType:
            return 0
        if kind == xml.dom.core.TEXT:
            if l.data != r.data:
                return 0
        elif kind == xml.dom.core.ELEMENT:
            # should check attributes, but that's not a problem here
            if l.tagName != r.tagName or not contents_match(l, r):
                return 0
        else:
            # not quite right, but good enough
            return 0
    return 1
364
365
def create_module_info(doc, section):
    """Collect a section's module metadata into a <moduleinfo> element.

    Does nothing unless `section` has a direct <modulesynopsis> child.
    That child is renamed to <synopsis>, and a direct <moduleauthor>
    child, if present, is renamed to <author> with its "name" attribute
    turned into text content.  When `section` is a <section> element, a
    <moduleinfo> element is then assembled from the <declaremodule>
    name and type, the synopsis, the title (when it was converted to a
    <short-synopsis> and differs from the synopsis) and the author, and
    inserted into the section.

    doc     -- document node used to create new elements and text
    section -- the element to process; modified in place
    """
    # Heavy.
    node = extract_first_element(section, "modulesynopsis")
    if node is None:
        return
    node._node.name = "synopsis"
    # Drop a final "." from the synopsis text.
    lastchild = node.childNodes[-1]
    if lastchild.nodeType == xml.dom.core.TEXT \
       and lastchild.data[-1:] == ".":
        lastchild.data = lastchild.data[:-1]
    modauthor = extract_first_element(section, "moduleauthor")
    if modauthor:
        modauthor._node.name = "author"
        modauthor.appendChild(doc.createTextNode(
            modauthor.getAttribute("name")))
        modauthor.removeAttribute("name")
    if section.tagName == "section":
        # Index of the child before which <moduleinfo> is inserted; reset
        # to 0 below once the title has been taken out of the section.
        modinfo_pos = 2
        modinfo = doc.createElement("moduleinfo")
        moddecl = extract_first_element(section, "declaremodule")
        name = None
        if moddecl:
            modinfo.appendChild(doc.createTextNode("\n "))
            name = moddecl.attributes["name"].value
            namenode = doc.createElement("name")
            namenode.appendChild(doc.createTextNode(name))
            modinfo.appendChild(namenode)
            type = moddecl.attributes.get("type")
            if type:
                type = type.value
                modinfo.appendChild(doc.createTextNode("\n "))
                typenode = doc.createElement("type")
                typenode.appendChild(doc.createTextNode(type))
                modinfo.appendChild(typenode)
        title = get_first_element(section, "title")
        if title:
            children = title.childNodes
            # The title qualifies only if it starts with a <module>
            # element whose text equals the declared module name.
            if len(children) >= 2 \
               and children[0].nodeType == xml.dom.core.ELEMENT \
               and children[0].tagName == "module" \
               and children[0].childNodes[0].data == name:
                # this is it; morph the <title> into <short-synopsis>
                first_data = children[1]
                if first_data.data[:4] == " ---":
                    first_data.data = string.lstrip(first_data.data[4:])
                title._node.name = "short-synopsis"
                if children[-1].nodeType == xml.dom.core.TEXT \
                   and children[-1].data[-1:] == ".":
                    children[-1].data = children[-1].data[:-1]
                section.removeChild(title)
                # NOTE(review): presumably the whitespace text node that
                # preceded the title -- confirm.
                section.removeChild(section.childNodes[0])
                title.removeChild(children[0])
                modinfo_pos = 0
            else:
                sys.stderr.write(
                    "module name in title doesn't match"
                    " <declaremodule>; no <short-synopsis>\n")
        else:
            sys.stderr.write(
                "Unexpected condition: <section> without <title>\n")
        modinfo.appendChild(doc.createTextNode("\n "))
        modinfo.appendChild(node)
        if title and not contents_match(title, node):
            # The short synopsis is actually different,
            # and needs to be stored:
            modinfo.appendChild(doc.createTextNode("\n "))
            modinfo.appendChild(title)
        if modauthor:
            modinfo.appendChild(doc.createTextNode("\n "))
            modinfo.appendChild(modauthor)
        modinfo.appendChild(doc.createTextNode("\n "))
        section.insertBefore(modinfo, section.childNodes[modinfo_pos])
        section.insertBefore(doc.createTextNode("\n "), modinfo)
439
440
def cleanup_synopses(doc):
    """Run create_module_info() on every <section> found from `doc`."""
    sections = find_all_elements(doc, "section")
    for section in sections:
        create_module_info(doc, section)
Fred Drakeaaed9711998-12-10 20:25:30 +0000444
445
def remap_element_names(root, name_map):
    """Rename elements below `root` according to `name_map`.

    name_map maps an old tag name to a (new name, attributes) pair; the
    attributes dictionary is applied to each renamed element with
    setAttribute().  `root` itself is never renamed.
    """
    stack = []
    for child in root.childNodes:
        if child.nodeType == xml.dom.core.ELEMENT:
            stack.append(child)
    while stack:
        node = stack.pop()
        if name_map.has_key(node.tagName):
            newname, attrs = name_map[node.tagName]
            node._node.name = newname
            for attr, value in attrs.items():
                node.setAttribute(attr, value)
        for child in node.childNodes:
            if child.nodeType == xml.dom.core.ELEMENT:
                stack.append(child)
462
463
def fixup_table_structures(doc):
    """Apply fixup_table() to every <table> element found from `doc`."""
    # must be done after remap_element_names(), or the tables won't be found
    tables = find_all_elements(doc, "table")
    for table in tables:
        fixup_table(doc, table)
468
Fred Drakef8ebb551999-01-14 19:45:38 +0000469
def fixup_table(doc, table):
    """Rebuild the contents of `table` as <tgroup> with <thead> and <tbody>.

    The table's direct <entry> children become a single header <row>
    inside <thead>; its direct <row> children move to <tbody>.  A <row>
    followed by an <hline> gets rowsep="1".  Everything left over must be
    whitespace text or <hline> elements, which are discarded.

    Raises ConversionError if the table holds any other content.
    """
    # create the table head
    thead = doc.createElement("thead")
    row = doc.createElement("row")
    move_elements_by_name(doc, table, row, "entry")
    thead.appendChild(doc.createTextNode("\n "))
    thead.appendChild(row)
    thead.appendChild(doc.createTextNode("\n "))
    # create the table body
    tbody = doc.createElement("tbody")
    prev_row = None
    children = table.childNodes
    for child in children:
        if child.nodeType == xml.dom.core.ELEMENT:
            tagName = child.tagName
            if tagName == "hline" and prev_row is not None:
                prev_row.setAttribute("rowsep", "1")
            elif tagName == "row":
                prev_row = child
    # save the rows:
    tbody.appendChild(doc.createTextNode("\n "))
    move_elements_by_name(doc, table, tbody, "row", sep="\n ")
    # and toss the rest:
    while children:
        child = children[0]
        nodeType = child.nodeType
        if nodeType == xml.dom.core.TEXT:
            if string.strip(child.data):
                raise ConversionError("unexpected free data in table")
            table.removeChild(child)
            continue
        if nodeType == xml.dom.core.ELEMENT:
            if child.tagName != "hline":
                raise ConversionError(
                    "unexpected <%s> in table" % child.tagName)
            table.removeChild(child)
            continue
        raise ConversionError(
            "unexpected %s node in table" % child.__class__.__name__)
    # nothing left in the <table>; add the <thead> and <tbody>
    tgroup = doc.createElement("tgroup")
    tgroup.appendChild(doc.createTextNode("\n "))
    tgroup.appendChild(thead)
    tgroup.appendChild(doc.createTextNode("\n "))
    tgroup.appendChild(tbody)
    tgroup.appendChild(doc.createTextNode("\n "))
    table.appendChild(tgroup)
    # now make the <entry>s look nice:
    for row in table.getElementsByTagName("row"):
        fixup_row(doc, row)
521
522
def fixup_row(doc, row):
    """Insert a separator text node before each child of `row` but the first.

    doc -- document node used to create the text nodes
    row -- the element whose children are separated; modified in place
    """
    # Copy the children first: inserting changes the row's child list.
    entries = []
    for entry in row.childNodes[1:]:
        entries.append(entry)
    for entry in entries:
        row.insertBefore(doc.createTextNode("\n "), entry)
529
530
def move_elements_by_name(doc, source, dest, name, sep=None):
    """Move the direct child elements of `source` named `name` to `dest`.

    The elements are appended to `dest` in order.  When `sep` is given
    and non-empty, a text node holding `sep` is appended after each one.
    """
    matches = []
    for child in source.childNodes:
        if child.nodeType != xml.dom.core.ELEMENT:
            continue
        if child.tagName == name:
            matches.append(child)
    for node in matches:
        source.removeChild(node)
        dest.appendChild(node)
        if sep:
            dest.appendChild(doc.createTextNode(sep))
541
542
# Elements whose children the paragraph fixer processes recursively.
RECURSE_INTO_PARA_CONTAINERS = (
    "chapter",
    "abstract",
    "enumerate",
    "section",
    "subsection",
    "subsubsection",
    "paragraph",
    "subparagraph",
    "howto",
    "manual",
    )

# Elements that end a paragraph being collected; skip_leading_nodes()
# also steps over them (unless they are recursion containers) when
# looking for the start of paragraph material.
PARA_LEVEL_ELEMENTS = (
    "moduleinfo",
    "title",
    "verbatim",
    "enumerate",
    "item",
    "opcodedesc",
    "classdesc",
    "datadesc",
    "funcdesc",
    "methoddesc",
    "excdesc",
    "funcdescni",
    "methoddescni",
    "excdescni",
    "tableii",
    "tableiii",
    "tableiv",
    "localmoduletable",
    "sectionauthor",
    "seealso",
    # include <para>, so we can just do it again to get subsequent paras:
    "para",
    )

# Further elements that skip_leading_nodes() steps over.
PARA_LEVEL_PRECEEDERS = (
    "index",
    "indexii",
    "indexiii",
    "indexiv",
    "setindexsubitem",
    "stindex",
    "obindex",
    "COMMENT",
    "label",
    "input",
    "title",
    )
565
Fred Drake7dab6af1999-01-28 23:59:58 +0000566
def fixup_paras(doc):
    """Build <para> elements throughout the document.

    The root-level paragraph containers are processed first, then every
    <description> element found from `doc`.
    """
    for child in doc.childNodes:
        if child.nodeType != xml.dom.core.ELEMENT:
            continue
        if child.tagName in RECURSE_INTO_PARA_CONTAINERS:
            fixup_paras_helper(doc, child)
    for description in find_all_elements(doc, "description"):
        fixup_paras_helper(doc, description)
Fred Drakefcc59101999-01-06 22:50:52 +0000576
577
def fixup_paras_helper(doc, container, depth=0):
    """Wrap the paragraph material inside `container` in <para> elements.

    Children that are themselves paragraph containers are handled by a
    recursive call; other material is handed to build_para().
    """
    # document is already normalized
    # NOTE(review): `depth` is not passed on by the recursive call below,
    # so nested calls run with depth 0 -- confirm whether that is intended.
    children = container.childNodes
    start = 0
    while 1:
        if start >= len(children):
            break
        start = skip_leading_nodes(children, start)
        if start >= len(children):
            break
        #
        # Either paragraph material or something to recurse into:
        #
        node = children[start]
        if node.nodeType == xml.dom.core.ELEMENT \
           and node.tagName in RECURSE_INTO_PARA_CONTAINERS:
            fixup_paras_helper(doc, node)
            start = skip_leading_nodes(children, start + 1)
        else:
            #
            # paragraph material:
            #
            build_para(doc, container, start, len(children))
            if DEBUG_PARA_FIXER and depth == 10:
                sys.exit(1)
            start = start + 1
Fred Drakefcc59101999-01-06 22:50:52 +0000601
602
def build_para(doc, parent, start, i):
    """Wrap a run of `parent`'s children, beginning at `start`, in a <para>.

    doc    -- document node used to create the <para> element
    parent -- element whose children are being grouped
    start  -- index of the first child to place in the paragraph
    i      -- index at which scanning stops (callers pass len(children))

    Returns the index just past the new <para>: the new child count when
    the <para> was appended at the end of `parent`, otherwise start + 1.
    Raises ConversionError when no child can be collected.
    """
    children = parent.childNodes
    # `after` is the index one past the last child to collect.
    after = start + 1
    # Set when the scan reaches `i` without hitting a break condition.
    have_last = 0
    BREAK_ELEMENTS = PARA_LEVEL_ELEMENTS + RECURSE_INTO_PARA_CONTAINERS
    # Collect all children until \n\n+ is found in a text node or a
    # member of BREAK_ELEMENTS is found.
    for j in range(start, i):
        after = j + 1
        child = children[j]
        nodeType = child.nodeType
        if nodeType == xml.dom.core.ELEMENT:
            if child.tagName in BREAK_ELEMENTS:
                after = j
                break
        elif nodeType == xml.dom.core.TEXT:
            pos = string.find(child.data, "\n\n")
            if pos == 0:
                # The text starts with the break: exclude the whole node.
                after = j
                break
            if pos >= 1:
                # Keep the part before the break; splitText() leaves the
                # rest as a following node.
                child.splitText(pos)
                break
    else:
        # The loop ran to completion without a break.
        have_last = 1
    if (start + 1) > after:
        raise ConversionError(
            "build_para() could not identify content to turn into a paragraph")
    if children[after - 1].nodeType == xml.dom.core.TEXT:
        # we may need to split off trailing white space:
        child = children[after - 1]
        data = child.data
        if string.rstrip(data) != data:
            have_last = 0
            child.splitText(len(string.rstrip(data)))
    para = doc.createElement("para")
    prev = None
    # Move the nodes last to first, inserting each before the previously
    # moved one so the original order is kept inside <para>.
    indexes = range(start, after)
    indexes.reverse()
    for j in indexes:
        node = parent.childNodes[j]
        parent.removeChild(node)
        para.insertBefore(node, prev)
        prev = node
    if have_last:
        parent.appendChild(para)
        return len(parent.childNodes)
    else:
        parent.insertBefore(para, parent.childNodes[start])
        return start + 1
Fred Drakefcc59101999-01-06 22:50:52 +0000653
654
def skip_leading_nodes(children, start):
    """Return index into children of a node at which paragraph building should
    begin or a recursive call to fixup_paras_helper() should be made (for
    subsections, etc.).

    When the return value >= len(children), we've built all the paras we can
    from this list of children.
    """
    limit = len(children)
    while start < limit:
        # skip over leading comments and whitespace:
        child = children[start]
        kind = child.nodeType
        if kind == xml.dom.core.TEXT:
            data = child.data
            stripped = string.lstrip(data)
            if stripped:
                if data == stripped:
                    return start
                # break into two nodes: whitespace and non-whitespace
                child.splitText(len(data) - len(stripped))
                return start + 1
            # all whitespace, just skip
        elif kind == xml.dom.core.ELEMENT:
            tagName = child.tagName
            if tagName in RECURSE_INTO_PARA_CONTAINERS \
               or tagName not in PARA_LEVEL_ELEMENTS + PARA_LEVEL_PRECEEDERS:
                return start
        start = start + 1
    return start
Fred Drakefba0ba21998-12-10 05:07:09 +0000686
687
def fixup_rfc_references(doc):
    """Append the text "RFC <num>" to every <rfc> element found from `doc`."""
    for rfcnode in find_all_elements(doc, "rfc"):
        label = "RFC " + rfcnode.getAttribute("num")
        rfcnode.appendChild(doc.createTextNode(label))
Fred Draked24167b1999-01-14 21:18:03 +0000692
693
def fixup_signatures(doc):
    """Expand <optional> markup inside <args> and <constructor-args>.

    Each such element below a root-level element is passed to
    fixup_args() and then normalized.
    """
    for child in doc.childNodes:
        if child.nodeType != xml.dom.core.ELEMENT:
            continue
        for gi in ("args", "constructor-args"):
            for arglist in child.getElementsByTagName(gi):
                fixup_args(doc, arglist)
                arglist.normalize()
705
706
def fixup_args(doc, arglist):
    """Replace each <optional> child of `arglist` by "[", its content, "]".

    The child list is rescanned after every replacement, so <optional>
    elements that were nested inside a replaced one are expanded too.
    """
    while 1:
        optional = None
        for child in arglist.childNodes:
            if child.nodeType == xml.dom.core.ELEMENT \
               and child.tagName == "optional":
                optional = child
                break
        if optional is None:
            return None
        # found it; splice its content into the argument list
        arglist.insertBefore(doc.createTextNode("["), optional)
        optkids = optional.childNodes
        while optkids:
            kid = optkids[0]
            optional.removeChild(kid)
            arglist.insertBefore(kid, optional)
        arglist.insertBefore(doc.createTextNode("]"), optional)
        arglist.removeChild(optional)
721
722
def fixup_sectionauthors(doc):
    """Turn each <sectionauthor> into an <author> near the section start.

    The element is renamed, its "name" attribute becomes its text, and
    it is re-inserted before the section's third child -- or before the
    first child when the second child is an element other than <title>.
    """
    for author in find_all_elements(doc, "sectionauthor"):
        section = author.parentNode
        section.removeChild(author)
        author._node.name = "author"
        author_name = author.getAttribute("name")
        author.appendChild(doc.createTextNode(author_name))
        author.removeAttribute("name")
        siblings = section.childNodes
        anchor = siblings[2]
        second = siblings[1]
        if second.nodeType == xml.dom.core.ELEMENT \
           and second.tagName != "title":
            anchor = siblings[0]
        section.insertBefore(doc.createTextNode("\n "), anchor)
        section.insertBefore(author, anchor)
737
738
Fred Drake4db5b461998-12-01 19:03:01 +0000739_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
Fred Drakefcc59101999-01-06 22:50:52 +0000740
Fred Drake4db5b461998-12-01 19:03:01 +0000741def write_esis(doc, ofp, knownempty):
742 for node in doc.childNodes:
743 nodeType = node.nodeType
744 if nodeType == xml.dom.core.ELEMENT:
745 gi = node.tagName
746 if knownempty(gi):
747 if node.hasChildNodes():
748 raise ValueError, "declared-empty node has children"
749 ofp.write("e\n")
750 for k, v in node.attributes.items():
751 value = v.value
752 if _token_rx.match(value):
753 dtype = "TOKEN"
754 else:
755 dtype = "CDATA"
756 ofp.write("A%s %s %s\n" % (k, dtype, esistools.encode(value)))
757 ofp.write("(%s\n" % gi)
758 write_esis(node, ofp, knownempty)
759 ofp.write(")%s\n" % gi)
760 elif nodeType == xml.dom.core.TEXT:
761 ofp.write("-%s\n" % esistools.encode(node.data))
762 else:
763 raise RuntimeError, "unsupported node type: %s" % nodeType
764
765
def convert(ifp, ofp):
    """Read ESIS data from `ifp`, fix up the tree, and write ESIS to `ofp`.

    ifp -- object with a read() method supplying the input
    ofp -- object with a write() method receiving the output

    The fix-up passes run in a fixed order; several of them rely on the
    results of earlier ones.  An IOError whose errno is EPIPE while
    writing is ignored; any other IOError propagates.
    """
    p = esistools.ExtendedEsisBuilder()
    p.feed(ifp.read())
    doc = p.document
    normalize(doc)
    simplify(doc)
    handle_labels(doc)
    handle_appendix(doc)
    fixup_trailing_whitespace(doc, {
        "abstract": "\n",
        "title": "",
        "chapter": "\n\n",
        "section": "\n\n",
        "subsection": "\n\n",
        "subsubsection": "\n\n",
        "paragraph": "\n\n",
        "subparagraph": "\n\n",
        })
    cleanup_root_text(doc)
    cleanup_trailing_parens(doc, ["function", "method", "cfunction"])
    cleanup_synopses(doc)
    fixup_descriptors(doc)
    normalize(doc)
    fixup_paras(doc)
    fixup_sectionauthors(doc)
    remap_element_names(doc, {
        "tableii": ("table", {"cols": "2"}),
        "tableiii": ("table", {"cols": "3"}),
        "tableiv": ("table", {"cols": "4"}),
        "lineii": ("row", {}),
        "lineiii": ("row", {}),
        "lineiv": ("row", {}),
        "refmodule": ("module", {"link": "link"}),
        })
    fixup_table_structures(doc)
    fixup_rfc_references(doc)
    fixup_signatures(doc)
    #
    # Elements the parser saw as empty, except <rfc>, which
    # fixup_rfc_references() has just given text content.
    d = {}
    for gi in p.get_empties():
        d[gi] = gi
    if d.has_key("rfc"):
        del d["rfc"]
    knownempty = d.has_key
    #
    try:
        write_esis(doc, ofp, knownempty)
    except IOError:
        # Ignore EPIPE; it just means that whoever we're writing to stopped
        # reading.  The rest of the output would be ignored.  All other errors
        # should still be reported.  The error number is read from the
        # exception's attribute, so an IOError carrying other arguments is
        # re-raised unchanged instead of failing to unpack.
        err = getattr(sys.exc_info()[1], "errno", None)
        if err != errno.EPIPE:
            raise
819
820
def main():
    """Command-line entry point.

    With no arguments, converts stdin to stdout; with one, reads the
    named file and writes to stdout; with two, reads the first file and
    writes the second.  Any other argument count prints a usage message
    to stderr and exits with status 2.  Files opened here are closed
    again once the conversion finishes or fails.
    """
    argc = len(sys.argv)
    if argc == 1:
        ifp = sys.stdin
        ofp = sys.stdout
    elif argc == 2:
        ifp = open(sys.argv[1])
        ofp = sys.stdout
    elif argc == 3:
        ifp = open(sys.argv[1])
        ofp = open(sys.argv[2], "w")
    else:
        prog = "<script>"
        if sys.argv:
            prog = sys.argv[0]
        sys.stderr.write("usage: %s [input-file [output-file]]\n" % prog)
        sys.exit(2)
    try:
        convert(ifp, ofp)
    finally:
        # Only close what was opened here; leave the standard streams alone.
        if ifp is not sys.stdin:
            ifp.close()
        if ofp is not sys.stdout:
            ofp.close()


if __name__ == "__main__":
    main()