blob: b23b0f6f1ae91aac3d1ee95b700d04d8f0d29478 [file] [log] [blame]
#! /usr/bin/env python

"""Promote the IDs from <label/> elements to the enclosing section / chapter /
whatever, then remove the <label/> elements.  This allows *ML style internal
linking rather than the bogus LaTeX model.

Note that <label/>s in <title> elements are promoted two steps, since the
<title> elements are artificially created from the section parameter, and the
label really refers to the sectioning construct.

Besides label promotion, convert() also runs the other fixups defined in this
module (argument/description rewriting, root simplification, appendix
handling, whitespace and synopsis cleanup) before writing the result out as
ESIS.
"""
__version__ = '$Revision$'
12
13
14import errno
Fred Drake4db5b461998-12-01 19:03:01 +000015import esistools
16import re
Fred Drake03204731998-11-23 17:02:03 +000017import string
18import sys
19import xml.dom.core
20import xml.dom.esis_builder
21
22
# Workaround to deal with invalid documents (multiple root elements).  This
# does not indicate a bug in the DOM implementation.
#
def get_documentElement(self):
    """Return an Element wrapper for the last element child of the document.

    Every raw child of the document is examined; when several element
    children exist, the last one wins.  Returns None when the document has
    no element child at all.
    """
    last_element = None
    for raw in self._node.children:
        if raw.type == xml.dom.core.ELEMENT:
            last_element = raw
    if last_element is None:
        return None
    return xml.dom.core.Element(last_element, self, self)

# Install the replacement accessor on the DOM Document class.
xml.dom.core.Document.get_documentElement = get_documentElement
34
35
# Replace get_childNodes for the Document class; without this, children
# accessed from the Document object via .childNodes (no matter how many
# levels of access are used) will be given an ownerDocument of None.
#
def get_childNodes(self):
    """Return a NodeList over the document's raw children.

    The document itself is passed as both the second and third NodeList
    argument.
    """
    raw_children = self._node.children
    return xml.dom.core.NodeList(raw_children, self, self)

# Install the replacement accessor on the DOM Document class.
xml.dom.core.Document.get_childNodes = get_childNodes
44
45
def get_first_element(doc, gi):
    """Return the first direct element child of `doc` named `gi`, or None."""
    for child in doc.childNodes:
        if child.nodeType != xml.dom.core.ELEMENT:
            continue
        if child.tagName == gi:
            return child
    return None
50
def extract_first_element(doc, gi):
    """Detach and return the first direct element child of `doc` named `gi`.

    Returns None (and leaves `doc` untouched) when there is no such child.
    """
    found = get_first_element(doc, gi)
    if found is None:
        return None
    doc.removeChild(found)
    return found
56
57
def simplify(doc):
    """Rationalize the root level of the document.

    These things are simply not valid SGML/XML documents as they stand, and
    need a little work:

    - a root-level <documentclass> is removed and its "classname" attribute
      becomes the name of the root-level <document> element (if present);
    - a root-level <title> and every root-level <input> are removed from
      the root and re-inserted, in their original order, at the front of
      the document element, separated by newline text nodes;
    - leading text nodes of the document are dropped.
    """
    documentclass = "document"
    inputs = []
    node = extract_first_element(doc, "documentclass")
    if node is not None:
        documentclass = node.getAttribute("classname")
    node = extract_first_element(doc, "title")
    if node is not None:
        inputs.append(node)
    # update the name of the root element
    node = get_first_element(doc, "document")
    if node is not None:
        # The DOM wrapper offers no rename, so poke the underlying node.
        node._node.name = documentclass
    while 1:
        node = extract_first_element(doc, "input")
        if node is None:
            break
        inputs.append(node)
    if inputs:
        docelem = doc.documentElement
        # Each node is inserted at the front, so walk them back to front to
        # keep the original relative order.
        inputs.reverse()
        for node in inputs:
            text = doc.createTextNode("\n")
            docelem.insertBefore(text, docelem.firstChild)
            docelem.insertBefore(node, text)
        docelem.insertBefore(doc.createTextNode("\n"), docelem.firstChild)
    # Drop leading text at the root.  Stop when the document runs out of
    # children; firstChild is then None and has no nodeType to inspect.
    first = doc.firstChild
    while first is not None and first.nodeType == xml.dom.core.TEXT:
        doc.removeChild(first)
        first = doc.firstChild
88
89
def cleanup_root_text(doc):
    """Remove root-level text nodes, except one directly after a COMMENT.

    A text node is kept only when the child immediately before it is an
    element whose tag name is "COMMENT"; every other text child of `doc`
    is removed.
    """
    doomed = []
    after_comment = 0
    for child in doc.childNodes:
        follows_comment = after_comment
        after_comment = 0
        kind = child.nodeType
        if kind == xml.dom.core.TEXT:
            if not follows_comment:
                doomed.append(child)
        elif kind == xml.dom.core.ELEMENT:
            if child.tagName == "COMMENT":
                after_comment = 1
    # Remove only after the scan so the child list is not changed while it
    # is being iterated.
    for child in doomed:
        doc.removeChild(child)
102
103
def rewrite_desc_entries(doc, argname_gi):
    """Wrap the siblings of each `argname_gi` element in a <description>.

    For every element named `argname_gi` in the document, all children of
    its parent that are not themselves `argname_gi` elements are moved into
    a new <description> element, which is appended to the parent.  An
    argument element with no children is removed; one with children is kept
    and preceded by a newline-and-indent text node.

    Text nodes are created with createTextNode(), the standard DOM factory
    used by the rest of this module, rather than createText().
    """
    argnodes = doc.getElementsByTagName(argname_gi)
    for node in argnodes:
        parent = node.parentNode
        # Collect first, move afterwards, so the child list is not modified
        # while it is being iterated.
        nodes = []
        for n in parent.childNodes:
            if n.nodeType != xml.dom.core.ELEMENT or n.tagName != argname_gi:
                nodes.append(n)
        desc = doc.createElement("description")
        for n in nodes:
            parent.removeChild(n)
            desc.appendChild(n)
        if node.childNodes:
            # keep the <args>...</args>, newline & indent
            parent.insertBefore(doc.createTextNode("\n "), node)
        else:
            # no arguments, remove the <args/> node
            parent.removeChild(node)
        parent.appendChild(doc.createTextNode("\n "))
        parent.appendChild(desc)
        parent.appendChild(doc.createTextNode("\n"))
125
def handle_args(doc):
    """Rewrite the <args> and then the <constructor-args> entries of `doc`."""
    for gi in ("args", "constructor-args"):
        rewrite_desc_entries(doc, gi)
129
130
def handle_appendix(doc):
    """Move everything after the <appendix> marker into <back-matter>.

    Must be called after simplify() if the document is multi-rooted to
    begin with.

    The children of the document element are scanned for the first element
    child containing an <appendix> descendant.  That marker is removed (and
    its parent normalized), and every later child of the document element
    is moved into a new <back-matter> element appended to the document
    element.  Leading whitespace-only text nodes are removed from the
    document element but not carried over.  Nothing is appended when no
    children follow the child holding the marker.
    """
    docelem = doc.documentElement
    # Computed as in the original code, but not used by this function.
    if docelem.tagName == "manual":
        toplevel = "chapter"
    else:
        toplevel = "section"
    seen_appendix = 0
    trailing = []
    for child in docelem.childNodes:
        if seen_appendix:
            trailing.append(child)
            continue
        if child.nodeType != xml.dom.core.ELEMENT:
            continue
        markers = child.getElementsByTagName("appendix")
        if markers:
            seen_appendix = 1
            marker = markers[0]
            holder = marker.parentNode
            holder.removeChild(marker)
            holder.normalize()
    if not trailing:
        return
    for child in trailing:
        docelem.removeChild(child)
    docelem.appendChild(doc.createTextNode("\n\n\n"))
    back = doc.createElement("back-matter")
    docelem.appendChild(back)
    back.appendChild(doc.createTextNode("\n"))
    # Skip leading whitespace-only text nodes; they are not moved.
    start = 0
    while start < len(trailing) \
          and trailing[start].nodeType == xml.dom.core.TEXT \
          and not string.strip(trailing[start].data):
        start = start + 1
    for child in trailing[start:]:
        back.appendChild(child)
    docelem.appendChild(doc.createTextNode("\n"))
Fred Drake03204731998-11-23 17:02:03 +0000158
159
def handle_labels(doc):
    """Promote <label id="..."/> IDs to the enclosing element.

    For each <label> with a non-empty "id" attribute, the ID is set on the
    label's parent -- or on the parent's parent when the parent is a
    <title> -- and the <label> is then removed from its parent.  Labels
    without an ID are left where they are.
    """
    for label in doc.getElementsByTagName("label"):
        ident = label.getAttribute("id")
        if ident:
            owner = label.parentNode
            target = owner
            if owner.tagName == "title":
                # The label really refers to the sectioning construct.
                target = owner.parentNode
            target.setAttribute("id", ident)
            # now, remove <label id="..."/> from parent:
            owner.removeChild(label)
173
174
def fixup_trailing_whitespace(doc, wsmap):
    """Normalize the trailing whitespace of selected elements.

    `wsmap` maps element names to the whitespace string their content
    should end with.  The tree is walked breadth-first from `doc`; for each
    element named in `wsmap` whose last child is a text node, that text is
    right-stripped and the mapped whitespace appended.  Elements with no
    children are left alone.

    In addition, when a <title> is processed and its parent's first child
    is an element, a newline-and-indent text node is inserted in front of
    that first child.
    """
    queue = [doc]
    while queue:
        node = queue[0]
        del queue[0]
        if node.nodeType == xml.dom.core.ELEMENT \
           and wsmap.has_key(node.tagName):
            ws = wsmap[node.tagName]
            children = node.childNodes
            # Look at the last child directly instead of reversing the
            # child list in place twice; an empty element has nothing to
            # fix and must not be indexed.
            if len(children) > 0:
                last = children[-1]
                if last.nodeType == xml.dom.core.TEXT:
                    last.data = string.rstrip(last.data) + ws
            # hack to get the title in place:
            if node.tagName == "title" \
               and node.parentNode.firstChild.nodeType == xml.dom.core.ELEMENT:
                node.parentNode.insertBefore(doc.createTextNode("\n "),
                                             node.parentNode.firstChild)
        for child in node.childNodes:
            if child.nodeType == xml.dom.core.ELEMENT:
                queue.append(child)
197
198
def normalize(doc):
    """Call normalize() on every direct element child of `doc`."""
    for child in doc.childNodes:
        if child.nodeType != xml.dom.core.ELEMENT:
            continue
        child.normalize()
203
204
def cleanup_trailing_parens(doc, element_names):
    """Strip a trailing "()" from the text of the named elements.

    The tree is walked breadth-first from the element children of `doc`.
    An element whose name is in `element_names` has a trailing "()" removed
    when its only child is a text node; such elements are not descended
    into.  For every other element, the element children are queued for
    inspection.
    """
    wanted = {}
    for gi in element_names:
        wanted[gi] = gi
    pending = []
    for child in doc.childNodes:
        if child.nodeType == xml.dom.core.ELEMENT:
            pending.append(child)
    # Walk the work list with a cursor instead of popping from the front;
    # the visiting order is the same.
    cursor = 0
    while cursor < len(pending):
        node = pending[cursor]
        cursor = cursor + 1
        if not wanted.has_key(node.tagName):
            for child in node.childNodes:
                if child.nodeType == xml.dom.core.ELEMENT:
                    pending.append(child)
            continue
        kids = node.childNodes
        if len(kids) == 1 and kids[0].nodeType == xml.dom.core.TEXT:
            text = kids[0].data
            if text[-2:] == "()":
                kids[0].data = text[:-2]
228
229
def contents_match(left, right):
    """Return 1 if the children of `left` and `right` match, else 0.

    Children are compared pairwise and recursively: element children must
    have the same tag name and matching contents (attributes are not
    compared), text children must have equal data.  Any other node type
    counts as a mismatch -- not quite right, but good enough.
    """
    left_kids = left.childNodes
    right_kids = right.childNodes
    count = len(left_kids)
    if count != len(right_kids):
        return 0
    for i in range(count):
        l = left_kids[i]
        r = right_kids[i]
        kind = l.nodeType
        if kind != r.nodeType:
            return 0
        if kind == xml.dom.core.ELEMENT:
            # should check attributes, but that's not a problem here
            if l.tagName != r.tagName or not contents_match(l, r):
                return 0
        elif kind == xml.dom.core.TEXT:
            if l.data != r.data:
                return 0
        else:
            return 0
    return 1
252
253
def create_module_info(doc, section):
    """Build a <moduleinfo> element for `section` and insert it.

    The section's <modulesynopsis> child is removed and renamed to
    <synopsis>; if there is none, nothing is done.  When `section` is a
    <section> element, a <moduleinfo> element is assembled from the
    <declaremodule> child (name and optional type), the synopsis and,
    where it differs from the synopsis, the section <title> (renamed to
    <short-synopsis>), and is inserted into `section`.

    Returns None; `section` is modified in place.
    """
    # Heavy.
    node = extract_first_element(section, "modulesynopsis")
    if node is None:
        return
    # Rename the element by reaching into the underlying node.
    node._node.name = "synopsis"
    # NOTE(review): assumes the synopsis has at least one child -- confirm.
    lastchild = node.childNodes[-1]
    # Drop a trailing period from the synopsis text.
    if lastchild.nodeType == xml.dom.core.TEXT \
       and lastchild.data[-1:] == ".":
        lastchild.data = lastchild.data[:-1]
    if section.tagName == "section":
        # Index in section.childNodes that <moduleinfo> is inserted before;
        # reset to 0 below when the <title> is taken out of the section.
        modinfo_pos = 2
        modinfo = doc.createElement("moduleinfo")
        moddecl = extract_first_element(section, "declaremodule")
        name = None
        if moddecl:
            modinfo.appendChild(doc.createTextNode("\n "))
            name = moddecl.attributes["name"].value
            namenode = doc.createElement("name")
            namenode.appendChild(doc.createTextNode(name))
            modinfo.appendChild(namenode)
            type = moddecl.attributes.get("type")
            if type:
                type = type.value
                modinfo.appendChild(doc.createTextNode("\n "))
                typenode = doc.createElement("type")
                typenode.appendChild(doc.createTextNode(type))
                modinfo.appendChild(typenode)
        title = get_first_element(section, "title")
        if title:
            children = title.childNodes
            # The title qualifies when it starts with a <module> element
            # whose first child's data equals the declared module name.
            if len(children) >= 2 \
               and children[0].nodeType == xml.dom.core.ELEMENT \
               and children[0].tagName == "module" \
               and children[0].childNodes[0].data == name:
                # this is it; morph the <title> into <short-synopsis>
                first_data = children[1]
                # Strip the " ---" separator that follows the module name.
                if first_data.data[:4] == " ---":
                    first_data.data = string.lstrip(first_data.data[4:])
                title._node.name = "short-synopsis"
                # NOTE(review): assumes the last child of the title is a
                # text node (it is accessed via .data) -- confirm.
                if children[-1].data[-1:] == ".":
                    children[-1].data = children[-1].data[:-1]
                section.removeChild(title)
                # Also remove whatever is now the first child of the section.
                section.removeChild(section.childNodes[0])
                # Drop the leading <module> element from the former title.
                title.removeChild(children[0])
                modinfo_pos = 0
            else:
                sys.stderr.write(
                    "module name in title doesn't match"
                    " <declaremodule>; no <short-synopsis>\n")
        else:
            sys.stderr.write(
                "Unexpected condition: <section> without <title>\n")
        modinfo.appendChild(doc.createTextNode("\n "))
        modinfo.appendChild(node)
        if title and not contents_match(title, node):
            # The short synopsis is actually different,
            # and needs to be stored:
            modinfo.appendChild(doc.createTextNode("\n "))
            modinfo.appendChild(title)
        modinfo.appendChild(doc.createTextNode("\n "))
        section.insertBefore(modinfo, section.childNodes[modinfo_pos])
        section.insertBefore(doc.createTextNode("\n "), modinfo)
317
318
def cleanup_synopses(doc):
    """Run create_module_info() on every direct <section> child of `doc`."""
    for child in doc.childNodes:
        if child.nodeType != xml.dom.core.ELEMENT:
            continue
        if child.tagName == "section":
            create_module_info(doc, child)
324
325
def fixup_paras(doc):
    """Placeholder for paragraph fixups; currently does nothing."""
    return None
Fred Drakefba0ba21998-12-10 05:07:09 +0000328
329
# Attribute values matching this pattern are written as TOKEN, all others
# as CDATA.
_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")

def write_esis(doc, ofp, knownempty):
    """Write the children of `doc` to `ofp` as ESIS events, recursively.

    `knownempty` is called with an element name and returns true for
    elements declared empty; these get an "e" line before their attributes.
    Each element is written as its "A" attribute lines, a "(" open line,
    its content and a ")" close line.  Text nodes are written as "-" lines.

    Raises ValueError when a declared-empty element has children, and
    RuntimeError for any node that is neither an element nor text.
    """
    write = ofp.write
    for child in doc.childNodes:
        kind = child.nodeType
        if kind == xml.dom.core.TEXT:
            write("-%s\n" % esistools.encode(child.data))
            continue
        if kind != xml.dom.core.ELEMENT:
            raise RuntimeError("unsupported node type: %s" % kind)
        gi = child.tagName
        if knownempty(gi):
            if child.hasChildNodes():
                raise ValueError("declared-empty node has children")
            write("e\n")
        for attrname, attr in child.attributes.items():
            value = attr.value
            dtype = "CDATA"
            if _token_rx.match(value):
                dtype = "TOKEN"
            write("A%s %s %s\n" % (attrname, dtype, esistools.encode(value)))
        write("(%s\n" % gi)
        write_esis(child, ofp, knownempty)
        write(")%s\n" % gi)
355
356
def convert(ifp, ofp):
    """Read an ESIS stream from `ifp`, fix up the document, write to `ofp`.

    The input is parsed with esistools.ExtendedEsisBuilder, the fixup passes
    are applied in a fixed order, and the result is written back as ESIS.
    An IOError whose first argument is errno.EPIPE raised while writing is
    ignored; every other IOError propagates unchanged.
    """
    p = esistools.ExtendedEsisBuilder()
    p.feed(ifp.read())
    doc = p.document
    normalize(doc)
    handle_args(doc)
    simplify(doc)
    handle_labels(doc)
    handle_appendix(doc)
    fixup_trailing_whitespace(doc, {
        "abstract": "\n",
        "title": "",
        "chapter": "\n\n",
        "section": "\n\n",
        "subsection": "\n\n",
        "subsubsection": "\n\n",
        "paragraph": "\n\n",
        "subparagraph": "\n\n",
        })
    cleanup_root_text(doc)
    cleanup_trailing_parens(doc, ["function", "method", "cfunction"])
    cleanup_synopses(doc)
    normalize(doc)
    fixup_paras(doc)
    #
    # Build the "is this element declared empty?" predicate.
    d = {}
    for gi in p.get_empties():
        d[gi] = gi
    knownempty = d.has_key
    #
    try:
        write_esis(doc, ofp, knownempty)
    except IOError:
        # Ignore EPIPE; it just means that whoever we're writing to stopped
        # reading.  The rest of the output would be ignored.  All other
        # errors should still be reported.
        #
        # The exception is inspected instead of being unpacked into
        # (err, msg), so an IOError that does not carry exactly two
        # arguments is re-raised as itself rather than as an unpacking
        # error.
        exc = sys.exc_info()[1]
        args = getattr(exc, "args", exc)
        if not args or args[0] != errno.EPIPE:
            raise
395
396
def main():
    """Command-line entry point: [infile [outfile]].

    With no arguments, read standard input and write standard output.  With
    one argument, read the named file and write standard output.  With two
    arguments, read the first file and write the second.  Any other
    argument count prints a usage message to standard error and exits with
    status 2.  Files opened here are closed before returning.
    """
    argc = len(sys.argv)
    if argc not in (1, 2, 3):
        if sys.argv:
            prog = sys.argv[0]
        else:
            prog = "?"
        sys.stderr.write("usage: %s [infile [outfile]]\n" % prog)
        sys.exit(2)
    opened = []
    try:
        ifp = sys.stdin
        ofp = sys.stdout
        if argc >= 2:
            ifp = open(sys.argv[1])
            opened.append(ifp)
        if argc == 3:
            ofp = open(sys.argv[2], "w")
            opened.append(ofp)
        convert(ifp, ofp)
    finally:
        # Close only what was opened here; stdin/stdout are left alone.
        for fp in opened:
            fp.close()


if __name__ == "__main__":
    main()