blob: 97bcb09e3eed43b012a2a5cea63cf28117434ff7 [file] [log] [blame]
#! /usr/bin/env python

"""Promote the IDs from <label/> elements to the enclosing section / chapter /
whatever, then remove the <label/> elements. This allows *ML style internal
linking rather than the bogus LaTeX model.

Note that <label/>s in <title> elements are promoted two steps, since the
<title> elements are artificially created from the section parameter, and the
label really refers to the sectioning construct.
"""
# Module version string; the value is a literal '$Revision$' keyword
# placeholder (presumably expanded by the version-control system -- verify).
__version__ = '$Revision$'
12
13
14import errno
Fred Drake4db5b461998-12-01 19:03:01 +000015import esistools
16import re
Fred Drake03204731998-11-23 17:02:03 +000017import string
18import sys
19import xml.dom.core
20import xml.dom.esis_builder
21
22
# Workaround to deal with invalid documents (multiple root elements). This
# does not indicate a bug in the DOM implementation.
#
def get_documentElement(self):
    """Return the document element, tolerating several top-level elements.

    Every child of the underlying node is scanned; each child whose type is
    ELEMENT replaces the previous candidate, so the *last* element child is
    the one returned.  It is wrapped in an ``xml.dom.core.Element`` with this
    document passed for both remaining constructor arguments.

    Returns None when the document has no element child at all.
    """
    docelem = None
    for n in self._node.children:
        if n.type == xml.dom.core.ELEMENT:
            # No break here: a later element overrides an earlier one.
            docelem = xml.dom.core.Element(n, self, self)
    return docelem

# Install the replacement accessor on the DOM Document class.
xml.dom.core.Document.get_documentElement = get_documentElement
34
35
# Replace get_childNodes for the Document class; without this, children
# accessed from the Document object via .childNodes (no matter how many
# levels of access are used) will be given an ownerDocument of None.
#
def get_childNodes(self):
    """Return the document's children as a NodeList owned by this document.

    The list wraps the underlying node's ``children`` and receives this
    document for both remaining ``NodeList`` constructor arguments.
    """
    return xml.dom.core.NodeList(self._node.children, self, self)

# Install the replacement accessor on the DOM Document class.
xml.dom.core.Document.get_childNodes = get_childNodes
44
45
def get_first_element(doc, gi):
    """Return the first direct child element of `doc` named `gi`.

    Only the immediate children of `doc` are examined (no recursion), and
    non-element nodes are skipped.  Returns None when nothing matches.
    """
    for n in doc.childNodes:
        if n.nodeType == xml.dom.core.ELEMENT and n.tagName == gi:
            return n
    return None
50
def extract_first_element(doc, gi):
    """Detach and return the first direct child element of `doc` named `gi`.

    The lookup is delegated to get_first_element(); when it finds a node,
    that node is removed from `doc` before being returned.  Returns None
    (and leaves `doc` untouched) when there is no such child.
    """
    node = get_first_element(doc, gi)
    if node is not None:
        doc.removeChild(node)
    return node
56
57
def simplify(doc):
    """Fold the stray top-level elements of `doc` into its root element.

    Steps, in order:

    * a top-level <documentclass> is removed and its "classname" attribute
      becomes the new name of the top-level <document> element (the name
      stays "document" when there is no <documentclass>);
    * a top-level <title> and every top-level <input> are removed and then
      re-inserted, in their original relative order and each followed by a
      newline text node, at the start of the document element;
    * leading text nodes directly under `doc` are discarded.
    """
    # Try to rationalize the document a bit, since these things are simply
    # not valid SGML/XML documents as they stand, and need a little work.
    documentclass = "document"
    inputs = []
    node = extract_first_element(doc, "documentclass")
    if node is not None:
        documentclass = node.getAttribute("classname")
    node = extract_first_element(doc, "title")
    if node is not None:
        inputs.append(node)
    # update the name of the root element
    node = get_first_element(doc, "document")
    if node is not None:
        node._node.name = documentclass
    while 1:
        node = extract_first_element(doc, "input")
        if node is None:
            break
        inputs.append(node)
    if inputs:
        docelem = doc.documentElement
        # Each node is pushed onto the front, so walk them back-to-front to
        # end up with the original order.
        inputs.reverse()
        for node in inputs:
            text = doc.createTextNode("\n")
            docelem.insertBefore(text, docelem.firstChild)
            docelem.insertBefore(node, text)
        docelem.insertBefore(doc.createTextNode("\n"), docelem.firstChild)
    # Guard against a document with no children left: firstChild is then
    # None and has no nodeType.
    while doc.firstChild is not None \
          and doc.firstChild.nodeType == xml.dom.core.TEXT:
        doc.removeChild(doc.firstChild)
88
89
def cleanup_root_text(doc):
    """Remove text nodes that sit directly under `doc`.

    A text node is kept only when the node immediately before it is an
    element named "COMMENT"; every other top-level text node is removed.
    Removal happens after the scan so the child list is not modified while
    it is being iterated.
    """
    discards = []
    skip = 0
    for n in doc.childNodes:
        # `skip` only protects the node directly following a COMMENT element.
        prevskip = skip
        skip = 0
        if n.nodeType == xml.dom.core.TEXT and not prevskip:
            discards.append(n)
        elif n.nodeType == xml.dom.core.ELEMENT and n.tagName == "COMMENT":
            skip = 1
    for node in discards:
        doc.removeChild(node)
102
103
def rewrite_desc_entries(doc, argname_gi):
    """Wrap the siblings of each `argname_gi` element in a <description>.

    For every element named `argname_gi` found in `doc`:

    * all children of its parent that are not themselves `argname_gi`
      elements are moved, in order, into a new <description> element;
    * the `argname_gi` element is kept (preceded by a newline-and-indent text
      node) when it has children, and removed when it is empty;
    * the <description>, surrounded by whitespace text nodes, is appended to
      the parent.
    """
    argnodes = doc.getElementsByTagName(argname_gi)
    for node in argnodes:
        parent = node.parentNode
        # Collect first, move afterwards, so the child list is not mutated
        # while it is being iterated.
        nodes = []
        for n in parent.childNodes:
            if n.nodeType != xml.dom.core.ELEMENT or n.tagName != argname_gi:
                nodes.append(n)
        desc = doc.createElement("description")
        for n in nodes:
            parent.removeChild(n)
            desc.appendChild(n)
        if node.childNodes:
            # keep the <args>...</args>, newline & indent
            parent.insertBefore(doc.createTextNode("\n "), node)
        else:
            # no arguments, remove the <args/> node
            parent.removeChild(node)
        # createTextNode() is the factory used everywhere else in this file.
        parent.appendChild(doc.createTextNode("\n "))
        parent.appendChild(desc)
        parent.appendChild(doc.createTextNode("\n"))
125
def handle_args(doc):
    """Run rewrite_desc_entries() for "args" and then "constructor-args"."""
    rewrite_desc_entries(doc, "args")
    rewrite_desc_entries(doc, "constructor-args")
129
130
def handle_appendix(doc):
    """Move everything after the <appendix/> marker into <back-matter>.

    The children of the document element are scanned for the first element
    that contains an <appendix> descendant.  That <appendix> is removed from
    its parent (which is then normalized), and every following sibling of the
    containing element is moved into a new <back-matter> element appended to
    the document element.  Whitespace-only text nodes at the start of the
    moved run are dropped.  Nothing is appended when no sibling follows the
    marker (or no marker exists).
    """
    # must be called after simplify() if document is multi-rooted to begin with
    docelem = doc.documentElement
    appendices = 0
    nodes = []
    for node in docelem.childNodes:
        if appendices:
            nodes.append(node)
        elif node.nodeType == xml.dom.core.ELEMENT:
            appnodes = node.getElementsByTagName("appendix")
            if appnodes:
                appendices = 1
                parent = appnodes[0].parentNode
                parent.removeChild(appnodes[0])
                parent.normalize()
    if nodes:
        # Explicit loops rather than map(): map() is lazy on Python 3, so
        # calling it only for its side effects would do nothing there.
        for node in nodes:
            docelem.removeChild(node)
        docelem.appendChild(doc.createTextNode("\n\n\n"))
        back = doc.createElement("back-matter")
        docelem.appendChild(back)
        back.appendChild(doc.createTextNode("\n"))
        while nodes and nodes[0].nodeType == xml.dom.core.TEXT \
              and not nodes[0].data.strip():
            del nodes[0]
        for node in nodes:
            back.appendChild(node)
        docelem.appendChild(doc.createTextNode("\n"))
Fred Drake03204731998-11-23 17:02:03 +0000158
159
def handle_labels(doc):
    """Promote <label id="..."/> IDs to the enclosing element.

    For every <label> in `doc` that has a non-empty "id" attribute, the ID is
    set on the label's parent -- or on the grandparent when the parent is a
    <title> -- and the <label> is then removed from its parent.  Labels
    without an ID are left in place untouched.
    """
    # Snapshot the query result so that removing labels cannot disturb the
    # iteration if the returned collection happens to be live.
    for label in list(doc.getElementsByTagName("label")):
        label_id = label.getAttribute("id")
        if not label_id:
            continue
        parent = label.parentNode
        if parent.tagName == "title":
            # The label really refers to the sectioning construct that owns
            # the title, so promote it one level further.
            parent.parentNode.setAttribute("id", label_id)
        else:
            parent.setAttribute("id", label_id)
        # now, remove <label id="..."/> from parent:
        parent.removeChild(label)
173
174
def fixup_trailing_whitespace(doc, wsmap):
    """Normalize the trailing whitespace of selected elements.

    `wsmap` maps element names to the whitespace string their content should
    end with.  The tree is walked breadth-first from `doc`; for each element
    whose name is a key of `wsmap`, if its last child is a text node, that
    node's data is right-stripped and the mapped whitespace is appended.
    Elements without children are left alone.

    Additionally, when a <title> is processed and the first child of its
    parent is an element, a newline-and-indent text node is inserted in
    front of that first child.
    """
    queue = [doc]
    while queue:
        node = queue.pop(0)
        if node.nodeType == xml.dom.core.ELEMENT \
           and node.tagName in wsmap:
            ws = wsmap[node.tagName]
            children = node.childNodes
            # Look at the last child directly; an empty element has none.
            if children:
                last = children[len(children) - 1]
                if last.nodeType == xml.dom.core.TEXT:
                    last.data = last.data.rstrip() + ws
            # hack to get the title in place:
            if node.tagName == "title" \
               and node.parentNode.firstChild.nodeType == xml.dom.core.ELEMENT:
                node.parentNode.insertBefore(doc.createTextNode("\n "),
                                             node.parentNode.firstChild)
        for child in node.childNodes:
            if child.nodeType == xml.dom.core.ELEMENT:
                queue.append(child)
197
198
def normalize(doc):
    """Call normalize() on every element that is a direct child of `doc`."""
    for node in doc.childNodes:
        if node.nodeType == xml.dom.core.ELEMENT:
            node.normalize()
203
204
def cleanup_trailing_parens(doc, element_names):
    """Strip a trailing "()" from the text of the named elements.

    The tree below `doc` is walked breadth-first.  When an element whose name
    is in `element_names` is reached and its only child is a text node ending
    in "()", those two characters are removed.  Such elements are not
    descended into; all other elements have their child elements queued.
    """
    rewrite_names = frozenset(element_names)
    queue = []
    for node in doc.childNodes:
        if node.nodeType == xml.dom.core.ELEMENT:
            queue.append(node)
    while queue:
        node = queue.pop(0)
        if node.tagName in rewrite_names:
            children = node.childNodes
            if len(children) == 1 \
               and children[0].nodeType == xml.dom.core.TEXT:
                data = children[0].data
                if data[-2:] == "()":
                    children[0].data = data[:-2]
        else:
            for child in node.childNodes:
                if child.nodeType == xml.dom.core.ELEMENT:
                    queue.append(child)
228
229
def cleanup_synopses(doc):
    """Rename <modulesynopsis> to <synopsis> and move it up in its section.

    Every <modulesynopsis> element in `doc` has its underlying node renamed
    to "synopsis".  When its parent is a <section>, the element is removed
    and re-inserted in front of the entry at index 2 of the parent's child
    list, preceded by a newline-and-indent text node.
    """
    # Actually, this should build a "moduleinfo" element from various
    # parts of the meta-information in the section. <moduleinfo> needs
    # some design work before we can really do anything real.
    synopses = doc.getElementsByTagName("modulesynopsis")
    for node in synopses:
        node._node.name = "synopsis"
        parent = node.parentNode
        if parent.tagName == "section":
            # NOTE(review): index 2 assumes the section has at least three
            # children at this point -- presumably whitespace, <title>,
            # whitespace; confirm against real input.  The child list is
            # fetched before the removal on purpose; keep this order.
            children = parent.childNodes
            parent.removeChild(node)
            parent.insertBefore(node, children[2])
            text = doc.createTextNode("\n ")
            parent.insertBefore(text, node)
244
245
# An attribute value is reported as TOKEN when it consists of a letter
# followed by letters, digits, "." or "-".  \Z (not $) anchors the end so a
# value with a trailing newline is not mistaken for a token.
_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*\Z")

def write_esis(doc, ofp, knownempty):
    """Write the children of `doc` to `ofp` as ESIS-style event lines.

    doc        -- node whose children are serialized (recursively).
    ofp        -- object with a write() method receiving the output.
    knownempty -- callable taking an element name and returning true when
                  the element is declared empty.

    For each element: an "e" line if it is declared empty, one "A" line per
    attribute (typed TOKEN or CDATA, value passed through
    esistools.encode()), then "(" name, the element's content, and ")" name.
    Text nodes are written as "-" followed by their encoded data.

    Raises ValueError when a declared-empty element has children, and
    RuntimeError for any node that is neither an element nor text.
    """
    for node in doc.childNodes:
        nodeType = node.nodeType
        if nodeType == xml.dom.core.ELEMENT:
            gi = node.tagName
            if knownempty(gi):
                if node.hasChildNodes():
                    raise ValueError("declared-empty node has children")
                ofp.write("e\n")
            for k, v in node.attributes.items():
                value = v.value
                if _token_rx.match(value):
                    dtype = "TOKEN"
                else:
                    dtype = "CDATA"
                ofp.write("A%s %s %s\n" % (k, dtype, esistools.encode(value)))
            ofp.write("(%s\n" % gi)
            write_esis(node, ofp, knownempty)
            ofp.write(")%s\n" % gi)
        elif nodeType == xml.dom.core.TEXT:
            ofp.write("-%s\n" % esistools.encode(node.data))
        else:
            raise RuntimeError("unsupported node type: %s" % nodeType)
271
272
def convert(ifp, ofp):
    """Read an ESIS stream from `ifp`, fix up the document, write it to `ofp`.

    The whole input is fed to an esistools.ExtendedEsisBuilder; the resulting
    document is run through the fix-up passes below (their order matters --
    handle_appendix() must follow simplify()) and is finally serialized with
    write_esis().  An IOError with errno EPIPE raised while writing is
    swallowed; any other IOError propagates.
    """
    p = esistools.ExtendedEsisBuilder()
    p.feed(ifp.read())
    doc = p.document
    normalize(doc)
    handle_args(doc)
    simplify(doc)
    handle_labels(doc)
    handle_appendix(doc)
    fixup_trailing_whitespace(doc, {
        "abstract": "\n",
        "title": "",
        "chapter": "\n\n",
        "section": "\n\n",
        "subsection": "\n\n",
        "subsubsection": "\n\n",
        "paragraph": "\n\n",
        "subparagraph": "\n\n",
        })
    cleanup_root_text(doc)
    cleanup_trailing_parens(doc, ["function", "method", "cfunction"])
    cleanup_synopses(doc)
    #
    # write_esis() wants a predicate, not a collection.
    knownempty = frozenset(p.get_empties()).__contains__
    #
    try:
        write_esis(doc, ofp, knownempty)
    except IOError as err:
        # Ignore EPIPE; it just means that whoever we're writing to stopped
        # reading. The rest of the output would be ignored. All other errors
        # should still be reported.
        if err.errno != errno.EPIPE:
            raise
309
310
def usage():
    """Write a one-line usage summary to standard error."""
    sys.stderr.write("usage: %s [infile [outfile]]\n" % sys.argv[0])


def main():
    """Command-line entry point.

    With no arguments, convert standard input to standard output.  With one
    argument, read from the named file; with two, also write to the second
    named file.  Any other number of arguments prints a usage message and
    exits with status 2.  Files opened here are closed again, even when the
    conversion fails.
    """
    nargs = len(sys.argv) - 1
    if nargs > 2:
        usage()
        sys.exit(2)
    ifp = sys.stdin
    ofp = sys.stdout
    opened = []
    try:
        if nargs >= 1:
            ifp = open(sys.argv[1])
            opened.append(ifp)
        if nargs == 2:
            ofp = open(sys.argv[2], "w")
            opened.append(ofp)
        convert(ifp, ofp)
    finally:
        # Only close what was opened here; never stdin/stdout.
        for fp in opened:
            fp.close()


if __name__ == "__main__":
    main()