blob: dff20760403563cc15dd8895d9f1529489b3da10 [file] [log] [blame]
Fred Drake03204731998-11-23 17:02:03 +00001#! /usr/bin/env python
2
3"""Promote the IDs from <label/> elements to the enclosing section / chapter /
4whatever, then remove the <label/> elements. This allows *ML style internal
5linking rather than the bogus LaTeX model.
6
7Note that <label/>s in <title> elements are promoted two steps, since the
8<title> elements are artificially created from the section parameter, and the
9label really refers to the sectioning construct.
10"""
11__version__ = '$Revision$'
12
13
14import errno
15import string
16import sys
17import xml.dom.core
18import xml.dom.esis_builder
19
20
21# Workaround to deal with invalid documents (multiple root elements). This
22# does not indicate a bug in the DOM implementation.
23#
def get_documentElement(self):
    """Return the last element-type child of the document, or None.

    Every entry of self._node.children is examined and each element entry
    is wrapped in turn, so when several root-level elements are present
    the wrapper for the last one is what gets returned.
    """
    found = None
    for raw in self._node.children:
        if raw.type != xml.dom.core.ELEMENT:
            continue
        found = xml.dom.core.Element(raw, self, self)
    return found

# Install the replacement accessor on the DOM Document class.
xml.dom.core.Document.get_documentElement = get_documentElement
32
33
34# Replace get_childNodes for the Document class; without this, children
35# accessed from the Document object via .childNodes (no matter how many
36# levels of access are used) will be given an ownerDocument of None.
37#
def get_childNodes(self):
    """Return the document's children wrapped in a NodeList.

    The document itself is passed as both trailing NodeList arguments.
    """
    raw_children = self._node.children
    return xml.dom.core.NodeList(raw_children, self, self)

# Install the replacement accessor on the DOM Document class.
xml.dom.core.Document.get_childNodes = get_childNodes
42
43
def get_first_element(doc, gi):
    """Return the first direct child element of doc whose tag name is gi.

    Only doc.childNodes is searched (no recursion).  Returns None when no
    child matches.
    """
    for child in doc.childNodes:
        if child.nodeType != xml.dom.core.ELEMENT:
            continue
        if child.tagName == gi:
            return child
    return None
48
def extract_first_element(doc, gi):
    """Detach and return the first direct child element of doc named gi.

    Returns None, leaving doc untouched, when get_first_element() finds
    no such child.
    """
    found = get_first_element(doc, gi)
    if found is None:
        return None
    doc.removeChild(found)
    return found
54
55
def simplify(doc):
    """Rearrange the root-level nodes of doc into a more regular shape.

    Steps, in order:
      * remove the root-level <documentclass> element, remembering its
        "classname" attribute (default "document");
      * remove the root-level <title> element and every root-level
        <input> element, collecting them;
      * rename the root-level <document> element to the class name;
      * re-insert the collected nodes, each followed by a newline text
        node, at the front of doc.documentElement;
      * drop any text nodes at the very start of the document.
    """
    # Try to rationalize the document a bit, since these things are simply
    # not valid SGML/XML documents as they stand, and need a little work.
    documentclass = "document"
    inputs = []
    node = extract_first_element(doc, "documentclass")
    if node is not None:
        documentclass = node.getAttribute("classname")
    node = extract_first_element(doc, "title")
    if node is not None:
        inputs.append(node)
    # update the name of the root element
    node = get_first_element(doc, "document")
    if node is not None:
        node._node.name = documentclass
    while 1:
        node = extract_first_element(doc, "input")
        if node is None:
            break
        inputs.append(node)
    if inputs:
        docelem = doc.documentElement
        # Inserting at the front one at a time reverses the order, so
        # reverse first to end up with the original sequence.
        inputs.reverse()
        for node in inputs:
            text = doc.createTextNode("\n")
            docelem.insertBefore(text, docelem.firstChild)
            docelem.insertBefore(node, text)
        docelem.insertBefore(doc.createTextNode("\n"), docelem.firstChild)
    # Strip leading text nodes.  Stop when the document runs out of
    # children instead of dereferencing a missing first child.
    first = doc.firstChild
    while first is not None and first.nodeType == xml.dom.core.TEXT:
        doc.removeChild(first)
        first = doc.firstChild
86
87
def cleanup_root_text(doc):
    """Remove root-level text nodes, except one directly after a comment.

    The children of doc are scanned once; a text node is kept only when
    the node immediately before it is a comment node.  Removal happens
    after the scan so the child list is not altered while it is walked.
    """
    doomed = []
    after_comment = 0
    for child in doc.childNodes:
        kind = child.nodeType
        if kind == xml.dom.core.TEXT and not after_comment:
            doomed.append(child)
        after_comment = (kind == xml.dom.core.COMMENT)
    for child in doomed:
        doc.removeChild(child)
100
101
def rewrite_desc_entries(doc, argname_gi):
    """Wrap the siblings of each <argname_gi> element in a <description>.

    For every element named argname_gi found anywhere in doc:
      * all children of its parent that are not themselves argname_gi
        elements are moved, in order, into a new <description> element;
      * the argname_gi element is kept (preceded by a newline/indent text
        node) when it has children, and removed when it is empty;
      * whitespace, the <description> and a final newline are appended
        to the parent.

    Text nodes are created with createTextNode(), the DOM Level 1 factory
    name, which is also what simplify() in this file uses.
    """
    argnodes = doc.getElementsByTagName(argname_gi)
    for node in argnodes:
        parent = node.parentNode
        # Collect first, move afterwards, so the child list is not
        # modified while it is being iterated.
        nodes = []
        for n in parent.childNodes:
            if n.nodeType != xml.dom.core.ELEMENT or n.tagName != argname_gi:
                nodes.append(n)
        desc = doc.createElement("description")
        for n in nodes:
            parent.removeChild(n)
            desc.appendChild(n)
        if node.childNodes:
            # keep the <args>...</args>, newline & indent
            parent.insertBefore(doc.createTextNode("\n "), node)
        else:
            # no arguments, remove the <args/> node
            parent.removeChild(node)
        parent.appendChild(doc.createTextNode("\n "))
        parent.appendChild(desc)
        parent.appendChild(doc.createTextNode("\n"))
123
def handle_args(doc):
    """Run rewrite_desc_entries() for "args", then "constructor-args"."""
    for argname_gi in ("args", "constructor-args"):
        rewrite_desc_entries(doc, argname_gi)
127
128
def handle_comments(doc, node=None):
    """Replace <COMMENT> elements below node with real comment nodes.

    node defaults to doc itself.  Each child element named "COMMENT" is
    replaced by a comment built from the data of its first child; every
    other child element is processed recursively.
    """
    if node is None:
        node = doc
    for child in node.childNodes:
        if child.nodeType != xml.dom.core.ELEMENT:
            continue
        if child.tagName != "COMMENT":
            handle_comments(doc, child)
            continue
        text = child.childNodes[0].data
        node.replaceChild(doc.createComment(text), child)
139
140
def handle_labels(doc):
    """Move each <label id="..."/> onto an enclosing element as its id.

    For every <label> in doc that has a non-empty "id" attribute, the id
    is set on the label's parent -- or on the parent's parent when the
    parent is a <title> -- and the label is then removed.  Labels without
    an id are left where they are.
    """
    for label in doc.getElementsByTagName("label"):
        label_id = label.getAttribute("id")
        if label_id:
            holder = label.parentNode
            target = holder
            if holder.tagName == "title":
                # the title stands in for its sectioning element
                target = holder.parentNode
            target.setAttribute("id", label_id)
            # now, remove <label id="..."/> from its parent:
            holder.removeChild(label)
154
155
def fixup_trailing_whitespace(doc, wsmap):
    """Normalize the whitespace that ends selected elements.

    doc is walked breadth-first.  For each element whose tag name is a key
    of wsmap, if its last child is a text node, that text is right-stripped
    and the mapped string is appended.  An element with no children is
    left alone.  For <title> elements, a newline/indent text node is also
    inserted at the front of the parent when the parent's first child is
    an element.

    wsmap maps tag names to the whitespace string that should end the
    element's trailing text.
    """
    queue = [doc]
    while queue:
        node = queue[0]
        del queue[0]
        if node.nodeType == xml.dom.core.ELEMENT \
           and wsmap.has_key(node.tagName):
            ws = wsmap[node.tagName]
            children = node.childNodes
            count = len(children)
            # Look at the last child directly; guard against elements
            # that have no children at all.
            if count:
                last = children[count - 1]
                if last.nodeType == xml.dom.core.TEXT:
                    last.data = string.rstrip(last.data) + ws
            # hack to get the title in place:
            if node.tagName == "title" \
               and node.parentNode.firstChild.nodeType == xml.dom.core.ELEMENT:
                node.parentNode.insertBefore(doc.createTextNode("\n "),
                                             node.parentNode.firstChild)
        for child in node.childNodes:
            if child.nodeType == xml.dom.core.ELEMENT:
                queue.append(child)
178
179
def normalize(doc):
    """Call normalize() on every element that is a direct child of doc."""
    for child in doc.childNodes:
        if child.nodeType != xml.dom.core.ELEMENT:
            continue
        child.normalize()
184
185
def cleanup_trailing_parens(doc, element_names):
    """Strip a trailing "()" from the text of the named elements.

    Elements are visited breadth-first starting from the element children
    of doc.  An element whose tag is in element_names is rewritten only
    when its sole child is a text node ending in "()"; its children are
    not visited.  Any other element just has its element children queued.
    """
    targets = {}
    for name in element_names:
        targets[name] = name
    pending = []
    for top in doc.childNodes:
        if top.nodeType == xml.dom.core.ELEMENT:
            pending.append(top)
    while pending:
        node = pending[0]
        del pending[0]
        if not targets.has_key(node.tagName):
            # not a target: descend into its element children
            for child in node.childNodes:
                if child.nodeType == xml.dom.core.ELEMENT:
                    pending.append(child)
            continue
        kids = node.childNodes
        if len(kids) != 1:
            continue
        only = kids[0]
        if only.nodeType != xml.dom.core.TEXT:
            continue
        text = only.data
        if text[-2:] == "()":
            only.data = text[:-2]
209
210
Fred Drake03204731998-11-23 17:02:03 +0000211def convert(ifp, ofp):
212 p = xml.dom.esis_builder.EsisBuilder()
213 p.feed(ifp.read())
214 doc = p.document
Fred Drake1ff6db41998-11-23 23:10:35 +0000215 normalize(doc)
Fred Drake03204731998-11-23 17:02:03 +0000216 handle_args(doc)
217 handle_comments(doc)
218 simplify(doc)
219 handle_labels(doc)
Fred Drake1ff6db41998-11-23 23:10:35 +0000220 fixup_trailing_whitespace(doc, {
221 "abstract": "\n",
222 "title": "",
223 "chapter": "\n\n",
224 "section": "\n\n",
225 "subsection": "\n\n",
226 "subsubsection": "\n\n",
227 "paragraph": "\n\n",
228 "subparagraph": "\n\n",
229 })
Fred Drake03204731998-11-23 17:02:03 +0000230 cleanup_root_text(doc)
Fred Drake1ff6db41998-11-23 23:10:35 +0000231 cleanup_trailing_parens(doc, ["function", "method", "cfunction"])
Fred Drake03204731998-11-23 17:02:03 +0000232 try:
233 ofp.write(doc.toxml())
234 ofp.write("\n")
235 except IOError, (err, msg):
236 # Ignore EPIPE; it just means that whoever we're writing to stopped
237 # reading. The rest of the output would be ignored. All other errors
238 # should still be reported,
239 if err != errno.EPIPE:
240 raise
241
242
def main():
    """Command-line entry point: docfixer [infile [outfile]].

    With no arguments, input is read from stdin and output goes to
    stdout.  One argument names the input file; two arguments name the
    input and output files.  Any other argument count prints a usage
    message on stderr and exits with status 2.  Files opened here are
    closed once convert() returns or raises.
    """
    args = sys.argv[1:]
    if len(args) > 2:
        # Report the problem directly; no usage() helper exists in this
        # file.
        sys.stderr.write("usage: %s [infile [outfile]]\n" % sys.argv[0])
        sys.exit(2)
    ifp = sys.stdin
    ofp = sys.stdout
    if args:
        ifp = open(args[0])
    if len(args) == 2:
        ofp = open(args[1], "w")
    try:
        convert(ifp, ofp)
    finally:
        # Only close what was opened here; leave the standard streams.
        if ofp is not sys.stdout:
            ofp.close()
        if ifp is not sys.stdin:
            ifp.close()


if __name__ == "__main__":
    main()