blob: 00d4727a1abeba92777ff5928518871b1eec2a1a [file] [log] [blame]
Fred Drake03204731998-11-23 17:02:03 +00001#! /usr/bin/env python
2
"""Promote the IDs from <label/> elements to the enclosing section / chapter /
whatever, then remove the <label/> elements.  This allows *ML style internal
linking rather than the bogus LaTeX model.

Note that <label/>s in <title> elements are promoted two steps, since the
<title> elements are artificially created from the section parameter, and the
label really refers to the sectioning construct.
"""
11__version__ = '$Revision$'
12
13
14import errno
15import string
16import sys
17import xml.dom.core
18import xml.dom.esis_builder
19
20
# Workaround to deal with invalid documents (multiple root elements).  This
# does not indicate a bug in the DOM implementation.
#
def get_documentElement(self):
    """Return an Element wrapper for the last top-level element, or None.

    Every child of the underlying node tree is examined; each child whose
    type is ELEMENT replaces the previous candidate, so when the document
    has several root elements the final one is returned.  None is returned
    when the document has no element child at all.
    """
    element_type = xml.dom.core.ELEMENT
    found = None
    for child in self._node.children:
        if child.type != element_type:
            continue
        # NOTE(review): the two trailing arguments are presumably the parent
        # and the owner document -- confirm against xml.dom.core.Element.
        found = xml.dom.core.Element(child, self, self)
    return found

# Install the replacement accessor on the DOM Document class.
xml.dom.core.Document.get_documentElement = get_documentElement
32
33
# Replace get_childNodes for the Document class; without this, children
# accessed from the Document object via .childNodes (no matter how many
# levels of access are used) will be given an ownerDocument of None.
#
def get_childNodes(self):
    """Return a NodeList over the document's top-level children.

    The list is built with this document passed for both trailing
    arguments so that the nodes reached through it know their document.
    """
    raw_children = self._node.children
    return xml.dom.core.NodeList(raw_children, self, self)

# Install the replacement accessor on the DOM Document class.
xml.dom.core.Document.get_childNodes = get_childNodes
42
43
def get_first_element(doc, gi):
    """Return the first direct child element of doc named gi, or None.

    Only the immediate children of doc are examined; the search does not
    descend into the tree.
    """
    for child in doc.childNodes:
        if child.nodeType != xml.dom.core.ELEMENT:
            continue
        if child.tagName == gi:
            return child
    return None
48
def extract_first_element(doc, gi):
    """Detach and return the first direct child element of doc named gi.

    Returns None, leaving doc untouched, when no such child exists.
    """
    found = get_first_element(doc, gi)
    if found is None:
        return None
    doc.removeChild(found)
    return found
54
55
def simplify(doc):
    """Rationalize the top level of the converted document, in place.

    Steps, in order:

    * remove the first top-level <documentclass> element and remember its
      "classname" attribute (default "document");
    * remove the first top-level <title> element and queue it;
    * rename the first top-level <document> element to the class name;
    * remove every top-level <input> element and queue it;
    * re-insert the queued nodes, in their original order and each followed
      by a newline, at the front of the document element;
    * drop any text nodes that lead the document.
    """
    # Try to rationalize the document a bit, since these things are simply
    # not valid SGML/XML documents as they stand, and need a little work.
    documentclass = "document"
    inputs = []
    node = extract_first_element(doc, "documentclass")
    if node is not None:
        documentclass = node.getAttribute("classname")
    node = extract_first_element(doc, "title")
    if node is not None:
        inputs.append(node)
    # update the name of the root element
    node = get_first_element(doc, "document")
    if node is not None:
        node._node.name = documentclass
    while 1:
        node = extract_first_element(doc, "input")
        if node is None:
            break
        inputs.append(node)
    if inputs:
        docelem = doc.documentElement
        # Each node is pushed onto the front, so walk the queue backwards to
        # end up with the nodes in their original order.
        inputs.reverse()
        for node in inputs:
            text = doc.createTextNode("\n")
            docelem.insertBefore(text, docelem.firstChild)
            docelem.insertBefore(node, text)
        docelem.insertBefore(doc.createTextNode("\n"), docelem.firstChild)
    # Strip leading text from the document.  The None test keeps a document
    # that has run out of children (empty or text-only input) from raising
    # AttributeError; this relies on firstChild being None when there are no
    # children, as the DOM specifies.
    while 1:
        first = doc.firstChild
        if first is None or first.nodeType != xml.dom.core.TEXT:
            break
        doc.removeChild(first)
86
87
def cleanup_root_text(doc):
    """Remove top-level text nodes, except those directly after a comment.

    The children of doc are scanned once to pick the text nodes to drop;
    a text node is kept only when the node immediately before it is a
    comment.  Removal happens after the scan is complete.
    """
    doomed = []
    after_comment = 0
    for child in doc.childNodes:
        kind = child.nodeType
        if kind == xml.dom.core.TEXT and not after_comment:
            doomed.append(child)
        # Only a comment protects the node that follows it.
        after_comment = (kind == xml.dom.core.COMMENT)
    for child in doomed:
        doc.removeChild(child)
100
101
def rewrite_desc_entries(doc, argname_gi):
    """Wrap the body of each entry that has an argname_gi child.

    For every element named argname_gi found in doc, all siblings that are
    not themselves argname_gi elements are moved, in order, into a new
    <description> element appended to the parent.  An argname_gi element
    with no children is removed; otherwise it is kept and preceded by a
    newline-and-indent text node.

    Text nodes are created with createTextNode(), the DOM factory method
    that simplify() in this file also uses; the original spelled it
    createText() here.
    """
    argnodes = doc.getElementsByTagName(argname_gi)
    for node in argnodes:
        parent = node.parentNode
        # Collect first, move afterwards, so the child list is not modified
        # while it is being walked.
        nodes = []
        for n in parent.childNodes:
            if n.nodeType != xml.dom.core.ELEMENT or n.tagName != argname_gi:
                nodes.append(n)
        desc = doc.createElement("description")
        for n in nodes:
            parent.removeChild(n)
            desc.appendChild(n)
        if node.childNodes:
            # keep the <args>...</args>, newline & indent
            parent.insertBefore(doc.createTextNode("\n "), node)
        else:
            # no arguments, remove the <args/> node
            parent.removeChild(node)
        parent.appendChild(doc.createTextNode("\n "))
        parent.appendChild(desc)
        parent.appendChild(doc.createTextNode("\n"))
123
def handle_args(doc):
    """Rewrite the entries carrying <args> and <constructor-args> children."""
    for gi in ("args", "constructor-args"):
        rewrite_desc_entries(doc, gi)
127
128
def handle_comments(doc, node=None):
    """Replace <COMMENT> elements below node with real comment nodes.

    node defaults to the document itself.  Each child element named
    "COMMENT" is replaced by a comment built from the data of its first
    child; other child elements are processed recursively.  An empty
    <COMMENT/> element becomes an empty comment instead of raising
    IndexError.
    """
    if node is None:
        node = doc
    for n in node.childNodes:
        if n.nodeType != xml.dom.core.ELEMENT:
            continue
        if n.tagName == "COMMENT":
            if n.childNodes:
                data = n.childNodes[0].data
            else:
                # Nothing inside the element: there is no text to carry over.
                data = ""
            node.replaceChild(doc.createComment(data), n)
        else:
            handle_comments(doc, n)
139
140
def handle_labels(doc):
    """Move each <label> id onto the enclosing element and drop the label.

    A label found directly inside a <title> sets the "id" attribute of the
    title's parent; any other label sets it on its own parent.  Labels
    whose "id" attribute is empty or missing are left where they are.
    """
    for label in doc.getElementsByTagName("label"):
        label_id = label.getAttribute("id")
        if label_id:
            holder = label.parentNode
            target = holder
            if holder.tagName == "title":
                # The title stands in for the sectioning element around it.
                target = holder.parentNode
            target.setAttribute("id", label_id)
            # now, remove <label id="..."/> from its parent:
            holder.removeChild(label)
154
155
def convert(ifp, ofp):
    """Read an ESIS stream from ifp, fix up the document, write XML to ofp.

    All of ifp is read and fed to an EsisBuilder; the resulting document
    is passed through handle_args, handle_comments, simplify, handle_labels
    and cleanup_root_text, in that order, and then serialized to ofp
    followed by a newline.

    An IOError whose errno is EPIPE raised while writing is ignored; any
    other IOError propagates to the caller.
    """
    p = xml.dom.esis_builder.EsisBuilder()
    p.feed(ifp.read())
    doc = p.document
    handle_args(doc)
    handle_comments(doc)
    simplify(doc)
    handle_labels(doc)
    cleanup_root_text(doc)
    try:
        ofp.write(doc.toxml())
        ofp.write("\n")
    except IOError:
        # Ignore EPIPE; it just means that whoever we're writing to stopped
        # reading.  The rest of the output would be ignored.  All other
        # errors should still be reported.
        #
        # The exception is fetched with sys.exc_info() and inspected through
        # its errno attribute rather than unpacked as an (errno, message)
        # pair: unpacking fails for an IOError that does not carry exactly
        # two arguments.
        err = sys.exc_info()[1]
        if getattr(err, "errno", None) != errno.EPIPE:
            raise
173 raise
174
175
def main():
    """Command-line entry point: docfixer [infile [outfile]].

    With no arguments the document is read from stdin and written to
    stdout; one argument names the input file; two name the input and
    output files.  Any other argument count writes a usage message to
    stderr and exits with status 2.  (The original called a usage()
    function that this file never defines, so that path raised NameError.)

    Files opened here are closed again once the conversion finishes or
    fails.
    """
    nargs = len(sys.argv) - 1
    if nargs not in (0, 1, 2):
        sys.stderr.write("usage: %s [infile [outfile]]\n" % sys.argv[0])
        sys.exit(2)
    ifp = sys.stdin
    ofp = sys.stdout
    if nargs >= 1:
        ifp = open(sys.argv[1])
    try:
        if nargs == 2:
            ofp = open(sys.argv[2], "w")
        try:
            convert(ifp, ofp)
        finally:
            # Never close the standard streams, only what was opened here.
            if ofp is not sys.stdout:
                ofp.close()
    finally:
        if ifp is not sys.stdin:
            ifp.close()


if __name__ == "__main__":
    main()