blob: a5ebe5f7e3a189dd336db0a07199801fe2bb597a [file] [log] [blame]
Fred Drake1f549022000-09-24 05:21:58 +00001"""\
Fred Drakef7cf40d2000-12-14 18:16:11 +00002minidom.py -- a lightweight DOM implementation.
Fred Drake55c38192000-06-29 19:39:57 +00003
Guido van Rossum9e1fe1e2001-02-05 19:17:50 +00004parse("foo.xml")
Paul Prescod623511b2000-07-21 22:05:49 +00005
Guido van Rossum9e1fe1e2001-02-05 19:17:50 +00006parseString("<foo><bar/></foo>")
Paul Prescod623511b2000-07-21 22:05:49 +00007
Fred Drake55c38192000-06-29 19:39:57 +00008Todo:
9=====
10 * convenience methods for getting elements and text.
11 * more testing
12 * bring some of the writer and linearizer code into conformance with this
13 interface
14 * SAX 2 namespaces
15"""
16
Fred Drakef7cf40d2000-12-14 18:16:11 +000017import xml.dom
Fred Drake55c38192000-06-29 19:39:57 +000018
Martin v. Löwis995359c2003-01-26 08:59:32 +000019from xml.dom import EMPTY_NAMESPACE, EMPTY_PREFIX, XMLNS_NAMESPACE, domreg
Martin v. Löwis787354c2003-01-25 15:28:29 +000020from xml.dom.minicompat import *
21from xml.dom.xmlbuilder import DOMImplementationLS, DocumentLS
Fred Drake3ac6a092001-09-28 04:33:06 +000022
# Type object used to recognise (namespaceURI, localName) tuple keys.
_TupleType = tuple

# This is used by the ID-cache invalidation checks; the list isn't
# actually complete, since the nodes being checked will never be the
# DOCUMENT_NODE or DOCUMENT_FRAGMENT_NODE.  (The node being checked is
# the node being added or removed, not the node being modified.)
#
_nodeTypes_with_children = (
    xml.dom.Node.ELEMENT_NODE,
    xml.dom.Node.ENTITY_REFERENCE_NODE,
    )
Martin v. Löwis95700f72002-03-15 13:51:59 +000032
Fred Drake3ac6a092001-09-28 04:33:06 +000033
class Node(xml.dom.Node, GetattrMagic):
    """Common base class for minidom nodes.

    Provides the tree links (parent, siblings, owner document) and the
    child-management methods shared by the node classes.  The methods
    rely on ``nodeType``, ``childNodes`` and ``_child_node_types``,
    none of which is defined here; subclasses supply them.
    """

    namespaceURI = None # this is non-null only for elements and attributes
    parentNode = None
    ownerDocument = None
    nextSibling = None
    previousSibling = None

    prefix = EMPTY_PREFIX # non-null only for NS elements and attributes

    def __nonzero__(self):
        """Nodes are always true in a boolean context."""
        return True

    def toxml(self, encoding = None):
        """Serialize this node with no added indentation or newlines.

        Same as toprettyxml("", "", encoding).
        """
        return self.toprettyxml("", "", encoding)

    def toprettyxml(self, indent="\t", newl="\n", encoding = None):
        """Serialize this node through writexml() and return the result.

        indent   -- the indentation string to prepend, per level
        newl     -- the newline string to append
        encoding -- when not None, output is routed through that
                    codec's stream writer; for a document node the
                    encoding is also passed on to writexml()
        """
        writer = _get_StringIO()
        if encoding is not None:
            import codecs
            # Can't use codecs.getwriter to preserve 2.0 compatibility
            writer = codecs.lookup(encoding)[3](writer)
        if self.nodeType == Node.DOCUMENT_NODE:
            # Can pass encoding only to document, to put it into XML header
            self.writexml(writer, "", indent, newl, encoding)
        else:
            self.writexml(writer, "", indent, newl)
        return writer.getvalue()

    def hasChildNodes(self):
        """Return True if childNodes is non-empty, False otherwise."""
        if self.childNodes:
            return True
        else:
            return False

    def _get_childNodes(self):
        return self.childNodes

    def _get_firstChild(self):
        """Return the first child; None (implicitly) when there is none."""
        if self.childNodes:
            return self.childNodes[0]

    def _get_lastChild(self):
        """Return the last child; None (implicitly) when there is none."""
        if self.childNodes:
            return self.childNodes[-1]

    def insertBefore(self, newChild, refChild):
        """Insert newChild in front of refChild and return newChild.

        A refChild of None appends newChild.  For a document fragment
        each of its children is inserted in turn and the fragment is
        returned.  newChild is first removed from its current parent.

        Raises xml.dom.HierarchyRequestErr if this node cannot hold a
        node of newChild's type, and xml.dom.NotFoundErr if refChild is
        not one of this node's children.
        """
        if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE:
            # Iterate over a snapshot: every insertion removes the
            # child from the fragment's own list.
            for c in tuple(newChild.childNodes):
                self.insertBefore(c, refChild)
            ### The DOM does not clearly specify what to return in this case
            return newChild
        if newChild.nodeType not in self._child_node_types:
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(newChild), repr(self)))
        if newChild.parentNode is not None:
            newChild.parentNode.removeChild(newChild)
        if refChild is None:
            self.appendChild(newChild)
        else:
            try:
                index = self.childNodes.index(refChild)
            except ValueError:
                raise xml.dom.NotFoundErr()
            if newChild.nodeType in _nodeTypes_with_children:
                _clear_id_cache(self)
            self.childNodes.insert(index, newChild)
            newChild.nextSibling = refChild
            refChild.previousSibling = newChild
            if index:
                node = self.childNodes[index-1]
                node.nextSibling = newChild
                newChild.previousSibling = node
            else:
                newChild.previousSibling = None
            newChild.parentNode = self
        return newChild

    def appendChild(self, node):
        """Add node as the last child and return it.

        For a document fragment each of its children is appended in
        turn and the fragment is returned.  node is first removed from
        its current parent.

        Raises xml.dom.HierarchyRequestErr if this node cannot hold a
        node of that type.
        """
        if node.nodeType == self.DOCUMENT_FRAGMENT_NODE:
            for c in tuple(node.childNodes):
                self.appendChild(c)
            ### The DOM does not clearly specify what to return in this case
            return node
        if node.nodeType not in self._child_node_types:
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(node), repr(self)))
        elif node.nodeType in _nodeTypes_with_children:
            _clear_id_cache(self)
        if node.parentNode is not None:
            node.parentNode.removeChild(node)
        _append_child(self, node)
        node.nextSibling = None
        return node

    def replaceChild(self, newChild, oldChild):
        """Put newChild in oldChild's place and return oldChild.

        For a document fragment, oldChild is removed and the fragment's
        children are inserted where it was; the fragment is returned.
        Replacing a child with itself changes nothing and returns None.

        Raises xml.dom.HierarchyRequestErr if this node cannot hold a
        node of newChild's type, and xml.dom.NotFoundErr if oldChild is
        not one of this node's children.
        """
        if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE:
            refChild = oldChild.nextSibling
            self.removeChild(oldChild)
            return self.insertBefore(newChild, refChild)
        if newChild.nodeType not in self._child_node_types:
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(newChild), repr(self)))
        if newChild is oldChild:
            # This has to be checked before newChild is detached from
            # its parent; otherwise replacing a child with itself would
            # silently remove it from the tree.
            return
        if newChild.parentNode is not None:
            newChild.parentNode.removeChild(newChild)
        try:
            index = self.childNodes.index(oldChild)
        except ValueError:
            raise xml.dom.NotFoundErr()
        self.childNodes[index] = newChild
        newChild.parentNode = self
        oldChild.parentNode = None
        if (newChild.nodeType in _nodeTypes_with_children
            or oldChild.nodeType in _nodeTypes_with_children):
            _clear_id_cache(self)
        newChild.nextSibling = oldChild.nextSibling
        newChild.previousSibling = oldChild.previousSibling
        oldChild.nextSibling = None
        oldChild.previousSibling = None
        if newChild.previousSibling:
            newChild.previousSibling.nextSibling = newChild
        if newChild.nextSibling:
            newChild.nextSibling.previousSibling = newChild
        return oldChild

    def removeChild(self, oldChild):
        """Detach oldChild from this node and return it.

        Raises xml.dom.NotFoundErr if oldChild is not one of this
        node's children.
        """
        try:
            self.childNodes.remove(oldChild)
        except ValueError:
            raise xml.dom.NotFoundErr()
        # Close the gap in the sibling chain.
        if oldChild.nextSibling is not None:
            oldChild.nextSibling.previousSibling = oldChild.previousSibling
        if oldChild.previousSibling is not None:
            oldChild.previousSibling.nextSibling = oldChild.nextSibling
        oldChild.nextSibling = oldChild.previousSibling = None
        if oldChild.nodeType in _nodeTypes_with_children:
            _clear_id_cache(self)

        oldChild.parentNode = None
        return oldChild

    def normalize(self):
        """Merge adjacent text children and drop empty ones.

        Element children are normalized recursively.  Text nodes that
        are merged away or discarded are unlinked.
        """
        L = []
        for child in self.childNodes:
            if child.nodeType == Node.TEXT_NODE:
                data = child.data
                if data and L and L[-1].nodeType == child.nodeType:
                    # collapse text node
                    node = L[-1]
                    node.data = node.data + child.data
                    node.nextSibling = child.nextSibling
                    child.unlink()
                elif data:
                    if L:
                        L[-1].nextSibling = child
                        child.previousSibling = L[-1]
                    else:
                        child.previousSibling = None
                    L.append(child)
                else:
                    # empty text node; discard
                    child.unlink()
            else:
                if L:
                    L[-1].nextSibling = child
                    child.previousSibling = L[-1]
                else:
                    child.previousSibling = None
                L.append(child)
                if child.nodeType == Node.ELEMENT_NODE:
                    child.normalize()
        if L:
            # The last surviving child may still point at a trailing
            # text node that was discarded above.
            L[-1].nextSibling = None
        self.childNodes[:] = L

    def cloneNode(self, deep):
        """Return the result of _clone_node() for this node.

        The owner document -- or this node itself when it has none --
        is passed as the third argument.
        """
        return _clone_node(self, deep, self.ownerDocument or self)

    def isSupported(self, feature, version):
        """Ask the owner document's implementation about a feature."""
        return self.ownerDocument.implementation.hasFeature(feature, version)

    def _get_localName(self):
        # Overridden in Element and Attr where localName can be Non-Null
        return None

    # Node interfaces from Level 3 (WD 9 April 2002)

    def isSameNode(self, other):
        """Return True if other is this very object."""
        return self is other

    def getInterface(self, feature):
        """Return this node if the feature is supported, else None."""
        if self.isSupported(feature, None):
            return self
        else:
            return None

    # The "user data" functions use a dictionary that is only present
    # if some user data has been set, so be careful not to assume it
    # exists.

    def getUserData(self, key):
        """Return the data stored under key, or None if there is none."""
        try:
            return self._user_data[key][0]
        except (AttributeError, KeyError):
            return None

    def setUserData(self, key, data, handler):
        """Store data and handler under key; return the previous data.

        Passing None as data removes an existing entry.  None is
        returned when nothing was stored under key before.
        """
        old = None
        try:
            d = self._user_data
        except AttributeError:
            d = {}
            self._user_data = d
        if key in d:
            old = d[key][0]
        if data is None:
            # ignore handlers passed for None
            handler = None
            if old is not None:
                del d[key]
        else:
            d[key] = (data, handler)
        return old

    def _call_user_data_handler(self, operation, src, dst):
        """Invoke handle() on every stored handler that is not None."""
        if hasattr(self, "_user_data"):
            for key, (data, handler) in self._user_data.items():
                if handler is not None:
                    handler.handle(operation, key, data, src, dst)

    # minidom-specific API:

    def unlink(self):
        """Drop this node's tree references and unlink its children."""
        self.parentNode = self.ownerDocument = None
        if self.childNodes:
            for child in self.childNodes:
                child.unlink()
            self.childNodes = NodeList()
        self.previousSibling = None
        self.nextSibling = None

defproperty(Node, "firstChild", doc="First child node, or None.")
defproperty(Node, "lastChild",  doc="Last child node, or None.")
defproperty(Node, "localName",  doc="Namespace-local name of this node.")
279
280
def _append_child(self, node):
    """Append node to self.childNodes and link it to its neighbours.

    Fast path with fewer checks; usable by DOM builders if careful.
    The links are written straight into the instances' __dict__, and
    node's nextSibling is not touched.
    """
    siblings = self.childNodes
    node_vars = node.__dict__
    if siblings:
        tail = siblings[-1]
        node_vars["previousSibling"] = tail
        tail.__dict__["nextSibling"] = node
    siblings.append(node)
    node_vars["parentNode"] = self
290
def _in_document(node):
    """Return True iff node is part of a document tree.

    Follows parentNode links upward from node (inclusive) looking for a
    node whose type is DOCUMENT_NODE.
    """
    current = node
    while current is not None and current.nodeType != Node.DOCUMENT_NODE:
        current = current.parentNode
    if current is None:
        return False
    return True
Fred Drake55c38192000-06-29 19:39:57 +0000298
def _write_data(writer, data):
    "Writes datachars to writer."
    # "&" has to be handled first so that the ampersands introduced by
    # the other replacements are not escaped a second time.
    for raw, escaped in (("&", "&amp;"), ("<", "&lt;"),
                         ("\"", "&quot;"), (">", "&gt;")):
        data = data.replace(raw, escaped)
    writer.write(data)
304
def _get_elements_by_tagName_helper(parent, name, rc):
    """Collect into rc the element descendants of parent called name.

    A name of "*" matches every element.  Nodes are visited depth
    first, parents before their children; rc is returned.
    """
    match_all = name == "*"
    for child in parent.childNodes:
        if child.nodeType == Node.ELEMENT_NODE:
            if match_all or child.tagName == name:
                rc.append(child)
        # Descend into every child, matching or not.
        _get_elements_by_tagName_helper(child, name, rc)
    return rc
312
def _get_elements_by_tagName_ns_helper(parent, nsURI, localName, rc):
    """Collect into rc the element descendants matching nsURI/localName.

    "*" acts as a wildcard for either argument.  Only element nodes are
    descended into; rc is returned.
    """
    for child in parent.childNodes:
        if child.nodeType != Node.ELEMENT_NODE:
            continue
        name_ok = localName == "*" or child.localName == localName
        if name_ok and (nsURI == "*" or child.namespaceURI == nsURI):
            rc.append(child)
        _get_elements_by_tagName_ns_helper(child, nsURI, localName, rc)
    return rc
Fred Drake55c38192000-06-29 19:39:57 +0000321
class DocumentFragment(Node):
    """Node of type DOCUMENT_FRAGMENT_NODE holding a list of children."""

    nodeType = Node.DOCUMENT_FRAGMENT_NODE
    nodeName = "#document-fragment"

    # A fragment carries no value, no attributes and no parent.
    nodeValue = None
    attributes = None
    parentNode = None

    # Node types accepted as children of a fragment.
    _child_node_types = (
        Node.ELEMENT_NODE,
        Node.TEXT_NODE,
        Node.CDATA_SECTION_NODE,
        Node.ENTITY_REFERENCE_NODE,
        Node.PROCESSING_INSTRUCTION_NODE,
        Node.COMMENT_NODE,
        Node.NOTATION_NODE,
        )

    def __init__(self):
        # Every fragment starts out with its own empty child list.
        self.childNodes = NodeList()
Martin v. Löwis126f2f62001-03-13 10:50:13 +0000338
339
class Attr(Node):
    """Attribute node; its value is mirrored in a single Text child."""

    nodeType = Node.ATTRIBUTE_NODE
    attributes = None
    ownerElement = None
    specified = False
    _is_id = False

    _child_node_types = (Node.TEXT_NODE, Node.ENTITY_REFERENCE_NODE)

    def __init__(self, qName, namespaceURI=EMPTY_NAMESPACE, localName=None,
                 prefix=None):
        # Fill the instance dictionary directly; that is cheaper than
        # going through __setattr__.
        state = self.__dict__
        state["nodeName"] = qName
        state["name"] = qName
        state["namespaceURI"] = namespaceURI
        state["prefix"] = prefix
        children = NodeList()
        state['childNodes'] = children

        # The single child node that represents the value of the attr.
        children.append(Text())

        # nodeValue and value are set elsewhere

    def _get_localName(self):
        # Whatever follows the first colon, or the whole name if the
        # name has no colon.
        pieces = self.nodeName.split(":", 1)
        return pieces[-1]

    def _get_name(self):
        return self.name

    def _get_specified(self):
        return self.specified

    def __setattr__(self, name, value):
        """Store an attribute, keeping the aliased names in step.

        "value"/"nodeValue" are stored together and copied into the
        text child; "name"/"nodeName" are stored together.  Either
        change clears the owner element's ID cache.
        """
        state = self.__dict__
        if name not in ("value", "nodeValue", "name", "nodeName"):
            state[name] = value
            return
        if name in ("value", "nodeValue"):
            state["value"] = value
            state["nodeValue"] = value
            text_state = self.childNodes[0].__dict__
            text_state["data"] = value
            text_state["nodeValue"] = value
        else:
            state["name"] = value
            state["nodeName"] = value
        owner = self.ownerElement
        if owner is not None:
            _clear_id_cache(owner)

    def _set_prefix(self, prefix):
        """Change the prefix and rebuild name/nodeName accordingly.

        Raises xml.dom.NamespaceErr when the "xmlns" prefix is used
        with a namespace URI other than XMLNS_NAMESPACE.
        """
        uri = self.namespaceURI
        if prefix == "xmlns" and uri and uri != XMLNS_NAMESPACE:
            raise xml.dom.NamespaceErr(
                "illegal use of 'xmlns' prefix for the wrong namespace")
        state = self.__dict__
        state['prefix'] = prefix
        local = self.localName
        if prefix is None:
            qualified = local
        else:
            qualified = "%s:%s" % (prefix, local)
        if self.ownerElement:
            _clear_id_cache(self.ownerElement)
        state['nodeName'] = qualified
        state['name'] = qualified

    def _set_value(self, value):
        state = self.__dict__
        state['value'] = value
        state['nodeValue'] = value
        if self.ownerElement:
            _clear_id_cache(self.ownerElement)
        self.childNodes[0].data = value

    def unlink(self):
        # This implementation does not call the base implementation
        # since most of that is not needed, and the expense of the
        # method call is not warranted.  We duplicate the removal of
        # children, but that's all we needed from the base class.
        owner = self.ownerElement
        if owner is not None:
            # Remove this attribute from both of the owner's indexes.
            del owner._attrs[self.nodeName]
            del owner._attrsNS[(self.namespaceURI, self.localName)]
            if self._is_id:
                self._is_id = False
                owner._magic_id_nodes -= 1
                self.ownerDocument._magic_id_count -= 1
        for kid in self.childNodes:
            kid.unlink()
        del self.childNodes[:]

    def _get_isId(self):
        if self._is_id:
            return True
        document = self.ownerDocument
        element = self.ownerElement
        if document is None or element is None:
            return False

        # Otherwise defer to the document's information about the
        # owner element, if it has any.
        info = document._get_elem_info(element)
        if info is None:
            return False
        if self.namespaceURI:
            return info.isIdNS(self.namespaceURI, self.localName)
        return info.isId(self.nodeName)

    def _get_schemaType(self):
        document = self.ownerDocument
        element = self.ownerElement
        if document is None or element is None:
            return _no_type

        info = document._get_elem_info(element)
        if info is None:
            return _no_type
        if self.namespaceURI:
            return info.getAttributeTypeNS(self.namespaceURI, self.localName)
        return info.getAttributeType(self.nodeName)

defproperty(Attr, "isId",       doc="True if this attribute is an ID.")
defproperty(Attr, "localName",  doc="Namespace-local name of this attribute.")
defproperty(Attr, "schemaType", doc="Schema type for this attribute.")
Fred Drake4ccf4a12000-11-21 22:02:22 +0000460
Fred Drakef7cf40d2000-12-14 18:16:11 +0000461
class NamedNodeMap(NewStyle, GetattrMagic):
    """The attribute list is a transient interface to the underlying
    dictionaries.  Mutations here will change the underlying element's
    dictionary.

    Ordering is imposed artificially and does not reflect the order of
    attributes as found in an input document.
    """

    __slots__ = ('_attrs', '_attrsNS', '_ownerElement')

    def __init__(self, attrs, attrsNS, ownerElement):
        # attrs maps qualified names to attribute nodes, attrsNS maps
        # (namespaceURI, localName) pairs to the same nodes.
        self._attrs = attrs
        self._attrsNS = attrsNS
        self._ownerElement = ownerElement

    def _get_length(self):
        return len(self._attrs)

    def item(self, index):
        """Return the attribute node at position index, or None."""
        try:
            # list() keeps this working when keys() is not indexable.
            return self[list(self._attrs.keys())[index]]
        except IndexError:
            return None

    def items(self):
        """Return a list of (nodeName, value) pairs."""
        return [(node.nodeName, node.value)
                for node in self._attrs.values()]

    def itemsNS(self):
        """Return a list of ((namespaceURI, localName), value) pairs."""
        return [((node.namespaceURI, node.localName), node.value)
                for node in self._attrs.values()]

    def has_key(self, key):
        """Look up a string key by name, anything else by NS pair."""
        if isinstance(key, StringTypes):
            return key in self._attrs
        else:
            return key in self._attrsNS

    def keys(self):
        return self._attrs.keys()

    def keysNS(self):
        return self._attrsNS.keys()

    def values(self):
        return self._attrs.values()

    def get(self, name, value=None):
        """Return the attribute node called name, or value if absent."""
        return self._attrs.get(name, value)

    __len__ = _get_length

    def __cmp__(self, other):
        # Two maps over the very same dictionary compare equal.
        if self._attrs is getattr(other, "_attrs", None):
            return 0
        else:
            return cmp(id(self), id(other))

    def __getitem__(self, attname_or_tuple):
        if isinstance(attname_or_tuple, _TupleType):
            return self._attrsNS[attname_or_tuple]
        else:
            return self._attrs[attname_or_tuple]

    # same as set
    def __setitem__(self, attname, value):
        """Set an attribute from a string value or an Attr node.

        A string updates the attribute called attname, creating it if
        needed.  An Attr node is stored through setNamedItem().
        Anything else raises TypeError.
        """
        if isinstance(value, StringTypes):
            try:
                node = self._attrs[attname]
            except KeyError:
                node = Attr(attname)
                node.ownerDocument = self._ownerElement.ownerDocument
                self.setNamedItem(node)
            node.value = value
        else:
            if not isinstance(value, Attr):
                raise TypeError("value must be a string or Attr object")
            node = value
            self.setNamedItem(node)

    def getNamedItem(self, name):
        """Return the attribute node called name, or None."""
        try:
            return self._attrs[name]
        except KeyError:
            return None

    def getNamedItemNS(self, namespaceURI, localName):
        """Return the node for (namespaceURI, localName), or None."""
        try:
            return self._attrsNS[(namespaceURI, localName)]
        except KeyError:
            return None

    def removeNamedItem(self, name):
        """Remove and return the attribute node called name.

        Raises xml.dom.NotFoundErr if there is no such attribute.
        """
        n = self.getNamedItem(name)
        if n is not None:
            _clear_id_cache(self._ownerElement)
            del self._attrs[n.nodeName]
            del self._attrsNS[(n.namespaceURI, n.localName)]
            if 'ownerElement' in n.__dict__:
                n.__dict__['ownerElement'] = None
            return n
        else:
            raise xml.dom.NotFoundErr()

    def removeNamedItemNS(self, namespaceURI, localName):
        """Remove and return the node for (namespaceURI, localName).

        Raises xml.dom.NotFoundErr if there is no such attribute.
        """
        n = self.getNamedItemNS(namespaceURI, localName)
        if n is not None:
            _clear_id_cache(self._ownerElement)
            del self._attrsNS[(n.namespaceURI, n.localName)]
            del self._attrs[n.nodeName]
            if 'ownerElement' in n.__dict__:
                n.__dict__['ownerElement'] = None
            return n
        else:
            raise xml.dom.NotFoundErr()

    def setNamedItem(self, node):
        """Store node under its name; return the node it replaced.

        Returns None when no attribute of that name existed.  Raises
        xml.dom.HierarchyRequestErr if node is not an Attr.
        """
        if not isinstance(node, Attr):
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(node), repr(self)))
        old = self._attrs.get(node.name)
        if old is not None and old is not node:
            # Only a different node is unlinked.  Unlinking the node
            # being stored would throw away the text child that holds
            # its value.
            old.unlink()
        self._attrs[node.name] = node
        self._attrsNS[(node.namespaceURI, node.localName)] = node
        node.ownerElement = self._ownerElement
        _clear_id_cache(node.ownerElement)
        return old

    def setNamedItemNS(self, node):
        return self.setNamedItem(node)

    def __delitem__(self, attname_or_tuple):
        node = self[attname_or_tuple]
        _clear_id_cache(node.ownerElement)
        node.unlink()

    def __getstate__(self):
        return self._attrs, self._attrsNS, self._ownerElement

    def __setstate__(self, state):
        self._attrs, self._attrsNS, self._ownerElement = state

defproperty(NamedNodeMap, "length",
            doc="Number of nodes in the NamedNodeMap.")

AttributeList = NamedNodeMap
614
Fred Drake1f549022000-09-24 05:21:58 +0000615
class TypeInfo(NewStyle):
    """A (namespace, name) pair; _no_type is the instance with both None."""

    __slots__ = 'namespace', 'name'

    def __init__(self, namespace, name):
        self.namespace = namespace
        self.name = name

    def __repr__(self):
        # %r gives the same text as the backtick repr syntax but is
        # also accepted by Python 3.
        if self.namespace:
            return "<TypeInfo %r (from %r)>" % (self.name, self.namespace)
        else:
            # One-element tuple so that a tuple-valued name is still
            # formatted as a single value.
            return "<TypeInfo %r>" % (self.name,)

    def _get_name(self):
        return self.name

    def _get_namespace(self):
        return self.namespace

_no_type = TypeInfo(None, None)
636
Martin v. Löwisa2fda0d2000-10-07 12:10:28 +0000637class Element(Node):
Fred Drake1f549022000-09-24 05:21:58 +0000638 nodeType = Node.ELEMENT_NODE
Martin v. Löwis787354c2003-01-25 15:28:29 +0000639 nodeValue = None
640 schemaType = _no_type
641
642 _magic_id_nodes = 0
643
644 _child_node_types = (Node.ELEMENT_NODE,
645 Node.PROCESSING_INSTRUCTION_NODE,
646 Node.COMMENT_NODE,
647 Node.TEXT_NODE,
648 Node.CDATA_SECTION_NODE,
649 Node.ENTITY_REFERENCE_NODE)
Martin v. Löwis52ce0d02001-01-27 08:47:37 +0000650
Fred Drake49a5d032001-11-30 22:21:58 +0000651 def __init__(self, tagName, namespaceURI=EMPTY_NAMESPACE, prefix=None,
Fred Drake1f549022000-09-24 05:21:58 +0000652 localName=None):
Fred Drake55c38192000-06-29 19:39:57 +0000653 self.tagName = self.nodeName = tagName
Fred Drake1f549022000-09-24 05:21:58 +0000654 self.prefix = prefix
655 self.namespaceURI = namespaceURI
Martin v. Löwis787354c2003-01-25 15:28:29 +0000656 self.childNodes = NodeList()
Fred Drake55c38192000-06-29 19:39:57 +0000657
Fred Drake4ccf4a12000-11-21 22:02:22 +0000658 self._attrs = {} # attributes are double-indexed:
659 self._attrsNS = {} # tagName -> Attribute
660 # URI,localName -> Attribute
661 # in the future: consider lazy generation
662 # of attribute objects this is too tricky
663 # for now because of headaches with
664 # namespaces.
665
Martin v. Löwis787354c2003-01-25 15:28:29 +0000666 def _get_localName(self):
667 return self.tagName.split(":", 1)[-1]
668
669 def _get_tagName(self):
670 return self.tagName
Fred Drake4ccf4a12000-11-21 22:02:22 +0000671
672 def unlink(self):
673 for attr in self._attrs.values():
674 attr.unlink()
675 self._attrs = None
676 self._attrsNS = None
677 Node.unlink(self)
Fred Drake55c38192000-06-29 19:39:57 +0000678
Fred Drake1f549022000-09-24 05:21:58 +0000679 def getAttribute(self, attname):
Guido van Rossum9e1fe1e2001-02-05 19:17:50 +0000680 try:
681 return self._attrs[attname].value
682 except KeyError:
683 return ""
Fred Drake55c38192000-06-29 19:39:57 +0000684
Fred Drake1f549022000-09-24 05:21:58 +0000685 def getAttributeNS(self, namespaceURI, localName):
Guido van Rossum9e1fe1e2001-02-05 19:17:50 +0000686 try:
687 return self._attrsNS[(namespaceURI, localName)].value
688 except KeyError:
689 return ""
Fred Drake1f549022000-09-24 05:21:58 +0000690
def setAttribute(self, attname, value):
    """Set the attribute *attname* to *value*, creating it if needed.

    An existing attribute is only touched when the value actually
    changes; the ID cache is cleared when that attribute is an ID.
    """
    attr = self.getAttributeNode(attname)
    if attr is not None:
        if value != attr.value:
            fields = attr.__dict__
            fields["value"] = fields["nodeValue"] = value
            if attr.isId:
                _clear_id_cache(self)
        return

    attr = Attr(attname)
    # Write straight into the instance dictionary for performance.
    fields = attr.__dict__
    fields["value"] = fields["nodeValue"] = value
    fields["ownerDocument"] = self.ownerDocument
    self.setAttributeNode(attr)
Fred Drake55c38192000-06-29 19:39:57 +0000705
def setAttributeNS(self, namespaceURI, qualifiedName, value):
    """Set the namespaced attribute *qualifiedName* to *value*.

    The attribute is looked up by (*namespaceURI*, local name).  A new
    Attr is created when none exists; otherwise the value and, when it
    differs, the prefix and node name of the existing one are updated.
    """
    prefix, localname = _nssplit(qualifiedName)
    attr = self.getAttributeNodeNS(namespaceURI, localname)

    if attr is None:
        attr = Attr(qualifiedName, namespaceURI, localname, prefix)
        # Write straight into the instance dictionary for performance.
        fields = attr.__dict__
        fields["prefix"] = prefix
        fields["nodeName"] = qualifiedName
        fields["value"] = fields["nodeValue"] = value
        fields["ownerDocument"] = self.ownerDocument
        self.setAttributeNode(attr)
        return

    fields = attr.__dict__
    if value != attr.value:
        fields["value"] = fields["nodeValue"] = value
        if attr.isId:
            _clear_id_cache(self)
    if attr.prefix != prefix:
        fields["prefix"] = prefix
        fields["nodeName"] = qualifiedName
Fred Drake55c38192000-06-29 19:39:57 +0000727
def getAttributeNode(self, attrname):
    """Return the attribute node stored under *attrname*, or None."""
    node = self._attrs.get(attrname, None)
    return node
Paul Prescod73678da2000-07-01 04:58:47 +0000730
def getAttributeNodeNS(self, namespaceURI, localName):
    """Return the attribute node stored under
    (*namespaceURI*, *localName*), or None.
    """
    key = (namespaceURI, localName)
    return self._attrsNS.get(key, None)
Paul Prescod73678da2000-07-01 04:58:47 +0000733
def setAttributeNode(self, attr):
    """Attach the attribute node *attr* to this element.

    Any attribute already stored under the same name, or under the same
    (namespaceURI, localName) pair, is removed first.

    Returns the attribute node that was replaced, or None when nothing
    was replaced (including the case where *attr* itself was already
    attached to this element).

    Raises xml.dom.InuseAttributeErr when *attr* belongs to another
    element.
    """
    if attr.ownerElement not in (None, self):
        raise xml.dom.InuseAttributeErr("attribute node already owned")
    old1 = self._attrs.get(attr.name, None)
    if old1 is not None:
        self.removeAttributeNode(old1)
    old2 = self._attrsNS.get((attr.namespaceURI, attr.localName), None)
    if old2 is not None and old2 is not old1:
        self.removeAttributeNode(old2)
    _set_attribute_node(self, attr)

    # Report the node that was displaced.  The attribute might already
    # have been part of this node, in which case it doesn't represent a
    # change and should not be returned.  A missing name match must not
    # hide a namespace match, so None candidates are skipped.
    for replaced in (old1, old2):
        if replaced is not None and replaced is not attr:
            return replaced
    return None
Fred Drake55c38192000-06-29 19:39:57 +0000751
Martin v. Löwis126f2f62001-03-13 10:50:13 +0000752 setAttributeNodeNS = setAttributeNode
753
def removeAttribute(self, name):
    """Remove the attribute called *name*.

    Raises xml.dom.NotFoundErr when the element has no such attribute.
    """
    if name not in self._attrs:
        raise xml.dom.NotFoundErr()
    self.removeAttributeNode(self._attrs[name])
Fred Drake55c38192000-06-29 19:39:57 +0000760
def removeAttributeNS(self, namespaceURI, localName):
    """Remove the attribute identified by (*namespaceURI*, *localName*).

    Raises xml.dom.NotFoundErr when the element has no such attribute.
    """
    key = (namespaceURI, localName)
    if key not in self._attrsNS:
        raise xml.dom.NotFoundErr()
    self.removeAttributeNode(self._attrsNS[key])
Fred Drake55c38192000-06-29 19:39:57 +0000767
def removeAttributeNode(self, node):
    """Detach the attribute node *node* from this element.

    Returns the removed node, as the DOM specifies for
    removeAttributeNode.

    Raises xml.dom.NotFoundErr when *node* is None or when no attribute
    is stored under ``node.name``.
    """
    if node is None:
        raise xml.dom.NotFoundErr()
    try:
        self._attrs[node.name]
    except KeyError:
        raise xml.dom.NotFoundErr()
    _clear_id_cache(self)
    node.unlink()
    # Restore this since the node is still useful and otherwise
    # unlinked
    node.ownerDocument = self.ownerDocument
    return node
Fred Drake16f63292000-10-23 18:09:50 +0000780
Martin v. Löwis126f2f62001-03-13 10:50:13 +0000781 removeAttributeNodeNS = removeAttributeNode
782
def hasAttribute(self, name):
    """Return True when an attribute is stored under *name*."""
    # "in" replaces the deprecated dict.has_key() spelling.
    return name in self._attrs
Martin v. Löwis52ce0d02001-01-27 08:47:37 +0000785
def hasAttributeNS(self, namespaceURI, localName):
    """Return True when an attribute is stored under
    (*namespaceURI*, *localName*).
    """
    # "in" replaces the deprecated dict.has_key() spelling.
    return (namespaceURI, localName) in self._attrsNS
788
def getElementsByTagName(self, name):
    """Collect matching elements for *name* into a fresh NodeList by
    delegating to _get_elements_by_tagName_helper().
    """
    matches = NodeList()
    return _get_elements_by_tagName_helper(self, name, matches)
Fred Drake55c38192000-06-29 19:39:57 +0000791
def getElementsByTagNameNS(self, namespaceURI, localName):
    """Collect matching elements for (*namespaceURI*, *localName*) into
    a fresh NodeList by delegating to
    _get_elements_by_tagName_ns_helper().
    """
    matches = NodeList()
    return _get_elements_by_tagName_ns_helper(
        self, namespaceURI, localName, matches)
Fred Drake55c38192000-06-29 19:39:57 +0000795
def __repr__(self):
    """Return a debugging string holding the tag name and id(self)."""
    details = (self.tagName, id(self))
    return "<DOM Element: %s at %#x>" % details
Fred Drake55c38192000-06-29 19:39:57 +0000798
def writexml(self, writer, indent="", addindent="", newl=""):
    """Serialise this element, its attributes and its children.

    indent    -- current indentation
    addindent -- indentation to add to higher levels
    newl      -- newline string
    """
    writer.write(indent + "<" + self.tagName)

    # Attributes are written in sorted name order.
    attrs = self._get_attributes()
    names = list(attrs.keys())
    names.sort()
    for name in names:
        writer.write(" %s=\"" % name)
        _write_data(writer, attrs[name].value)
        writer.write("\"")

    if not self.childNodes:
        # No children: emit an empty-element tag.
        writer.write("/>%s" % (newl))
        return

    writer.write(">%s" % (newl))
    child_indent = indent + addindent
    for child in self.childNodes:
        child.writexml(writer, child_indent, addindent, newl)
    writer.write("%s</%s>%s" % (indent, self.tagName, newl))
Fred Drake55c38192000-06-29 19:39:57 +0000820
def _get_attributes(self):
    """Return a new NamedNodeMap built from both attribute indexes and
    this element.
    """
    by_name = self._attrs
    by_namespace = self._attrsNS
    return NamedNodeMap(by_name, by_namespace, self)
Fred Drake55c38192000-06-29 19:39:57 +0000823
def hasAttributes(self):
    """Return True when the attribute index is non-empty, else False."""
    return bool(self._attrs)
Guido van Rossum9e1fe1e2001-02-05 19:17:50 +0000829
Martin v. Löwis787354c2003-01-25 15:28:29 +0000830 # DOM Level 3 attributes, based on the 22 Oct 2002 draft
831
def setIdAttribute(self, name):
    """Look up the attribute node called *name* and pass it to
    setIdAttributeNode().
    """
    self.setIdAttributeNode(self.getAttributeNode(name))
835
def setIdAttributeNS(self, namespaceURI, localName):
    """Look up the attribute node for (*namespaceURI*, *localName*) and
    pass it to setIdAttributeNode().
    """
    self.setIdAttributeNode(
        self.getAttributeNodeNS(namespaceURI, localName))
839
def setIdAttributeNode(self, idAttr):
    """Mark the attribute node *idAttr* as an ID attribute.

    Raises xml.dom.NotFoundErr when *idAttr* is None or is not owned by
    this element, and xml.dom.NoModificationAllowedErr when this element
    has an entity-reference ancestor.  Nothing changes when the
    attribute is already flagged as an ID.
    """
    if idAttr is None:
        raise xml.dom.NotFoundErr()
    if not self.isSameNode(idAttr.ownerElement):
        raise xml.dom.NotFoundErr()
    if _get_containing_entref(self) is not None:
        raise xml.dom.NoModificationAllowedErr()
    if idAttr._is_id:
        return
    idAttr.__dict__['_is_id'] = True
    self._magic_id_nodes += 1
    self.ownerDocument._magic_id_count += 1
    _clear_id_cache(self)
850
851defproperty(Element, "attributes",
852 doc="NamedNodeMap of attributes on the element.")
853defproperty(Element, "localName",
854 doc="Namespace-local name of this element.")
855
856
def _set_attribute_node(element, attr):
    """Register *attr* in both attribute indexes of *element* and record
    *element* as the attribute's owner.
    """
    _clear_id_cache(element)
    plain_key = attr.name
    element._attrs[plain_key] = attr
    namespace_key = (attr.namespaceURI, attr.localName)
    element._attrsNS[namespace_key] = attr

    # This creates a circular reference, but Element.unlink()
    # breaks the cycle since the references to the attribute
    # dictionaries are tossed.
    attr.__dict__['ownerElement'] = element
866
867
class Childless:
    """Mixin for node types that never have children.

    It supplies empty child accessors and makes every child-mutating
    method raise, bypassing the Node methods that deal with children.
    """

    attributes = None
    childNodes = EmptyNodeList()
    firstChild = None
    lastChild = None

    def _get_firstChild(self):
        """Always None: there are no children."""
        return None

    def _get_lastChild(self):
        """Always None: there are no children."""
        return None

    def hasChildNodes(self):
        """Always False."""
        return False

    def appendChild(self, node):
        """Always raises xml.dom.HierarchyRequestErr."""
        message = self.nodeName + " nodes cannot have children"
        raise xml.dom.HierarchyRequestErr(message)

    def insertBefore(self, newChild, refChild):
        """Always raises xml.dom.HierarchyRequestErr."""
        message = self.nodeName + " nodes do not have children"
        raise xml.dom.HierarchyRequestErr(message)

    def removeChild(self, oldChild):
        """Always raises xml.dom.NotFoundErr."""
        message = self.nodeName + " nodes do not have children"
        raise xml.dom.NotFoundErr(message)

    def replaceChild(self, newChild, oldChild):
        """Always raises xml.dom.HierarchyRequestErr."""
        message = self.nodeName + " nodes do not have children"
        raise xml.dom.HierarchyRequestErr(message)
902
903
class ProcessingInstruction(Childless, Node):
    """Processing-instruction node.

    ``target`` is mirrored in ``nodeName`` and ``data`` in
    ``nodeValue``; assigning to either name of a pair updates both.
    """

    nodeType = Node.PROCESSING_INSTRUCTION_NODE

    def __init__(self, target, data):
        self.target = target
        self.nodeName = target
        self.data = data
        self.nodeValue = data

    def _get_data(self):
        return self.data

    def _set_data(self, value):
        state = self.__dict__
        state['data'] = state['nodeValue'] = value

    def _get_target(self):
        return self.target

    def _set_target(self, value):
        state = self.__dict__
        state['target'] = state['nodeName'] = value

    def __setattr__(self, name, value):
        """Store *value*, keeping the data/nodeValue and target/nodeName
        pairs in step.
        """
        state = self.__dict__
        if name in ("data", "nodeValue"):
            state['data'] = state['nodeValue'] = value
        elif name in ("target", "nodeName"):
            state['target'] = state['nodeName'] = value
        else:
            state[name] = value

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write ``<?target data?>`` between *indent* and *newl*."""
        pieces = (indent, self.target, self.data, newl)
        writer.write("%s<?%s %s?>%s" % pieces)
Fred Drake55c38192000-06-29 19:39:57 +0000933
Martin v. Löwis787354c2003-01-25 15:28:29 +0000934
class CharacterData(Childless, Node):
    """Base class for nodes that carry string data.

    ``data`` and ``nodeValue`` always hold the same string.  The
    offset-taking methods raise xml.dom.IndexSizeErr for a negative
    offset, an offset greater than ``len(data)``, or a negative count.
    An offset equal to ``len(data)`` is accepted, so text can be
    inserted at the very end, including into an empty node.
    """

    def _get_length(self):
        return len(self.data)
    __len__ = _get_length

    def _get_data(self):
        return self.__dict__['data']
    def _set_data(self, data):
        d = self.__dict__
        d['data'] = d['nodeValue'] = data

    _get_nodeValue = _get_data
    _set_nodeValue = _set_data

    def __setattr__(self, name, value):
        """Store *value*, keeping data and nodeValue in step."""
        if name == "data" or name == "nodeValue":
            self.__dict__['data'] = self.__dict__['nodeValue'] = value
        else:
            self.__dict__[name] = value

    def __repr__(self):
        """Show the class name and at most the first ten characters."""
        data = self.data
        if len(data) > 10:
            dotdotdot = "..."
        else:
            dotdotdot = ""
        return "<DOM %s node \"%s%s\">" % (
            self.__class__.__name__, data[0:10], dotdotdot)

    def substringData(self, offset, count):
        """Return up to *count* characters starting at *offset*."""
        if offset < 0:
            raise xml.dom.IndexSizeErr("offset cannot be negative")
        if offset > len(self.data):
            raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")
        if count < 0:
            raise xml.dom.IndexSizeErr("count cannot be negative")
        return self.data[offset:offset+count]

    def appendData(self, arg):
        """Append *arg* to the data."""
        self.data = self.data + arg

    def insertData(self, offset, arg):
        """Insert *arg* before the character at *offset*."""
        if offset < 0:
            raise xml.dom.IndexSizeErr("offset cannot be negative")
        if offset > len(self.data):
            raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")
        if arg:
            self.data = "%s%s%s" % (
                self.data[:offset], arg, self.data[offset:])

    def deleteData(self, offset, count):
        """Delete up to *count* characters starting at *offset*."""
        if offset < 0:
            raise xml.dom.IndexSizeErr("offset cannot be negative")
        if offset > len(self.data):
            raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")
        if count < 0:
            raise xml.dom.IndexSizeErr("count cannot be negative")
        if count:
            self.data = self.data[:offset] + self.data[offset+count:]

    def replaceData(self, offset, count, arg):
        """Replace up to *count* characters at *offset* with *arg*."""
        if offset < 0:
            raise xml.dom.IndexSizeErr("offset cannot be negative")
        if offset > len(self.data):
            raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")
        if count < 0:
            raise xml.dom.IndexSizeErr("count cannot be negative")
        # A zero count with a non-empty arg is a pure insertion and must
        # not be skipped.
        if count or arg:
            self.data = "%s%s%s" % (
                self.data[:offset], arg, self.data[offset+count:])
Martin v. Löwis787354c2003-01-25 15:28:29 +00001005
1006defproperty(CharacterData, "length", doc="Length of the string data.")
1007
Fred Drake87432f42001-04-04 14:09:46 +00001008
class Text(CharacterData):
    """Text node."""

    # Make sure we don't add an instance __dict__ if we don't already
    # have one, at least when that's possible:
    # XXX this does not work, CharacterData is an old-style class
    # __slots__ = ()

    nodeType = Node.TEXT_NODE
    nodeName = "#text"
    attributes = None

    def splitText(self, offset):
        """Split this node at *offset* and return the new tail node.

        The tail is inserted right after this node when this node is
        among its parent's children.  Raises xml.dom.IndexSizeErr when
        *offset* is outside ``0 .. len(data)``.
        """
        if not 0 <= offset <= len(self.data):
            raise xml.dom.IndexSizeErr("illegal offset value")
        newText = self.__class__()
        newText.data = self.data[offset:]
        newText.ownerDocument = self.ownerDocument
        following = self.nextSibling
        parent = self.parentNode
        if parent and self in parent.childNodes:
            if following is None:
                parent.appendChild(newText)
            else:
                parent.insertBefore(newText, following)
        self.data = self.data[:offset]
        return newText

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Pass indent + data + newl to _write_data()."""
        _write_data(writer, "%s%s%s" % (indent, self.data, newl))

    # DOM Level 3 (WD 9 April 2002)

    def _get_wholeText(self):
        """Join the data of this node with that of the unbroken runs of
        text/CDATA siblings on either side.
        """
        text_types = (Node.TEXT_NODE, Node.CDATA_SECTION_NODE)

        # Walk backwards, then flip so the pieces are in document order.
        leading = []
        sibling = self.previousSibling
        while sibling is not None and sibling.nodeType in text_types:
            leading.append(sibling.data)
            sibling = sibling.previousSibling
        leading.reverse()

        trailing = []
        sibling = self.nextSibling
        while sibling is not None and sibling.nodeType in text_types:
            trailing.append(sibling.data)
            sibling = sibling.nextSibling

        return ''.join(leading + [self.data] + trailing)

    def replaceWholeText(self, content):
        """Remove the adjacent text/CDATA siblings and set this node's
        data to *content*.

        Returns this node, or None when *content* is empty (in which
        case this node is removed from its parent as well).
        """
        # XXX This needs to be seriously changed if minidom ever
        # supports EntityReference nodes.
        text_types = (Node.TEXT_NODE, Node.CDATA_SECTION_NODE)
        parent = self.parentNode

        sibling = self.previousSibling
        while sibling is not None and sibling.nodeType in text_types:
            earlier = sibling.previousSibling
            parent.removeChild(sibling)
            sibling = earlier

        # Fetch the next sibling before this node is possibly removed.
        sibling = self.nextSibling
        if not content:
            parent.removeChild(self)
        while sibling is not None and sibling.nodeType in text_types:
            later = sibling.nextSibling
            parent.removeChild(sibling)
            sibling = later

        if not content:
            return None
        state = self.__dict__
        state['data'] = content
        state['nodeValue'] = content
        return self

    def _get_isWhitespaceInElementContent(self):
        """Return what the owner document's element info reports via
        isElementContent() for the containing element, provided the data
        is all whitespace; False whenever any of these is unavailable.
        """
        if self.data.strip():
            return False
        container = _get_containing_element(self)
        if container is None:
            return False
        info = self.ownerDocument._get_elem_info(container)
        if info is None:
            return False
        return info.isElementContent()
1098
1099defproperty(Text, "isWhitespaceInElementContent",
1100 doc="True iff this text node contains only whitespace"
1101 " and is in element content.")
1102defproperty(Text, "wholeText",
1103 doc="The text of all logically-adjacent text nodes.")
1104
1105
def _get_containing_element(node):
    """Return the nearest ancestor of *node* that is an element node,
    or None when there is none.
    """
    ancestor = node.parentNode
    while ancestor is not None and ancestor.nodeType != Node.ELEMENT_NODE:
        ancestor = ancestor.parentNode
    return ancestor
1113
def _get_containing_entref(node):
    """Return the nearest ancestor of *node* that is an entity-reference
    node, or None when there is none.
    """
    ancestor = node.parentNode
    while (ancestor is not None
           and ancestor.nodeType != Node.ENTITY_REFERENCE_NODE):
        ancestor = ancestor.parentNode
    return ancestor
1121
1122
class Comment(Childless, CharacterData):
    """Comment node."""

    nodeType = Node.COMMENT_NODE
    nodeName = "#comment"

    def __init__(self, data):
        self.data = self.nodeValue = data

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write ``<!--data-->`` between *indent* and *newl*.

        Raises ValueError when the data contains "--", which cannot
        appear inside an XML comment; writing it would produce output
        that is not well-formed.
        """
        if self.data.find("--") >= 0:
            raise ValueError("'--' is not allowed in a comment node")
        writer.write("%s<!--%s-->%s" % (indent, self.data, newl))
1132
Fred Drake87432f42001-04-04 14:09:46 +00001133
class CDATASection(Text):
    """CDATA section node."""

    # Make sure we don't add an instance __dict__ if we don't already
    # have one, at least when that's possible:
    # XXX this does not work, Text is an old-style class
    # __slots__ = ()

    nodeType = Node.CDATA_SECTION_NODE
    nodeName = "#cdata-section"

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the data wrapped in ``<![CDATA[ ... ]]>``.

        Raises ValueError when the data contains the terminator "]]>".
        """
        terminator_at = self.data.find("]]>")
        if terminator_at != -1:
            raise ValueError("']]>' not allowed in a CDATA section")
        writer.write("<![CDATA[%s]]>" % self.data)
Fred Drake87432f42001-04-04 14:09:46 +00001147
1148
class ReadOnlySequentialNamedNodeMap(NewStyle, GetattrMagic):
    """Read-only NamedNodeMap backed by a sequence of nodes.

    Lookups scan the sequence linearly; every mutating method raises
    xml.dom.NoModificationAllowedErr.
    """

    __slots__ = '_seq',

    def __init__(self, seq=()):
        # seq should be a list or tuple
        self._seq = seq

    def __len__(self):
        return len(self._seq)

    def _get_length(self):
        return len(self._seq)

    def getNamedItem(self, name):
        """Return the first node whose nodeName equals *name*, or None."""
        for n in self._seq:
            if n.nodeName == name:
                return n
        return None

    def getNamedItemNS(self, namespaceURI, localName):
        """Return the first node matching both *namespaceURI* and
        *localName*, or None.
        """
        for n in self._seq:
            if n.namespaceURI == namespaceURI and n.localName == localName:
                return n
        return None

    def __getitem__(self, name_or_tuple):
        """Look a node up by name, or by a (namespaceURI, localName)
        tuple.  Raises KeyError when nothing matches.
        """
        if isinstance(name_or_tuple, _TupleType):
            node = self.getNamedItemNS(*name_or_tuple)
        else:
            node = self.getNamedItem(name_or_tuple)
        if node is None:
            # Call form of raise; the "raise E, value" form is legacy.
            raise KeyError(name_or_tuple)
        return node

    def item(self, index):
        """Return the node at *index*, or None when *index* is negative
        or past the end.
        """
        if index < 0:
            return None
        try:
            return self._seq[index]
        except IndexError:
            return None

    def removeNamedItem(self, name):
        raise xml.dom.NoModificationAllowedErr(
            "NamedNodeMap instance is read-only")

    def removeNamedItemNS(self, namespaceURI, localName):
        raise xml.dom.NoModificationAllowedErr(
            "NamedNodeMap instance is read-only")

    def setNamedItem(self, node):
        raise xml.dom.NoModificationAllowedErr(
            "NamedNodeMap instance is read-only")

    def setNamedItemNS(self, node):
        raise xml.dom.NoModificationAllowedErr(
            "NamedNodeMap instance is read-only")

    def __getstate__(self):
        return [self._seq]

    def __setstate__(self, state):
        self._seq = state[0]
1210
1211defproperty(ReadOnlySequentialNamedNodeMap, "length",
1212 doc="Number of entries in the NamedNodeMap.")
Paul Prescod73678da2000-07-01 04:58:47 +00001213
Fred Drakef7cf40d2000-12-14 18:16:11 +00001214
class Identified:
    """Mix-in class that supports the publicId and systemId attributes."""

    # XXX this does not work, this is an old-style class
    # __slots__ = 'publicId', 'systemId'

    def _identified_mixin_init(self, publicId, systemId):
        """Record both identifiers on the instance."""
        self.publicId = publicId
        self.systemId = systemId

    def _get_publicId(self):
        """Return the stored public identifier."""
        public_id = self.publicId
        return public_id

    def _get_systemId(self):
        """Return the stored system identifier."""
        system_id = self.systemId
        return system_id
1230
class DocumentType(Identified, Childless, Node):
    """Document type declaration node."""

    nodeType = Node.DOCUMENT_TYPE_NODE
    nodeValue = None
    name = None
    publicId = None
    systemId = None
    internalSubset = None

    def __init__(self, qualifiedName):
        self.entities = ReadOnlySequentialNamedNodeMap()
        self.notations = ReadOnlySequentialNamedNodeMap()
        if qualifiedName:
            prefix, localname = _nssplit(qualifiedName)
            self.name = localname
        self.nodeName = self.name

    def _get_internalSubset(self):
        return self.internalSubset

    def cloneNode(self, deep):
        """Return a copy of this node, or None when it is owned by a
        document.

        With *deep* true the notations and entities are copied as well.
        The user-data handlers of each copied node, and of this node,
        are called with NODE_CLONED.
        """
        if self.ownerDocument is None:
            # it's ok
            clone = DocumentType(None)
            clone.name = self.name
            clone.nodeName = self.name
            operation = xml.dom.UserDataHandler.NODE_CLONED
            if deep:
                clone.entities._seq = []
                clone.notations._seq = []
                for n in self.notations._seq:
                    notation = Notation(n.nodeName, n.publicId, n.systemId)
                    clone.notations._seq.append(notation)
                    n._call_user_data_handler(operation, n, notation)
                for e in self.entities._seq:
                    entity = Entity(e.nodeName, e.publicId, e.systemId,
                                    e.notationName)
                    entity.actualEncoding = e.actualEncoding
                    entity.encoding = e.encoding
                    entity.version = e.version
                    clone.entities._seq.append(entity)
                    # The source node handed to the handler is the
                    # entity itself, not the last notation visited.
                    e._call_user_data_handler(operation, e, entity)
            self._call_user_data_handler(operation, self, clone)
            return clone
        else:
            return None

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the ``<!DOCTYPE ...>`` declaration, with the PUBLIC or
        SYSTEM identifiers and the internal subset when present.
        """
        writer.write("<!DOCTYPE ")
        writer.write(self.name)
        if self.publicId:
            writer.write("\n  PUBLIC '%s'\n  '%s'"
                         % (self.publicId, self.systemId))
        elif self.systemId:
            writer.write("\n  SYSTEM '%s'" % self.systemId)
        if self.internalSubset is not None:
            writer.write(" [")
            writer.write(self.internalSubset)
            writer.write("]")
        writer.write(">\n")
1290
class Entity(Identified, Node):
    """Entity node; every child-mutating method raises
    xml.dom.HierarchyRequestErr.
    """

    attributes = None
    nodeType = Node.ENTITY_NODE
    nodeValue = None

    actualEncoding = None
    encoding = None
    version = None

    def __init__(self, name, publicId, systemId, notation):
        self.nodeName = name
        self.notationName = notation
        self.childNodes = NodeList()
        self._identified_mixin_init(publicId, systemId)

    def _get_actualEncoding(self):
        return self.actualEncoding

    def _get_encoding(self):
        return self.encoding

    def _get_version(self):
        return self.version

    def appendChild(self, newChild):
        message = "cannot append children to an entity node"
        raise xml.dom.HierarchyRequestErr(message)

    def insertBefore(self, newChild, refChild):
        message = "cannot insert children below an entity node"
        raise xml.dom.HierarchyRequestErr(message)

    def removeChild(self, oldChild):
        message = "cannot remove children from an entity node"
        raise xml.dom.HierarchyRequestErr(message)

    def replaceChild(self, newChild, oldChild):
        message = "cannot replace children of an entity node"
        raise xml.dom.HierarchyRequestErr(message)
1330
class Notation(Identified, Childless, Node):
    """Notation node carrying a name plus public and system ids."""

    nodeType = Node.NOTATION_NODE
    nodeValue = None

    def __init__(self, name, publicId, systemId):
        self.nodeName = name
        # The identifiers are stored through the Identified mix-in.
        self._identified_mixin_init(publicId, systemId)
Fred Drakef7cf40d2000-12-14 18:16:11 +00001338
1339
class DOMImplementation(DOMImplementationLS):
    """Factory for documents and document types, with feature queries."""

    _features = [("core", "1.0"),
                 ("core", "2.0"),
                 ("core", "3.0"),
                 ("core", None),
                 ("xml", "1.0"),
                 ("xml", "2.0"),
                 ("xml", "3.0"),
                 ("xml", None),
                 ("ls-load", "3.0"),
                 ("ls-load", None),
                 ]

    def hasFeature(self, feature, version):
        """Report whether (*feature*, *version*) is listed in _features.

        The feature name is lower-cased first and an empty version
        string is treated as None.
        """
        if version == "":
            version = None
        wanted = (feature.lower(), version)
        return wanted in self._features

    def createDocument(self, namespaceURI, qualifiedName, doctype):
        """Create a document, optionally with a root element and doctype.

        No root element is added when all three arguments are None.
        Raises xml.dom.WrongDocumentErr when *doctype* already has a
        parent, xml.dom.InvalidCharacterErr when a root element is
        required but *qualifiedName* is empty, and xml.dom.NamespaceErr
        for an illegal prefix/namespace combination.
        """
        if doctype and doctype.parentNode is not None:
            raise xml.dom.WrongDocumentErr(
                "doctype object owned by another DOM tree")
        doc = self._create_document()

        add_root_element = (namespaceURI is not None
                            or qualifiedName is not None
                            or doctype is not None)

        if add_root_element:
            if not qualifiedName:
                # The spec is unclear what to raise here; SyntaxErr
                # would be the other obvious candidate. Since Xerces raises
                # InvalidCharacterErr, and since SyntaxErr is not listed
                # for createDocument, that seems to be the better choice.
                # XXX: need to check for illegal characters here and in
                # createElement.

                # DOM Level III clears this up when talking about the
                # return value of this function.  If namespaceURI, qName
                # and DocType are Null the document is returned without
                # a document element.  Otherwise if doctype or
                # namespaceURI are not None then we go back to the above
                # problem.
                raise xml.dom.InvalidCharacterErr("Element with no name")

            prefix, localname = _nssplit(qualifiedName)
            if prefix == "xml":
                if namespaceURI != "http://www.w3.org/XML/1998/namespace":
                    raise xml.dom.NamespaceErr(
                        "illegal use of 'xml' prefix")
            if prefix and not namespaceURI:
                raise xml.dom.NamespaceErr(
                    "illegal use of prefix without namespaces")
            element = doc.createElementNS(namespaceURI, qualifiedName)
            if doctype:
                doc.appendChild(doctype)
            doc.appendChild(element)

        if doctype:
            doctype.parentNode = doc
            doctype.ownerDocument = doc

        doc.doctype = doctype
        doc.implementation = self
        return doc

    def createDocumentType(self, qualifiedName, publicId, systemId):
        """Return a new DocumentType with the given identifiers."""
        doctype = DocumentType(qualifiedName)
        doctype.publicId = publicId
        doctype.systemId = systemId
        return doctype

    # DOM Level 3 (WD 9 April 2002)

    def getInterface(self, feature):
        """Return self when *feature* is supported, otherwise None."""
        if not self.hasFeature(feature, None):
            return None
        return self

    # internal
    def _create_document(self):
        return Document()
Fred Drakef7cf40d2000-12-14 18:16:11 +00001420
class ElementInfo(NewStyle):
    """Object that represents content-model information for an element.

    This implementation is not expected to be used in practice; DOM
    builders should provide implementations which do the right thing
    using information available to it.

    """

    __slots__ = 'tagName',

    def __init__(self, name):
        self.tagName = name

    def getAttributeType(self, aname):
        """Return the module-level _no_type object for every attribute."""
        return _no_type

    def getAttributeTypeNS(self, namespaceURI, localName):
        """Return the module-level _no_type object for every attribute."""
        return _no_type

    def isElementContent(self):
        """Always False in this implementation."""
        return False

    def isEmpty(self):
        """Returns true iff this element is declared to have an EMPTY
        content model.  Always False in this implementation."""
        return False

    def isId(self, aname):
        """Returns true iff the named attribute is a DTD-style ID.
        Always False in this implementation."""
        return False

    def isIdNS(self, namespaceURI, localName):
        """Returns true iff the identified attribute is a DTD-style ID.
        Always False in this implementation."""
        return False

    def __getstate__(self):
        # The pickled state is just the tag name.
        return self.tagName

    def __setstate__(self, state):
        self.tagName = state
1462
def _clear_id_cache(node):
    """Empty the ID cache and reset the ID search stack of the document
    that *node* is, or is in.  Nothing happens when *node* is neither a
    document nor reported by _in_document() as being in one.
    """
    if node.nodeType == Node.DOCUMENT_NODE:
        document = node
    elif _in_document(node):
        document = node.ownerDocument
    else:
        return
    document._id_cache.clear()
    document._id_search_stack = None
1470
1471class Document(Node, DocumentLS):
1472 _child_node_types = (Node.ELEMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE,
1473 Node.COMMENT_NODE, Node.DOCUMENT_TYPE_NODE)
1474
Fred Drake1f549022000-09-24 05:21:58 +00001475 nodeType = Node.DOCUMENT_NODE
Fred Drake4ccf4a12000-11-21 22:02:22 +00001476 nodeName = "#document"
1477 nodeValue = None
1478 attributes = None
Fred Drakef7cf40d2000-12-14 18:16:11 +00001479 doctype = None
1480 parentNode = None
Martin v. Löwis126f2f62001-03-13 10:50:13 +00001481 previousSibling = nextSibling = None
Fred Drakef7cf40d2000-12-14 18:16:11 +00001482
1483 implementation = DOMImplementation()
Martin v. Löwis787354c2003-01-25 15:28:29 +00001484
1485 # Document attributes from Level 3 (WD 9 April 2002)
1486
1487 actualEncoding = None
1488 encoding = None
1489 standalone = None
1490 version = None
1491 strictErrorChecking = False
1492 errorHandler = None
1493 documentURI = None
1494
1495 _magic_id_count = 0
1496
def __init__(self):
    """Create an empty document: no children, no element info, and an
    empty ID cache.
    """
    self.childNodes = NodeList()
    # _elem_info maps (namespaceURI, localName) -> ElementInfo
    # and tagName -> ElementInfo.
    self._elem_info = {}
    self._id_cache = {}
    self._id_search_stack = None
1504
def _get_elem_info(self, element):
    """Return the ElementInfo recorded for *element*, or None.

    Elements with a namespace URI are looked up by
    (namespaceURI, localName); all others by tagName.
    """
    namespace = element.namespaceURI
    if not namespace:
        return self._elem_info.get(element.tagName)
    return self._elem_info.get((namespace, element.localName))
1511
def _get_actualEncoding(self):
    """Return the document's ``actualEncoding`` attribute."""
    value = self.actualEncoding
    return value
1514
def _get_doctype(self):
    """Return the document's ``doctype`` attribute."""
    value = self.doctype
    return value
1517
def _get_documentURI(self):
    """Return the document's ``documentURI`` attribute."""
    value = self.documentURI
    return value
1520
def _get_encoding(self):
    """Return the document's ``encoding`` attribute."""
    value = self.encoding
    return value
1523
def _get_errorHandler(self):
    """Return the document's ``errorHandler`` attribute."""
    value = self.errorHandler
    return value
1526
def _get_standalone(self):
    """Return the document's ``standalone`` attribute."""
    value = self.standalone
    return value
1529
def _get_strictErrorChecking(self):
    """Return the document's ``strictErrorChecking`` attribute."""
    value = self.strictErrorChecking
    return value
1532
1533 def _get_version(self):
1534 return self.version
Fred Drake55c38192000-06-29 19:39:57 +00001535
Fred Drake1f549022000-09-24 05:21:58 +00001536 def appendChild(self, node):
Martin v. Löwis787354c2003-01-25 15:28:29 +00001537 if node.nodeType not in self._child_node_types:
1538 raise xml.dom.HierarchyRequestErr(
1539 "%s cannot be child of %s" % (repr(node), repr(self)))
Andrew M. Kuchling04a45e92000-12-20 14:47:24 +00001540 if node.parentNode is not None:
Martin v. Löwis787354c2003-01-25 15:28:29 +00001541 # This needs to be done before the next test since this
1542 # may *be* the document element, in which case it should
1543 # end up re-ordered to the end.
Andrew M. Kuchling04a45e92000-12-20 14:47:24 +00001544 node.parentNode.removeChild(node)
1545
Fred Drakef7cf40d2000-12-14 18:16:11 +00001546 if node.nodeType == Node.ELEMENT_NODE \
1547 and self._get_documentElement():
Guido van Rossum9e1fe1e2001-02-05 19:17:50 +00001548 raise xml.dom.HierarchyRequestErr(
1549 "two document elements disallowed")
Fred Drake4ccf4a12000-11-21 22:02:22 +00001550 return Node.appendChild(self, node)
Paul Prescod73678da2000-07-01 04:58:47 +00001551
Andrew M. Kuchling04a45e92000-12-20 14:47:24 +00001552 def removeChild(self, oldChild):
Martin v. Löwis787354c2003-01-25 15:28:29 +00001553 try:
1554 self.childNodes.remove(oldChild)
1555 except ValueError:
1556 raise xml.dom.NotFoundErr()
Andrew M. Kuchling04a45e92000-12-20 14:47:24 +00001557 oldChild.nextSibling = oldChild.previousSibling = None
1558 oldChild.parentNode = None
1559 if self.documentElement is oldChild:
1560 self.documentElement = None
Martin v. Löwis52ce0d02001-01-27 08:47:37 +00001561
Andrew M. Kuchling04a45e92000-12-20 14:47:24 +00001562 return oldChild
1563
Fred Drakef7cf40d2000-12-14 18:16:11 +00001564 def _get_documentElement(self):
1565 for node in self.childNodes:
1566 if node.nodeType == Node.ELEMENT_NODE:
1567 return node
1568
1569 def unlink(self):
1570 if self.doctype is not None:
1571 self.doctype.unlink()
1572 self.doctype = None
1573 Node.unlink(self)
1574
Martin v. Löwis787354c2003-01-25 15:28:29 +00001575 def cloneNode(self, deep):
1576 if not deep:
1577 return None
1578 clone = self.implementation.createDocument(None, None, None)
1579 clone.encoding = self.encoding
1580 clone.standalone = self.standalone
1581 clone.version = self.version
1582 for n in self.childNodes:
1583 childclone = _clone_node(n, deep, clone)
1584 assert childclone.ownerDocument.isSameNode(clone)
1585 clone.childNodes.append(childclone)
1586 if childclone.nodeType == Node.DOCUMENT_NODE:
1587 assert clone.documentElement is None
1588 elif childclone.nodeType == Node.DOCUMENT_TYPE_NODE:
1589 assert clone.doctype is None
1590 clone.doctype = childclone
1591 childclone.parentNode = clone
1592 self._call_user_data_handler(xml.dom.UserDataHandler.NODE_CLONED,
1593 self, clone)
1594 return clone
1595
Martin v. Löwis126f2f62001-03-13 10:50:13 +00001596 def createDocumentFragment(self):
1597 d = DocumentFragment()
Martin v. Löwis787354c2003-01-25 15:28:29 +00001598 d.ownerDocument = self
Martin v. Löwis126f2f62001-03-13 10:50:13 +00001599 return d
Fred Drake55c38192000-06-29 19:39:57 +00001600
Martin v. Löwis126f2f62001-03-13 10:50:13 +00001601 def createElement(self, tagName):
1602 e = Element(tagName)
1603 e.ownerDocument = self
1604 return e
Fred Drake55c38192000-06-29 19:39:57 +00001605
Martin v. Löwis126f2f62001-03-13 10:50:13 +00001606 def createTextNode(self, data):
Martin v. Löwis787354c2003-01-25 15:28:29 +00001607 if not isinstance(data, StringTypes):
1608 raise TypeError, "node contents must be a string"
1609 t = Text()
1610 t.data = data
Martin v. Löwis126f2f62001-03-13 10:50:13 +00001611 t.ownerDocument = self
1612 return t
Fred Drake55c38192000-06-29 19:39:57 +00001613
Fred Drake87432f42001-04-04 14:09:46 +00001614 def createCDATASection(self, data):
Martin v. Löwis787354c2003-01-25 15:28:29 +00001615 if not isinstance(data, StringTypes):
1616 raise TypeError, "node contents must be a string"
1617 c = CDATASection()
1618 c.data = data
Fred Drake87432f42001-04-04 14:09:46 +00001619 c.ownerDocument = self
1620 return c
1621
Martin v. Löwis126f2f62001-03-13 10:50:13 +00001622 def createComment(self, data):
1623 c = Comment(data)
1624 c.ownerDocument = self
1625 return c
Fred Drake55c38192000-06-29 19:39:57 +00001626
Martin v. Löwis126f2f62001-03-13 10:50:13 +00001627 def createProcessingInstruction(self, target, data):
1628 p = ProcessingInstruction(target, data)
1629 p.ownerDocument = self
1630 return p
1631
1632 def createAttribute(self, qName):
1633 a = Attr(qName)
1634 a.ownerDocument = self
Martin v. Löwiscb67ea12001-03-31 16:30:40 +00001635 a.value = ""
Martin v. Löwis126f2f62001-03-13 10:50:13 +00001636 return a
Fred Drake55c38192000-06-29 19:39:57 +00001637
1638 def createElementNS(self, namespaceURI, qualifiedName):
Fred Drake4ccf4a12000-11-21 22:02:22 +00001639 prefix, localName = _nssplit(qualifiedName)
Martin v. Löwis787354c2003-01-25 15:28:29 +00001640 e = Element(qualifiedName, namespaceURI, prefix)
Martin v. Löwis126f2f62001-03-13 10:50:13 +00001641 e.ownerDocument = self
1642 return e
Fred Drake55c38192000-06-29 19:39:57 +00001643
1644 def createAttributeNS(self, namespaceURI, qualifiedName):
Fred Drake4ccf4a12000-11-21 22:02:22 +00001645 prefix, localName = _nssplit(qualifiedName)
Martin v. Löwis126f2f62001-03-13 10:50:13 +00001646 a = Attr(qualifiedName, namespaceURI, localName, prefix)
1647 a.ownerDocument = self
Martin v. Löwiscb67ea12001-03-31 16:30:40 +00001648 a.value = ""
Martin v. Löwis126f2f62001-03-13 10:50:13 +00001649 return a
Fred Drake55c38192000-06-29 19:39:57 +00001650
Martin v. Löwis787354c2003-01-25 15:28:29 +00001651 # A couple of implementation-specific helpers to create node types
1652 # not supported by the W3C DOM specs:
1653
1654 def _create_entity(self, name, publicId, systemId, notationName):
1655 e = Entity(name, publicId, systemId, notationName)
1656 e.ownerDocument = self
1657 return e
1658
1659 def _create_notation(self, name, publicId, systemId):
1660 n = Notation(name, publicId, systemId)
1661 n.ownerDocument = self
1662 return n
1663
1664 def getElementById(self, id):
1665 if self._id_cache.has_key(id):
1666 return self._id_cache[id]
1667 if not (self._elem_info or self._magic_id_count):
1668 return None
1669
1670 stack = self._id_search_stack
1671 if stack is None:
1672 # we never searched before, or the cache has been cleared
1673 stack = [self.documentElement]
1674 self._id_search_stack = stack
1675 elif not stack:
1676 # Previous search was completed and cache is still valid;
1677 # no matching node.
1678 return None
1679
1680 result = None
1681 while stack:
1682 node = stack.pop()
1683 # add child elements to stack for continued searching
1684 stack.extend([child for child in node.childNodes
1685 if child.nodeType in _nodeTypes_with_children])
1686 # check this node
1687 info = self._get_elem_info(node)
1688 if info:
1689 # We have to process all ID attributes before
1690 # returning in order to get all the attributes set to
1691 # be IDs using Element.setIdAttribute*().
1692 for attr in node.attributes.values():
1693 if attr.namespaceURI:
1694 if info.isIdNS(attr.namespaceURI, attr.localName):
1695 self._id_cache[attr.value] = node
1696 if attr.value == id:
1697 result = node
1698 elif not node._magic_id_nodes:
1699 break
1700 elif info.isId(attr.name):
1701 self._id_cache[attr.value] = node
1702 if attr.value == id:
1703 result = node
1704 elif not node._magic_id_nodes:
1705 break
1706 elif attr._is_id:
1707 self._id_cache[attr.value] = node
1708 if attr.value == id:
1709 result = node
1710 elif node._magic_id_nodes == 1:
1711 break
1712 elif node._magic_id_nodes:
1713 for attr in node.attributes.values():
1714 if attr._is_id:
1715 self._id_cache[attr.value] = node
1716 if attr.value == id:
1717 result = node
1718 if result is not None:
1719 break
1720 return result
1721
Fred Drake1f549022000-09-24 05:21:58 +00001722 def getElementsByTagName(self, name):
Martin v. Löwis787354c2003-01-25 15:28:29 +00001723 return _get_elements_by_tagName_helper(self, name, NodeList())
Fred Drakefbe7b4f2001-07-04 06:25:53 +00001724
1725 def getElementsByTagNameNS(self, namespaceURI, localName):
Martin v. Löwis787354c2003-01-25 15:28:29 +00001726 return _get_elements_by_tagName_ns_helper(
1727 self, namespaceURI, localName, NodeList())
1728
1729 def isSupported(self, feature, version):
1730 return self.implementation.hasFeature(feature, version)
1731
1732 def importNode(self, node, deep):
1733 if node.nodeType == Node.DOCUMENT_NODE:
1734 raise xml.dom.NotSupportedErr("cannot import document nodes")
1735 elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
1736 raise xml.dom.NotSupportedErr("cannot import document type nodes")
1737 return _clone_node(node, deep, self)
Fred Drake55c38192000-06-29 19:39:57 +00001738
Martin v. Löwis7d650ca2002-06-30 15:05:00 +00001739 def writexml(self, writer, indent="", addindent="", newl="",
1740 encoding = None):
1741 if encoding is None:
1742 writer.write('<?xml version="1.0" ?>\n')
1743 else:
1744 writer.write('<?xml version="1.0" encoding="%s"?>\n' % encoding)
Fred Drake55c38192000-06-29 19:39:57 +00001745 for node in self.childNodes:
Martin v. Löwis46fa39a2001-02-06 00:14:08 +00001746 node.writexml(writer, indent, addindent, newl)
Fred Drake55c38192000-06-29 19:39:57 +00001747
Martin v. Löwis787354c2003-01-25 15:28:29 +00001748 # DOM Level 3 (WD 9 April 2002)
1749
1750 def renameNode(self, n, namespaceURI, name):
1751 if n.ownerDocument is not self:
1752 raise xml.dom.WrongDocumentErr(
1753 "cannot rename nodes from other documents;\n"
1754 "expected %s,\nfound %s" % (self, n.ownerDocument))
1755 if n.nodeType not in (Node.ELEMENT_NODE, Node.ATTRIBUTE_NODE):
1756 raise xml.dom.NotSupportedErr(
1757 "renameNode() only applies to element and attribute nodes")
1758 if namespaceURI != EMPTY_NAMESPACE:
1759 if ':' in name:
1760 prefix, localName = name.split(':', 1)
1761 if ( prefix == "xmlns"
1762 and namespaceURI != xml.dom.XMLNS_NAMESPACE):
1763 raise xml.dom.NamespaceErr(
1764 "illegal use of 'xmlns' prefix")
1765 else:
1766 if ( name == "xmlns"
1767 and namespaceURI != xml.dom.XMLNS_NAMESPACE
1768 and n.nodeType == Node.ATTRIBUTE_NODE):
1769 raise xml.dom.NamespaceErr(
1770 "illegal use of the 'xmlns' attribute")
1771 prefix = None
1772 localName = name
1773 else:
1774 prefix = None
1775 localName = None
1776 if n.nodeType == Node.ATTRIBUTE_NODE:
1777 element = n.ownerElement
1778 if element is not None:
1779 is_id = n._is_id
1780 element.removeAttributeNode(n)
1781 else:
1782 element = None
1783 # avoid __setattr__
1784 d = n.__dict__
1785 d['prefix'] = prefix
1786 d['localName'] = localName
1787 d['namespaceURI'] = namespaceURI
1788 d['nodeName'] = name
1789 if n.nodeType == Node.ELEMENT_NODE:
1790 d['tagName'] = name
1791 else:
1792 # attribute node
1793 d['name'] = name
1794 if element is not None:
1795 element.setAttributeNode(n)
1796 if is_id:
1797 element.setIdAttributeNode(n)
1798 # It's not clear from a semantic perspective whether we should
1799 # call the user data handlers for the NODE_RENAMED event since
1800 # we're re-using the existing node. The draft spec has been
1801 # interpreted as meaning "no, don't call the handler unless a
1802 # new node is created."
1803 return n
1804
# Publish ``documentElement`` as an attribute of Document.  defproperty
# is not defined in this part of the file; presumably it builds the
# property from Document._get_documentElement -- TODO confirm.
defproperty(
    Document,
    "documentElement",
    doc="Top-level element of this document.",
)
1807
1808
1809def _clone_node(node, deep, newOwnerDocument):
1810 """
1811 Clone a node and give it the new owner document.
1812 Called by Node.cloneNode and Document.importNode
1813 """
1814 if node.ownerDocument.isSameNode(newOwnerDocument):
1815 operation = xml.dom.UserDataHandler.NODE_CLONED
1816 else:
1817 operation = xml.dom.UserDataHandler.NODE_IMPORTED
1818 if node.nodeType == Node.ELEMENT_NODE:
1819 clone = newOwnerDocument.createElementNS(node.namespaceURI,
1820 node.nodeName)
1821 for attr in node.attributes.values():
1822 clone.setAttributeNS(attr.namespaceURI, attr.nodeName, attr.value)
1823 a = clone.getAttributeNodeNS(attr.namespaceURI, attr.localName)
1824 a.specified = attr.specified
1825
1826 if deep:
1827 for child in node.childNodes:
1828 c = _clone_node(child, deep, newOwnerDocument)
1829 clone.appendChild(c)
1830
1831 elif node.nodeType == Node.DOCUMENT_FRAGMENT_NODE:
1832 clone = newOwnerDocument.createDocumentFragment()
1833 if deep:
1834 for child in node.childNodes:
1835 c = _clone_node(child, deep, newOwnerDocument)
1836 clone.appendChild(c)
1837
1838 elif node.nodeType == Node.TEXT_NODE:
1839 clone = newOwnerDocument.createTextNode(node.data)
1840 elif node.nodeType == Node.CDATA_SECTION_NODE:
1841 clone = newOwnerDocument.createCDATASection(node.data)
1842 elif node.nodeType == Node.PROCESSING_INSTRUCTION_NODE:
1843 clone = newOwnerDocument.createProcessingInstruction(node.target,
1844 node.data)
1845 elif node.nodeType == Node.COMMENT_NODE:
1846 clone = newOwnerDocument.createComment(node.data)
1847 elif node.nodeType == Node.ATTRIBUTE_NODE:
1848 clone = newOwnerDocument.createAttributeNS(node.namespaceURI,
1849 node.nodeName)
1850 clone.specified = True
1851 clone.value = node.value
1852 elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
1853 assert node.ownerDocument is not newOwnerDocument
1854 operation = xml.dom.UserDataHandler.NODE_IMPORTED
1855 clone = newOwnerDocument.implementation.createDocumentType(
1856 node.name, node.publicId, node.systemId)
1857 clone.ownerDocument = newOwnerDocument
1858 if deep:
1859 clone.entities._seq = []
1860 clone.notations._seq = []
1861 for n in node.notations._seq:
1862 notation = Notation(n.nodeName, n.publicId, n.systemId)
1863 notation.ownerDocument = newOwnerDocument
1864 clone.notations._seq.append(notation)
1865 if hasattr(n, '_call_user_data_handler'):
1866 n._call_user_data_handler(operation, n, notation)
1867 for e in node.entities._seq:
1868 entity = Entity(e.nodeName, e.publicId, e.systemId,
1869 e.notationName)
1870 entity.actualEncoding = e.actualEncoding
1871 entity.encoding = e.encoding
1872 entity.version = e.version
1873 entity.ownerDocument = newOwnerDocument
1874 clone.entities._seq.append(entity)
1875 if hasattr(e, '_call_user_data_handler'):
1876 e._call_user_data_handler(operation, n, entity)
1877 else:
1878 # Note the cloning of Document and DocumentType nodes is
1879 # implemenetation specific. minidom handles those cases
1880 # directly in the cloneNode() methods.
1881 raise xml.dom.NotSupportedErr("Cannot clone node %s" % repr(node))
1882
1883 # Check for _call_user_data_handler() since this could conceivably
1884 # used with other DOM implementations (one of the FourThought
1885 # DOMs, perhaps?).
1886 if hasattr(node, '_call_user_data_handler'):
1887 node._call_user_data_handler(operation, node, clone)
1888 return clone
1889
1890
def _nssplit(qualifiedName):
    """Split a qualified name at its first colon.

    Returns the two-item list [prefix, localName] when a colon is
    present, and the tuple (None, qualifiedName) when it is not.
    """
    parts = qualifiedName.split(':', 1)
    if len(parts) != 2:
        # no colon at all: the whole name is the local part
        return (None, parts[0])
    return parts
1897
1898
def _get_StringIO():
    """Return a new, empty in-memory text buffer."""
    # we can't use cStringIO since it doesn't support Unicode strings
    try:
        from StringIO import StringIO
    except ImportError:
        # The StringIO module is not available (it was removed in
        # Python 3); io.StringIO is the equivalent text buffer there.
        from io import StringIO
    return StringIO()
1903
def _do_pulldom_parse(func, args, kwargs):
    """Run a pulldom-style parse to completion and return its root node.

    func(*args, **kwargs) must return an event stream.  The node of the
    first event is expanded in place, the stream is cleared, and that
    node is returned.
    """
    stream = func(*args, **kwargs)
    first_event = stream.getEvent()
    # each event is a (token type, node) pair; only the node is needed
    document = first_event[1]
    stream.expandNode(document)
    stream.clear()
    return document
1910
def parse(file, parser=None, bufsize=None):
    """Build a DOM from *file*, given as a filename or a file object.

    The expat-based builder is used unless a *parser* or a non-zero
    *bufsize* is supplied, in which case the work goes through pulldom.
    """
    if parser is not None or bufsize:
        from xml.dom import pulldom
        options = {'parser': parser, 'bufsize': bufsize}
        return _do_pulldom_parse(pulldom.parse, (file,), options)

    from xml.dom import expatbuilder
    return expatbuilder.parse(file)
Fred Drake55c38192000-06-29 19:39:57 +00001920
def parseString(string, parser=None):
    """Build a DOM from XML text held in *string*.

    The expat-based builder is used unless a *parser* is supplied, in
    which case the work goes through pulldom.
    """
    if parser is not None:
        from xml.dom import pulldom
        options = {'parser': parser}
        return _do_pulldom_parse(pulldom.parseString, (string,), options)

    from xml.dom import expatbuilder
    return expatbuilder.parseString(string)
Martin v. Löwis7edbd4f2001-02-22 14:05:50 +00001930
def getDOMImplementation(features=None):
    """Return minidom's DOM implementation object, or None.

    *features* may be a feature string or a sequence of
    (feature, version) pairs; None is returned as soon as one of the
    requested features is not supported.  With no features the
    implementation is returned unconditionally.
    """
    if not features:
        return Document.implementation
    if isinstance(features, StringTypes):
        features = domreg._parse_feature_string(features)
    for feature, version in features:
        if not Document.implementation.hasFeature(feature, version):
            return None
    return Document.implementation