blob: ef1a2bff229993718ad55f0993d56ff331f37893 [file] [log] [blame]
"""\
minidom.py -- a lightweight DOM implementation.

parse("foo.xml")

parseString("<foo><bar/></foo>")

Todo:
=====
 * convenience methods for getting elements and text.
 * more testing
 * bring some of the writer and linearizer code into conformance with this
   interface
 * SAX 2 namespaces
"""
16
Fred Drake1f549022000-09-24 05:21:58 +000017import string
Fred Drake4ccf4a12000-11-21 22:02:22 +000018_string = string
19del string
20
Andrew M. Kuchling291ed4f2000-12-31 03:50:23 +000021from xml.dom import HierarchyRequestErr
22
Fred Drake4ccf4a12000-11-21 22:02:22 +000023# localize the types, and allow support for Unicode values if available:
Fred Drake1f549022000-09-24 05:21:58 +000024import types
Fred Drake4ccf4a12000-11-21 22:02:22 +000025_TupleType = types.TupleType
26try:
27 _StringTypes = (types.StringType, types.UnicodeType)
28except AttributeError:
29 _StringTypes = (types.StringType,)
30del types
31
Fred Drakef7cf40d2000-12-14 18:16:11 +000032import xml.dom
33_Node = xml.dom.Node
Fred Drake55c38192000-06-29 19:39:57 +000034
class Node(_Node):
    """Common base class for every minidom node.

    Holds the child list and the parent link, implements the tree-editing
    operations (insertBefore, appendChild, replaceChild, removeChild,
    normalize, cloneNode) and, through __getattr__, maps attribute reads
    such as ``node.firstChild`` onto ``_get_firstChild()`` style accessors.
    """
    # Debug registry, only filled while _debug is true:
    # repr(id) + repr(class)  ->  repr of the instance __dict__ at creation.
    allnodes = {}
    _debug = 0
    # When true, the tree-editing methods keep child.parentNode up to date.
    _makeParentNodes = 1
    # Writer receiving the debug log; created lazily on first use.
    debug = None
    # nodeType values this kind of node accepts as children.
    childNodeTypes = ()

    def __init__(self):
        """Start with no children and no parent; log creation if debugging."""
        self.childNodes = []
        self.parentNode = None
        if Node._debug:
            index = repr(id(self)) + repr(self.__class__)
            Node.allnodes[index] = repr(self.__dict__)
            if Node.debug is None:
                Node.debug = _get_StringIO()
            Node.debug.write("create %s\n" % index)

    def __getattr__(self, key):
        """Resolve ``X`` via ``_get_X()`` and ``_get_X`` via attribute ``X``.

        Only called when normal lookup fails.  The instance attribute
        ``inGetAttr`` is a guard: while it is set, a nested failed lookup
        clears it and raises AttributeError instead of recursing.
        """
        if key[0:2] == "__":
            raise AttributeError(key)
        # getattr should never call getattr!
        if "inGetAttr" in self.__dict__:
            del self.inGetAttr
            raise AttributeError(key)

        prefix, attrname = key[:5], key[5:]
        if prefix == "_get_":
            self.inGetAttr = 1
            found = hasattr(self, attrname)
            # A failed nested lookup has already removed the guard, so only
            # delete it when it is still present; an unconditional delete
            # would raise AttributeError('inGetAttr') and hide the real key.
            if "inGetAttr" in self.__dict__:
                del self.inGetAttr
            if not found:
                raise AttributeError(key)
            return (lambda self=self, attrname=attrname:
                    getattr(self, attrname))
        else:
            self.inGetAttr = 1
            try:
                func = getattr(self, "_get_" + key)
            except AttributeError:
                # The nested lookup cleared the guard before failing.
                raise AttributeError(key)
            del self.inGetAttr
            return func()

    def __nonzero__(self):
        """Nodes are always true, even when they have no children."""
        return 1

    def toxml(self):
        """Return the text produced by this node's writexml()."""
        writer = _get_StringIO()
        self.writexml(writer)
        return writer.getvalue()

    def toprettyxml(self, indent="\t", newl="\n"):
        """Return writexml() output using per-level indentation.

        indent -- the indentation string to prepend, per level
        newl   -- the newline string to append
        """
        writer = _get_StringIO()
        self.writexml(writer, "", indent, newl)
        return writer.getvalue()

    def hasChildNodes(self):
        """Return 1 if the node has at least one child, else 0."""
        if self.childNodes:
            return 1
        else:
            return 0

    def _get_firstChild(self):
        """Return the first child, or None when there are no children."""
        if self.childNodes:
            return self.childNodes[0]

    def _get_lastChild(self):
        """Return the last child, or None when there are no children."""
        if self.childNodes:
            return self.childNodes[-1]

    def insertBefore(self, newChild, refChild):
        """Insert newChild before refChild (append when refChild is None).

        newChild is first detached from any current parent.  Raises
        HierarchyRequestErr if its nodeType is not in childNodeTypes.
        Returns newChild.
        """
        if newChild.nodeType not in self.childNodeTypes:
            raise HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(newChild), repr(self)))
        if newChild.parentNode is not None:
            newChild.parentNode.removeChild(newChild)
        if refChild is None:
            self.appendChild(newChild)
        else:
            index = self.childNodes.index(refChild)
            self.childNodes.insert(index, newChild)
            newChild.nextSibling = refChild
            refChild.previousSibling = newChild
            if index:
                node = self.childNodes[index-1]
                node.nextSibling = newChild
                newChild.previousSibling = node
            else:
                newChild.previousSibling = None
            if self._makeParentNodes:
                newChild.parentNode = self
        return newChild

    def appendChild(self, node):
        """Append node as the last child and return it.

        node is first detached from any current parent.  Raises
        HierarchyRequestErr if its nodeType is not in childNodeTypes.
        """
        if node.nodeType not in self.childNodeTypes:
            raise HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(node), repr(self)))
        if node.parentNode is not None:
            node.parentNode.removeChild(node)
        if self.childNodes:
            last = self.lastChild
            node.previousSibling = last
            last.nextSibling = node
        else:
            node.previousSibling = None
        node.nextSibling = None
        self.childNodes.append(node)
        if self._makeParentNodes:
            node.parentNode = self
        return node

    def replaceChild(self, newChild, oldChild):
        """Put newChild in oldChild's place and return oldChild.

        Raises HierarchyRequestErr if newChild's nodeType is not in
        childNodeTypes.  Replacing a node with itself leaves the tree
        untouched and returns None.
        """
        if newChild.nodeType not in self.childNodeTypes:
            raise HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(newChild), repr(self)))
        # This check has to come before newChild is detached from its
        # parent; otherwise a self-replacement would remove the node from
        # this tree and never put it back.
        if newChild is oldChild:
            return
        if newChild.parentNode is not None:
            newChild.parentNode.removeChild(newChild)
        index = self.childNodes.index(oldChild)
        self.childNodes[index] = newChild
        if self._makeParentNodes:
            newChild.parentNode = self
            oldChild.parentNode = None
        newChild.nextSibling = oldChild.nextSibling
        newChild.previousSibling = oldChild.previousSibling
        oldChild.nextSibling = None
        oldChild.previousSibling = None
        if newChild.previousSibling is not None:
            newChild.previousSibling.nextSibling = newChild
        if newChild.nextSibling is not None:
            newChild.nextSibling.previousSibling = newChild
        return oldChild

    def removeChild(self, oldChild):
        """Detach oldChild from this node, relink its siblings, return it."""
        self.childNodes.remove(oldChild)
        if oldChild.nextSibling is not None:
            oldChild.nextSibling.previousSibling = oldChild.previousSibling
        if oldChild.previousSibling is not None:
            oldChild.previousSibling.nextSibling = oldChild.nextSibling
        oldChild.nextSibling = oldChild.previousSibling = None

        if self._makeParentNodes:
            oldChild.parentNode = None
        return oldChild

    def normalize(self):
        """Merge adjacent text children and drop empty ones, recursively.

        Discarded text nodes are unlinked.  Element children are
        normalized in turn.
        """
        L = []
        for child in self.childNodes:
            if child.nodeType == Node.TEXT_NODE:
                data = child.data
                if data and L and L[-1].nodeType == child.nodeType:
                    # collapse text node
                    node = L[-1]
                    node.data = node.nodeValue = node.data + child.data
                    node.nextSibling = child.nextSibling
                    child.unlink()
                elif data:
                    if L:
                        L[-1].nextSibling = child
                        child.previousSibling = L[-1]
                    else:
                        child.previousSibling = None
                    L.append(child)
                else:
                    # empty text node; discard
                    child.unlink()
            else:
                if L:
                    L[-1].nextSibling = child
                    child.previousSibling = L[-1]
                else:
                    child.previousSibling = None
                L.append(child)
                if child.nodeType == Node.ELEMENT_NODE:
                    child.normalize()
        if L:
            # Links between kept nodes are repaired as each one is appended,
            # but the final kept node may still point at a text node that
            # was discarded after it.
            L[-1].nextSibling = None
        self.childNodes[:] = L

    def cloneNode(self, deep):
        """Return a parentless copy; with deep true, copy the subtree too.

        The clone is built with new.instance() from a shallow copy of this
        node's __dict__, so __init__ is not run for it.
        """
        import new
        clone = new.instance(self.__class__, self.__dict__.copy())
        if self._makeParentNodes:
            clone.parentNode = None
        clone.childNodes = []
        if deep:
            for child in self.childNodes:
                clone.appendChild(child.cloneNode(1))
        return clone

    # DOM Level 3 (Working Draft 2001-Jan-26)

    def isSameNode(self, other):
        """Return true only when other is this very object."""
        return self is other

    # minidom-specific API:

    def unlink(self):
        """Recursively drop parent, child and sibling references.

        Afterwards childNodes is None, so the node can no longer be used
        as part of a tree.
        """
        self.parentNode = None
        for child in self.childNodes:
            child.unlink()
        self.childNodes = None
        self.previousSibling = None
        self.nextSibling = None
        if Node._debug:
            index = repr(id(self)) + repr(self.__class__)
            self.debug.write("Deleting: %s\n" % index)
            del Node.allnodes[index]
Fred Drake55c38192000-06-29 19:39:57 +0000246
Fred Drake1f549022000-09-24 05:21:58 +0000247def _write_data(writer, data):
Fred Drake55c38192000-06-29 19:39:57 +0000248 "Writes datachars to writer."
Fred Drake4ccf4a12000-11-21 22:02:22 +0000249 replace = _string.replace
250 data = replace(data, "&", "&amp;")
251 data = replace(data, "<", "&lt;")
252 data = replace(data, "\"", "&quot;")
253 data = replace(data, ">", "&gt;")
Fred Drake55c38192000-06-29 19:39:57 +0000254 writer.write(data)
255
Fred Drake1f549022000-09-24 05:21:58 +0000256def _getElementsByTagNameHelper(parent, name, rc):
Fred Drake55c38192000-06-29 19:39:57 +0000257 for node in parent.childNodes:
Fred Drake1f549022000-09-24 05:21:58 +0000258 if node.nodeType == Node.ELEMENT_NODE and \
259 (name == "*" or node.tagName == name):
260 rc.append(node)
261 _getElementsByTagNameHelper(node, name, rc)
Fred Drake55c38192000-06-29 19:39:57 +0000262 return rc
263
Fred Drake1f549022000-09-24 05:21:58 +0000264def _getElementsByTagNameNSHelper(parent, nsURI, localName, rc):
Fred Drake55c38192000-06-29 19:39:57 +0000265 for node in parent.childNodes:
Fred Drake1f549022000-09-24 05:21:58 +0000266 if node.nodeType == Node.ELEMENT_NODE:
267 if ((localName == "*" or node.tagName == localName) and
268 (nsURI == "*" or node.namespaceURI == nsURI)):
269 rc.append(node)
Fred Drakef7cf40d2000-12-14 18:16:11 +0000270 _getElementsByTagNameNSHelper(node, nsURI, localName, rc)
271 return rc
Fred Drake55c38192000-06-29 19:39:57 +0000272
class Attr(Node):
    """An attribute node.

    ``name``/``nodeName`` and ``value``/``nodeValue`` are aliases of each
    other: assigning to one member of a pair updates both.
    """
    nodeType = Node.ATTRIBUTE_NODE
    attributes = None
    ownerElement = None
    childNodeTypes = (Node.TEXT_NODE, Node.ENTITY_REFERENCE_NODE)

    def __init__(self, qName, namespaceURI="", localName=None, prefix=None):
        # Fill the instance dictionary directly; this bypasses __setattr__
        # for performance.
        fields = self.__dict__
        fields["name"] = fields["nodeName"] = qName
        fields["localName"] = localName or qName
        fields["namespaceURI"] = namespaceURI
        fields["prefix"] = prefix
        Node.__init__(self)
        # nodeValue and value are set elsewhere

    def __setattr__(self, name, value):
        """Store value under name, keeping the aliased pairs in step."""
        fields = self.__dict__
        if name == "value" or name == "nodeValue":
            fields["nodeValue"] = fields["value"] = value
        elif name == "name" or name == "nodeName":
            fields["nodeName"] = fields["name"] = value
        else:
            fields[name] = value

    def cloneNode(self, deep):
        """Clone the attribute without carrying over its owner element."""
        duplicate = Node.cloneNode(self, deep)
        # The copied __dict__ may hold the original's ownerElement; removing
        # it lets the class-level default (None) show through again.
        if "ownerElement" in duplicate.__dict__:
            del duplicate.ownerElement
        return duplicate
303
Fred Drakef7cf40d2000-12-14 18:16:11 +0000304
class NamedNodeMap:
    """The attribute list is a transient interface to the underlying
    dictionaries.  Mutations here will change the underlying element's
    dictionary.

    Ordering is imposed artificially and does not reflect the order of
    attributes as found in an input document.
    """

    def __init__(self, attrs, attrsNS):
        # attrs:   qualified name            -> attribute node
        # attrsNS: (namespaceURI, localName) -> attribute node
        self._attrs = attrs
        self._attrsNS = attrsNS

    def __getattr__(self, name):
        """Provide ``length`` as a value computed on every read."""
        if name == "length":
            return len(self._attrs)
        raise AttributeError(name)

    def item(self, index):
        """Return the index-th attribute node, or None if out of range."""
        try:
            return self[list(self._attrs.keys())[index]]
        except IndexError:
            return None

    def items(self):
        """Return a list of (nodeName, value) pairs."""
        L = []
        for node in self._attrs.values():
            L.append((node.nodeName, node.value))
        return L

    def itemsNS(self):
        """Return a list of ((namespaceURI, localName), value) pairs."""
        L = []
        for node in self._attrs.values():
            # Attribute nodes store their namespace as ``namespaceURI``;
            # they have no ``URI`` attribute.
            L.append(((node.namespaceURI, node.localName), node.value))
        return L

    def keys(self):
        """Return the qualified attribute names."""
        return self._attrs.keys()

    def keysNS(self):
        """Return the (namespaceURI, localName) keys."""
        return self._attrsNS.keys()

    def values(self):
        """Return the attribute nodes."""
        return self._attrs.values()

    def get(self, name, value = None):
        """Return the node stored under name, or value if there is none."""
        return self._attrs.get(name, value)

    def __len__(self):
        return self.length

    def __cmp__(self, other):
        """Maps sharing the same underlying name dictionary compare equal."""
        if self._attrs is getattr(other, "_attrs", None):
            return 0
        else:
            return cmp(id(self), id(other))

    #FIXME: is it appropriate to return .value?
    def __getitem__(self, attname_or_tuple):
        """Look up by qualified name, or by (namespaceURI, localName)."""
        if isinstance(attname_or_tuple, tuple):
            return self._attrsNS[attname_or_tuple]
        else:
            return self._attrs[attname_or_tuple]

    # same as set
    def __setitem__(self, attname, value):
        """Store a string (wrapped in a new Attr) or an Attr node.

        Raises TypeError for any other kind of value.
        """
        if type(value) in _StringTypes:
            node = Attr(attname)
            node.value = value
        else:
            if not isinstance(value, Attr):
                raise TypeError("value must be a string or Attr object")
            node = value
        self.setNamedItem(node)

    def setNamedItem(self, node):
        """Store node under both indexes; return the node it replaced."""
        old = self._attrs.get(node.name)
        # Only unlink a *different* node; unlinking the node that is being
        # stored again would tear down the very object we keep.
        if old is not None and old is not node:
            old.unlink()
        self._attrs[node.name] = node
        self._attrsNS[(node.namespaceURI, node.localName)] = node
        return old

    def setNamedItemNS(self, node):
        return self.setNamedItem(node)

    def __delitem__(self, attname_or_tuple):
        """Unlink the addressed attribute and drop it from both indexes."""
        node = self[attname_or_tuple]
        node.unlink()
        del self._attrs[node.name]
        del self._attrsNS[(node.namespaceURI, node.localName)]
        # No ``self.length`` is stored here: an instance attribute would
        # shadow the value computed in __getattr__ and then go stale.

AttributeList = NamedNodeMap
399
Fred Drake1f549022000-09-24 05:21:58 +0000400
class Element(Node):
    """An element node together with its attributes.

    Attributes are double-indexed: ``_attrs`` maps the qualified name to
    the attribute node and ``_attrsNS`` maps (namespaceURI, localName) to
    the same node.
    """
    nodeType = Node.ELEMENT_NODE
    nextSibling = None
    previousSibling = None
    childNodeTypes = (Node.ELEMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE,
                      Node.COMMENT_NODE, Node.TEXT_NODE,
                      Node.CDATA_SECTION_NODE, Node.ENTITY_REFERENCE_NODE)

    def __init__(self, tagName, namespaceURI="", prefix="",
                 localName=None):
        Node.__init__(self)
        self.tagName = self.nodeName = tagName
        self.localName = localName or tagName
        self.prefix = prefix
        self.namespaceURI = namespaceURI
        self.nodeValue = None

        self._attrs = {}   # attributes are double-indexed:
        self._attrsNS = {} #    tagName -> Attribute
                           #    URI,localName -> Attribute
                           # in the future: consider lazy generation
                           # of attribute objects this is too tricky
                           # for now because of headaches with
                           # namespaces.

    def cloneNode(self, deep):
        """Clone the element; attribute nodes are always cloned as well."""
        clone = Node.cloneNode(self, deep)
        # Node.cloneNode copied __dict__, so the clone still shares this
        # element's attribute dictionaries; give it fresh ones.
        clone._attrs = {}
        clone._attrsNS = {}
        for attr in self._attrs.values():
            node = attr.cloneNode(1)
            clone._attrs[node.name] = node
            clone._attrsNS[(node.namespaceURI, node.localName)] = node
            node.ownerElement = clone
        return clone

    def unlink(self):
        """Unlink every attribute node, then the element itself."""
        for attr in self._attrs.values():
            attr.unlink()
        self._attrs = None
        self._attrsNS = None
        Node.unlink(self)

    def getAttribute(self, attname):
        """Return the attribute's value, or "" if it is not set."""
        try:
            return self._attrs[attname].value
        except KeyError:
            return ""

    def getAttributeNS(self, namespaceURI, localName):
        """Namespace-aware getAttribute(); "" if the attribute is not set."""
        try:
            return self._attrsNS[(namespaceURI, localName)].value
        except KeyError:
            return ""

    def setAttribute(self, attname, value):
        """Create an attribute named attname with the given value."""
        attr = Attr(attname)
        # for performance
        attr.__dict__["value"] = attr.__dict__["nodeValue"] = value
        self.setAttributeNode(attr)

    def setAttributeNS(self, namespaceURI, qualifiedName, value):
        """Namespace-aware setAttribute(); qualifiedName may be prefixed."""
        prefix, localname = _nssplit(qualifiedName)
        # for performance
        attr = Attr(qualifiedName, namespaceURI, localname, prefix)
        attr.__dict__["value"] = attr.__dict__["nodeValue"] = value
        self.setAttributeNode(attr)

    def getAttributeNode(self, attrname):
        """Return the attribute node, or None if it is not set."""
        return self._attrs.get(attrname)

    def getAttributeNodeNS(self, namespaceURI, localName):
        """Namespace-aware getAttributeNode()."""
        return self._attrsNS.get((namespaceURI, localName))

    def setAttributeNode(self, attr):
        """Attach attr to this element.

        Returns the attribute node it replaced, or None when there was
        none or when attr was already attached here.  Raises
        xml.dom.InuseAttributeErr if attr belongs to another element.
        """
        if attr.ownerElement not in (None, self):
            raise xml.dom.InuseAttributeErr("attribute node already owned")
        old = self._attrs.get(attr.name, None)
        # Only a *different* node is unlinked; re-setting the node that is
        # already stored must not tear that node down.
        if old is not None and old is not attr:
            old.unlink()
        self._attrs[attr.name] = attr
        self._attrsNS[(attr.namespaceURI, attr.localName)] = attr

        # This creates a circular reference, but Element.unlink()
        # breaks the cycle since the references to the attribute
        # dictionaries are tossed.
        attr.ownerElement = self

        if old is not attr:
            # It might have already been part of this node, in which case
            # it doesn't represent a change, and should not be returned.
            return old

    def removeAttribute(self, name):
        """Remove the named attribute; KeyError if it is not set."""
        attr = self._attrs[name]
        self.removeAttributeNode(attr)

    def removeAttributeNS(self, namespaceURI, localName):
        """Namespace-aware removeAttribute(); KeyError if it is not set."""
        attr = self._attrsNS[(namespaceURI, localName)]
        self.removeAttributeNode(attr)

    def removeAttributeNode(self, node):
        """Unlink node and drop it from both attribute indexes."""
        node.unlink()
        del self._attrs[node.name]
        del self._attrsNS[(node.namespaceURI, node.localName)]

    def hasAttribute(self, name):
        """Return true if an attribute with this qualified name is set."""
        return name in self._attrs

    def hasAttributeNS(self, namespaceURI, localName):
        """Namespace-aware hasAttribute()."""
        return (namespaceURI, localName) in self._attrsNS

    def getElementsByTagName(self, name):
        """Return the descendant elements named name ("*" matches all)."""
        return _getElementsByTagNameHelper(self, name, [])

    def getElementsByTagNameNS(self, namespaceURI, localName):
        """Return the descendant elements matching namespace and local name."""
        # The helper's result has to be returned; without the return this
        # method always produced None.
        return _getElementsByTagNameNSHelper(self, namespaceURI,
                                             localName, [])

    def __repr__(self):
        return "<DOM Element: %s at %s>" % (self.tagName, id(self))

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the element, its attributes and its subtree to writer.

        indent    -- current indentation
        addindent -- indentation to add to higher levels
        newl      -- newline string

        Attributes are written sorted by name with escaped values.
        """
        writer.write(indent+"<" + self.tagName)

        attrs = self._get_attributes()
        a_names = list(attrs.keys())
        a_names.sort()

        for a_name in a_names:
            writer.write(" %s=\"" % a_name)
            _write_data(writer, attrs[a_name].value)
            writer.write("\"")
        if self.childNodes:
            writer.write(">%s"%(newl))
            for node in self.childNodes:
                node.writexml(writer,indent+addindent,addindent,newl)
            writer.write("%s</%s>%s" % (indent,self.tagName,newl))
        else:
            writer.write("/>%s"%(newl))

    def _get_attributes(self):
        """Return a map view over this element's attribute dictionaries."""
        return AttributeList(self._attrs, self._attrsNS)

    def hasAttributes(self):
        """Return 1 if the element has any attribute, else 0."""
        if self._attrs or self._attrsNS:
            return 1
        else:
            return 0
552
class Comment(Node):
    """A comment node; it accepts no children."""
    nodeType = Node.COMMENT_NODE
    nodeName = "#comment"
    attributes = None
    childNodeTypes = ()

    def __init__(self, data):
        Node.__init__(self)
        # data and nodeValue both carry the comment text.
        self.nodeValue = data
        self.data = data

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the comment markup, framed by indent and newl."""
        pieces = (indent, self.data, newl)
        writer.write("%s<!--%s-->%s" % pieces)
Fred Drake1f549022000-09-24 05:21:58 +0000565
class ProcessingInstruction(Node):
    """A processing instruction node; it accepts no children."""
    nodeType = Node.PROCESSING_INSTRUCTION_NODE
    attributes = None
    childNodeTypes = ()

    def __init__(self, target, data):
        Node.__init__(self)
        # The target doubles as nodeName and the data as nodeValue.
        self.nodeName = target
        self.target = target
        self.nodeValue = data
        self.data = data

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the instruction markup, framed by indent and newl."""
        pieces = (indent, self.target, self.data, newl)
        writer.write("%s<?%s %s?>%s" % pieces)
Fred Drake55c38192000-06-29 19:39:57 +0000578
class Text(Node):
    """A text node; ``data`` and ``nodeValue`` both hold its content."""
    nodeType = Node.TEXT_NODE
    nodeName = "#text"
    attributes = None
    childNodeTypes = ()

    def __init__(self, data):
        """Create a text node; raises TypeError if data is not a string."""
        if type(data) not in _StringTypes:
            raise TypeError("node contents must be a string")
        Node.__init__(self)
        self.data = self.nodeValue = data

    def __repr__(self):
        # Show at most the first ten characters of the content.
        if len(self.data) > 10:
            dotdotdot = "..."
        else:
            dotdotdot = ""
        return "<DOM Text node \"%s%s\">" % (self.data[0:10], dotdotdot)

    def splitText(self, offset):
        """Split this node at offset and return the new trailing node.

        This node keeps the text before offset.  If the node is in a
        tree, the new node is inserted right after it.  Raises
        xml.dom.IndexSizeErr when offset is negative or past the end of
        the data.
        """
        if offset < 0 or offset > len(self.data):
            raise xml.dom.IndexSizeErr("illegal offset value")
        newText = Text(self.data[offset:])
        next = self.nextSibling
        if self.parentNode and self in self.parentNode.childNodes:
            if next is None:
                self.parentNode.appendChild(newText)
            else:
                self.parentNode.insertBefore(newText, next)
        # Update nodeValue together with data; assigning only data would
        # leave nodeValue holding the full, unsplit text.
        self.data = self.nodeValue = self.data[:offset]
        return newText

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the escaped text, framed by indent and newl."""
        _write_data(writer, "%s%s%s"%(indent, self.data, newl))
Fred Drake55c38192000-06-29 19:39:57 +0000613
Fred Drake1f549022000-09-24 05:21:58 +0000614def _nssplit(qualifiedName):
Fred Drake4ccf4a12000-11-21 22:02:22 +0000615 fields = _string.split(qualifiedName, ':', 1)
Paul Prescod73678da2000-07-01 04:58:47 +0000616 if len(fields) == 2:
617 return fields
618 elif len(fields) == 1:
Fred Drake1f549022000-09-24 05:21:58 +0000619 return ('', fields[0])
Paul Prescod73678da2000-07-01 04:58:47 +0000620
Fred Drakef7cf40d2000-12-14 18:16:11 +0000621
class DocumentType(Node):
    """A document type node; every field defaults at class level."""
    nodeType = Node.DOCUMENT_TYPE_NODE
    nodeValue = None
    attributes = None
    name = None
    publicId = None
    systemId = None
    internalSubset = ""
    entities = None
    notations = None

    def __init__(self, qualifiedName):
        Node.__init__(self)
        if not qualifiedName:
            # Nothing to record; ``name`` stays at its class default.
            return
        # Only the local part of the qualified name is kept.
        self.name = _nssplit(qualifiedName)[1]
638
639
class DOMImplementation:
    """Creates Document and DocumentType objects and answers feature queries."""

    def hasFeature(self, feature, version):
        """Return a true value if feature is supported at the given version.

        Only the "core" feature is recognised (compared case-insensitively),
        in versions "1.0" and "2.0".  A version of None or "" counts as
        "not specified", which the DOM specification defines as matching
        any supported version.
        """
        if version and version not in ("1.0", "2.0"):
            return 0
        feature = _string.lower(feature)
        return feature == "core"

    def createDocument(self, namespaceURI, qualifiedName, doctype):
        """Create a Document whose document element is named qualifiedName.

        doctype may be None, in which case a DocumentType is created from
        qualifiedName.  The returned document has its doctype and
        implementation attributes set.

        Raises xml.dom.WrongDocumentErr if doctype already has a parent,
        xml.dom.InvalidCharacterErr if qualifiedName is empty, and
        xml.dom.NamespaceErr if the "xml" prefix is used with a namespace
        other than the XML namespace or a prefix is given without a
        namespaceURI.
        """
        if doctype and doctype.parentNode is not None:
            raise xml.dom.WrongDocumentErr(
                "doctype object owned by another DOM tree")
        if not qualifiedName:
            # The spec is unclear what to raise here; SyntaxErr
            # would be the other obvious candidate. Since Xerces raises
            # InvalidCharacterErr, and since SyntaxErr is not listed
            # for createDocument, that seems to be the better choice.
            # XXX: need to check for illegal characters here and in
            # createElement.
            raise xml.dom.InvalidCharacterErr("Element with no name")
        prefix, localname = _nssplit(qualifiedName)
        if prefix == "xml" \
           and namespaceURI != "http://www.w3.org/XML/1998/namespace":
            raise xml.dom.NamespaceErr("illegal use of 'xml' prefix")
        if prefix and not namespaceURI:
            raise xml.dom.NamespaceErr(
                "illegal use of prefix without namespaces")

        # Every argument has been validated; only now build any objects, so
        # a rejected call creates nothing.
        doc = Document()
        if doctype is None:
            doctype = self.createDocumentType(qualifiedName, None, None)
        element = doc.createElementNS(namespaceURI, qualifiedName)
        doc.appendChild(element)
        doctype.parentNode = doc
        doc.doctype = doctype
        doc.implementation = self
        return doc

    def createDocumentType(self, qualifiedName, publicId, systemId):
        """Return a new DocumentType with publicId and systemId assigned."""
        doctype = DocumentType(qualifiedName)
        doctype.publicId = publicId
        doctype.systemId = systemId
        return doctype
681
682
class Document(Node):
    """DOM node of type DOCUMENT_NODE; also creates the other node kinds."""

    nodeType = Node.DOCUMENT_NODE
    nodeName = "#document"
    nodeValue = None
    attributes = None
    doctype = None
    parentNode = None

    implementation = DOMImplementation()
    # appendChild() accepts only nodes of these types.
    childNodeTypes = (Node.ELEMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE,
                      Node.COMMENT_NODE, Node.DOCUMENT_TYPE_NODE)

    def appendChild(self, node):
        """Append node to this document; returns what Node.appendChild returns.

        Raises HierarchyRequestErr if node's type is not listed in
        childNodeTypes, or if node is an element and the document would
        end up with two document elements.  Both checks run before node is
        detached from its current parent, so a rejected call leaves the
        tree node came from untouched.
        """
        if node.nodeType not in self.childNodeTypes:
            raise HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(node), repr(self)))

        if node.nodeType == Node.ELEMENT_NODE:
            current = self._get_documentElement()
            # Re-appending the current document element is allowed only
            # when the detach step below will take it out of this document
            # first; any other element arriving while one exists is a
            # second root.
            is_move = current is node and node.parentNode is self
            if current and not is_move:
                raise xml.dom.HierarchyRequestErr(
                    "two document elements disallowed")

        if node.parentNode is not None:
            node.parentNode.removeChild(node)
        return Node.appendChild(self, node)

    def removeChild(self, oldChild):
        """Remove oldChild from this document and return it.

        list.remove raises ValueError if oldChild is not a child.
        """
        self.childNodes.remove(oldChild)
        # Close the gap: the former neighbours must point at each other
        # rather than at the node that was just removed.
        following = oldChild.nextSibling
        preceding = oldChild.previousSibling
        if following is not None:
            following.previousSibling = preceding
        if preceding is not None:
            preceding.nextSibling = following
        oldChild.nextSibling = oldChild.previousSibling = None
        oldChild.parentNode = None
        # NOTE(review): documentElement is read as an attribute here while
        # _get_documentElement() computes it from childNodes; confirm that
        # assigning None does not hide the computed value afterwards.
        if self.documentElement is oldChild:
            self.documentElement = None

        return oldChild

    def _get_documentElement(self):
        """Return the first child that is an element, or None if none is."""
        for node in self.childNodes:
            if node.nodeType == Node.ELEMENT_NODE:
                return node

    def unlink(self):
        """Unlink the doctype, if any, then run Node.unlink on this node."""
        if self.doctype is not None:
            self.doctype.unlink()
            self.doctype = None
        Node.unlink(self)

    # The plain factories are the node classes themselves, so the arguments
    # of e.g. doc.createElement(...) are exactly the constructor arguments.
    createElement = Element

    createTextNode = Text

    createComment = Comment

    createProcessingInstruction = ProcessingInstruction

    createAttribute = Attr

    def createElementNS(self, namespaceURI, qualifiedName):
        """Create an element from a namespace URI and a qualified name."""
        prefix, localName = _nssplit(qualifiedName)
        return self.createElement(qualifiedName, namespaceURI,
                                  prefix, localName)

    def createAttributeNS(self, namespaceURI, qualifiedName):
        """Create an attribute from a namespace URI and a qualified name."""
        prefix, localName = _nssplit(qualifiedName)
        # NOTE(review): localName/prefix are passed in the opposite order to
        # createElementNS; presumably this matches the Attr constructor --
        # verify against its signature.
        return self.createAttribute(qualifiedName, namespaceURI,
                                    localName, prefix)

    def getElementsByTagNameNS(self, namespaceURI, localName):
        """Return a list of the elements matching namespaceURI and localName.

        The helper fills the list passed to it, as in getElementsByTagName;
        the list must be created here and returned, otherwise the caller
        receives None.
        """
        rc = []
        _getElementsByTagNameNSHelper(self, namespaceURI, localName, rc)
        return rc

    def getElementsByTagName(self, name):
        """Return a list of the elements found for tag name."""
        rc = []
        _getElementsByTagNameHelper(self, name, rc)
        return rc

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the XML declaration, then each child via its writexml()."""
        writer.write('<?xml version="1.0" ?>\n')
        for node in self.childNodes:
            node.writexml(writer, indent, addindent, newl)
Fred Drake55c38192000-06-29 19:39:57 +0000760
def _get_StringIO():
    """Return a new, empty StringIO buffer."""
    # The pure-Python module is used on purpose: cStringIO doesn't
    # support Unicode strings.
    import StringIO
    return StringIO.StringIO()
765
def _doparse(func, args, kwargs):
    """Build a complete DOM tree from an event stream and return its root.

    func is called with the positional arguments in args and the keyword
    arguments in kwargs; it must return an object providing getEvent(),
    expandNode() and clear().  The node delivered with the first event is
    expanded and returned.
    """
    # Extended call syntax replaces the deprecated apply() built-in.
    events = func(*args, **kwargs)
    try:
        toktype, rootNode = events.getEvent()
        events.expandNode(rootNode)
    finally:
        # Always give the stream its clear() call, even when parsing raises.
        events.clear()
    return rootNode
772
def parse(*args, **kwargs):
    """Build a DOM tree from an XML file given as a filename or file object.

    All arguments are handed on unchanged to pulldom.parse.
    """
    import xml.dom.pulldom
    return _doparse(xml.dom.pulldom.parse, args, kwargs)
Fred Drake55c38192000-06-29 19:39:57 +0000777
def parseString(*args, **kwargs):
    """Build a DOM tree from XML text held in a string.

    All arguments are handed on unchanged to pulldom.parseString.
    """
    import xml.dom.pulldom
    return _doparse(xml.dom.pulldom.parseString, args, kwargs)