blob: d078f39db3769bda554a191fbd767351ebe33d99 [file] [log] [blame]
"""\
minidom.py -- a lightweight DOM implementation.

parse( "foo.xml" )

parseString( "<foo><bar/></foo>" )

Todo:
=====
 * convenience methods for getting elements and text.
 * more testing
 * bring some of the writer and linearizer code into conformance with this
        interface
 * SAX 2 namespaces
"""
16
Fred Drake1f549022000-09-24 05:21:58 +000017import string
Fred Drake4ccf4a12000-11-21 22:02:22 +000018_string = string
19del string
20
21# localize the types, and allow support for Unicode values if available:
Fred Drake1f549022000-09-24 05:21:58 +000022import types
Fred Drake4ccf4a12000-11-21 22:02:22 +000023_TupleType = types.TupleType
24try:
25 _StringTypes = (types.StringType, types.UnicodeType)
26except AttributeError:
27 _StringTypes = (types.StringType,)
28del types
29
Fred Drakef7cf40d2000-12-14 18:16:11 +000030import xml.dom
31_Node = xml.dom.Node
32del xml
Fred Drake1f549022000-09-24 05:21:58 +000033
Fred Drake55c38192000-06-29 19:39:57 +000034
class Node(_Node):
    """Common behaviour shared by every minidom node type.

    Holds the child list and the parent/sibling links, implements the
    tree-mutation methods, and lets ``_get_<name>()`` accessors be read as
    plain attributes (and plain attributes be read through a synthesized
    ``_get_<name>()``) via ``__getattr__``.
    """

    # registry of created nodes keyed by id+class repr; only filled in
    # when _debug is true
    allnodes = {}
    # when true, node creation and unlinking are logged to `debug`
    _debug = 0
    # when true, tree mutations keep each child's parentNode up to date
    _makeParentNodes = 1
    # shared log stream, created lazily by the first node built in debug mode
    debug = None

    def __init__(self):
        """Start with no children and no parent."""
        self.childNodes = []
        self.parentNode = None
        if Node._debug:
            index = repr(id(self)) + repr(self.__class__)
            Node.allnodes[index] = repr(self.__dict__)
            if Node.debug is None:
                Node.debug = _get_StringIO()
                #open( "debug4.out", "w" )
            Node.debug.write("create %s\n" % index)

    def __getattr__(self, key):
        """Map ``node.foo`` onto ``node._get_foo()`` and back.

        Called only when normal lookup fails.  The ``inGetAttr`` marker in
        the instance dictionary stops the two directions from calling each
        other forever; the statement order here is deliberate.

        Raises AttributeError when neither form exists.
        """
        if key[0:2] == "__":
            raise AttributeError(key)
        # getattr should never call getattr!
        if self.__dict__.has_key("inGetAttr"):
            del self.inGetAttr
            raise AttributeError(key)

        prefix, attrname = key[:5], key[5:]
        if prefix == "_get_":
            self.inGetAttr = 1
            if hasattr(self, attrname):
                del self.inGetAttr
                return (lambda self=self, attrname=attrname:
                        getattr(self, attrname))
            else:
                del self.inGetAttr
                raise AttributeError(key)
        else:
            self.inGetAttr = 1
            try:
                func = getattr(self, "_get_" + key)
            except AttributeError:
                raise AttributeError(key)
            del self.inGetAttr
            return func()

    def __nonzero__(self):
        # a node is always true, even when it has no children
        return 1

    def toxml(self):
        """Return the text produced by this node's writexml()."""
        writer = _get_StringIO()
        self.writexml(writer)
        return writer.getvalue()

    def hasChildNodes(self):
        """Return 1 if the node has at least one child, else 0."""
        if self.childNodes:
            return 1
        else:
            return 0

    def _get_firstChild(self):
        """Return the first child, or None when there are no children."""
        if self.childNodes:
            return self.childNodes[0]

    def _get_lastChild(self):
        """Return the last child, or None when there are no children."""
        if self.childNodes:
            return self.childNodes[-1]

    def insertBefore(self, newChild, refChild):
        """Insert newChild just before refChild and return newChild.

        newChild is first removed from its current parent, if any.  A
        refChild of None appends newChild instead.  Raises ValueError (from
        list.index) when refChild is not a child of this node.
        """
        if newChild.parentNode is not None:
            newChild.parentNode.removeChild(newChild)
        if refChild is None:
            self.appendChild(newChild)
        else:
            index = self.childNodes.index(refChild)
            self.childNodes.insert(index, newChild)
            newChild.nextSibling = refChild
            refChild.previousSibling = newChild
            if index:
                node = self.childNodes[index-1]
                node.nextSibling = newChild
                newChild.previousSibling = node
            else:
                newChild.previousSibling = None
            if self._makeParentNodes:
                newChild.parentNode = self
        return newChild

    def appendChild(self, node):
        """Add node as the last child and return it.

        node is first removed from its current parent, if any.
        """
        if node.parentNode is not None:
            node.parentNode.removeChild(node)
        if self.childNodes:
            last = self.lastChild
            node.previousSibling = last
            last.nextSibling = node
        else:
            node.previousSibling = None
        node.nextSibling = None
        self.childNodes.append(node)
        if self._makeParentNodes:
            node.parentNode = self
        return node

    def replaceChild(self, newChild, oldChild):
        """Put newChild in oldChild's place and return oldChild.

        Replacing a node with itself is a no-op that returns None.  Raises
        ValueError (from list.index) when oldChild is not a child of this
        node.
        """
        # Test for self-replacement before detaching newChild; otherwise
        # replaceChild(x, x) would silently remove x from the tree.
        if newChild is oldChild:
            return
        if newChild.parentNode is not None:
            newChild.parentNode.removeChild(newChild)
        index = self.childNodes.index(oldChild)
        self.childNodes[index] = newChild
        if self._makeParentNodes:
            newChild.parentNode = self
            oldChild.parentNode = None
        following = oldChild.nextSibling
        preceding = oldChild.previousSibling
        newChild.nextSibling = following
        newChild.previousSibling = preceding
        # the neighbours must point at the replacement, not the old node
        if following is not None:
            following.previousSibling = newChild
        if preceding is not None:
            preceding.nextSibling = newChild
        oldChild.nextSibling = None
        oldChild.previousSibling = None
        return oldChild

    def removeChild(self, oldChild):
        """Detach oldChild from this node and return it.

        Raises ValueError (from list.remove) when oldChild is not a child
        of this node.
        """
        self.childNodes.remove(oldChild)
        if oldChild.nextSibling is not None:
            oldChild.nextSibling.previousSibling = oldChild.previousSibling
        if oldChild.previousSibling is not None:
            oldChild.previousSibling.nextSibling = oldChild.nextSibling
        oldChild.nextSibling = oldChild.previousSibling = None

        if self._makeParentNodes:
            oldChild.parentNode = None
        return oldChild

    def normalize(self):
        """Merge adjacent text children, drop empty text children, and
        normalize child elements recursively."""
        L = []
        for child in self.childNodes:
            if child.nodeType == Node.TEXT_NODE:
                data = child.data
                if data and L and L[-1].nodeType == child.nodeType:
                    # collapse text node
                    node = L[-1]
                    node.data = node.nodeValue = node.data + child.data
                    node.nextSibling = child.nextSibling
                    child.unlink()
                elif data:
                    if L:
                        L[-1].nextSibling = child
                        child.previousSibling = L[-1]
                    else:
                        child.previousSibling = None
                    L.append(child)
                else:
                    # empty text node; discard
                    child.unlink()
            else:
                if L:
                    L[-1].nextSibling = child
                    child.previousSibling = L[-1]
                else:
                    child.previousSibling = None
                L.append(child)
                if child.nodeType == Node.ELEMENT_NODE:
                    child.normalize()
        if L:
            # a discarded trailing text node must not stay reachable
            # through the last surviving child
            L[-1].nextSibling = None
        self.childNodes[:] = L

    def cloneNode(self, deep):
        """Return a copy of this node; copy the subtree too if deep is true.

        The copy has no parent, no siblings and, unless deep is true, no
        children.
        """
        import new
        clone = new.instance(self.__class__, self.__dict__.copy())
        if self._makeParentNodes:
            clone.parentNode = None
        # the copied __dict__ still references the original's siblings;
        # a fresh clone belongs to no sibling chain
        clone.previousSibling = None
        clone.nextSibling = None
        clone.childNodes = []
        if deep:
            for child in self.childNodes:
                clone.appendChild(child.cloneNode(1))
        return clone

    def unlink(self):
        """Drop every reference this node and its subtree hold to other
        nodes.  Calling it again on an already unlinked node is harmless."""
        self.parentNode = None
        # childNodes is None after an earlier unlink()
        if self.childNodes:
            for child in self.childNodes:
                child.unlink()
        self.childNodes = None
        self.previousSibling = None
        self.nextSibling = None
        if Node._debug:
            index = repr(id(self)) + repr(self.__class__)
            self.debug.write("Deleting: %s\n" % index)
            # the entry is already gone when unlink() runs a second time
            if Node.allnodes.has_key(index):
                del Node.allnodes[index]
Fred Drake55c38192000-06-29 19:39:57 +0000218
Fred Drake1f549022000-09-24 05:21:58 +0000219def _write_data(writer, data):
Fred Drake55c38192000-06-29 19:39:57 +0000220 "Writes datachars to writer."
Fred Drake4ccf4a12000-11-21 22:02:22 +0000221 replace = _string.replace
222 data = replace(data, "&", "&amp;")
223 data = replace(data, "<", "&lt;")
224 data = replace(data, "\"", "&quot;")
225 data = replace(data, ">", "&gt;")
Fred Drake55c38192000-06-29 19:39:57 +0000226 writer.write(data)
227
Fred Drake1f549022000-09-24 05:21:58 +0000228def _getElementsByTagNameHelper(parent, name, rc):
Fred Drake55c38192000-06-29 19:39:57 +0000229 for node in parent.childNodes:
Fred Drake1f549022000-09-24 05:21:58 +0000230 if node.nodeType == Node.ELEMENT_NODE and \
231 (name == "*" or node.tagName == name):
232 rc.append(node)
233 _getElementsByTagNameHelper(node, name, rc)
Fred Drake55c38192000-06-29 19:39:57 +0000234 return rc
235
Fred Drake1f549022000-09-24 05:21:58 +0000236def _getElementsByTagNameNSHelper(parent, nsURI, localName, rc):
Fred Drake55c38192000-06-29 19:39:57 +0000237 for node in parent.childNodes:
Fred Drake1f549022000-09-24 05:21:58 +0000238 if node.nodeType == Node.ELEMENT_NODE:
239 if ((localName == "*" or node.tagName == localName) and
240 (nsURI == "*" or node.namespaceURI == nsURI)):
241 rc.append(node)
Fred Drakef7cf40d2000-12-14 18:16:11 +0000242 _getElementsByTagNameNSHelper(node, nsURI, localName, rc)
243 return rc
Fred Drake55c38192000-06-29 19:39:57 +0000244
class Attr(Node):
    """Attribute node.

    `name`/`nodeName` and `value`/`nodeValue` are aliases; __setattr__
    keeps each pair in step.
    """
    nodeType = Node.ATTRIBUTE_NODE
    attributes = None
    ownerElement = None

    def __init__(self, qName, namespaceURI="", localName=None, prefix=None):
        # store straight into the instance dictionary; this skips
        # __setattr__ for performance
        state = self.__dict__
        state["name"] = qName
        state["nodeName"] = qName
        state["prefix"] = prefix
        state["namespaceURI"] = namespaceURI
        state["localName"] = localName or qName
        Node.__init__(self)
        # nodeValue and value are set elsewhere

    def __setattr__(self, name, value):
        state = self.__dict__
        if name == "value" or name == "nodeValue":
            state["nodeValue"] = value
            state["value"] = value
        elif name == "name" or name == "nodeName":
            state["nodeName"] = value
            state["name"] = value
        else:
            state[name] = value

    def cloneNode(self, deep):
        """Return a copy that falls back to the class-level ownerElement
        (None) instead of keeping this node's owner."""
        duplicate = Node.cloneNode(self, deep)
        try:
            del duplicate.__dict__["ownerElement"]
        except KeyError:
            pass
        return duplicate
274
Fred Drakef7cf40d2000-12-14 18:16:11 +0000275
class NamedNodeMap:
    """The attribute list is a transient interface to the underlying
    dictionaries.  Mutations here will change the underlying element's
    dictionary.

    Ordering is imposed artificially and does not reflect the order of
    attributes as found in an input document.
    """

    def __init__(self, attrs, attrsNS):
        # attrs maps name -> node; attrsNS maps
        # (namespaceURI, localName) -> node
        self._attrs = attrs
        self._attrsNS = attrsNS

    def __getattr__(self, name):
        # `length` is always computed from the live dictionary
        if name == "length":
            return len(self._attrs)
        raise AttributeError(name)

    def item(self, index):
        """Return the node at position index, or None if out of range."""
        try:
            return self[list(self._attrs.keys())[index]]
        except IndexError:
            return None

    def items(self):
        """Return a list of (nodeName, value) pairs."""
        L = []
        for node in self._attrs.values():
            # attribute nodes carry nodeName/name; they have no tagName
            L.append((node.nodeName, node.value))
        return L

    def itemsNS(self):
        """Return a list of ((namespaceURI, localName), value) pairs."""
        L = []
        for node in self._attrs.values():
            L.append(((node.namespaceURI, node.localName), node.value))
        return L

    def keys(self):
        return self._attrs.keys()

    def keysNS(self):
        return self._attrsNS.keys()

    def values(self):
        return self._attrs.values()

    def __len__(self):
        return self.length

    def __cmp__(self, other):
        # two maps are equal when they wrap the same name dictionary
        if self._attrs is getattr(other, "_attrs", None):
            return 0
        else:
            return cmp(id(self), id(other))

    #FIXME: is it appropriate to return .value?
    def __getitem__(self, attname_or_tuple):
        """Look up a node by name, or by (namespaceURI, localName) tuple.

        Raises KeyError when there is no such attribute.
        """
        if type(attname_or_tuple) is _TupleType:
            return self._attrsNS[attname_or_tuple]
        else:
            return self._attrs[attname_or_tuple]

    # same as set
    def __setitem__(self, attname, value):
        """Store value under attname; value is a string or an Attr.

        Raises TypeError for any other kind of value.
        """
        if type(value) in _StringTypes:
            node = Attr(attname)
            node.value = value
        else:
            if not isinstance(value, Attr):
                raise TypeError("value must be a string or Attr object")
            node = value
        self.setNamedItem(node)

    def setNamedItem(self, node):
        """Store node under its name and return the node previously stored
        under that name (None if there was none)."""
        old = self._attrs.get(node.name)
        # storing a node over itself must not unlink it
        if old is not None and old is not node:
            old.unlink()
        self._attrs[node.name] = node
        self._attrsNS[(node.namespaceURI, node.localName)] = node
        return old

    def setNamedItemNS(self, node):
        return self.setNamedItem(node)

    def __delitem__(self, attname_or_tuple):
        """Remove and unlink the node found by name or by
        (namespaceURI, localName) tuple."""
        node = self[attname_or_tuple]
        node.unlink()
        del self._attrs[node.name]
        del self._attrsNS[(node.namespaceURI, node.localName)]
        # `length` is computed by __getattr__; storing it on the instance
        # here would freeze it at the current size

AttributeList = NamedNodeMap
367
Fred Drake1f549022000-09-24 05:21:58 +0000368
class Element(Node):
    """Element node with a doubly-indexed attribute table."""
    nodeType = Node.ELEMENT_NODE
    nextSibling = None
    previousSibling = None

    def __init__(self, tagName, namespaceURI="", prefix="",
                 localName=None):
        Node.__init__(self)
        self.tagName = self.nodeName = tagName
        self.localName = localName or tagName
        self.prefix = prefix
        self.namespaceURI = namespaceURI
        self.nodeValue = None

        self._attrs = {}   # attributes are double-indexed:
        self._attrsNS = {} #    tagName -> Attribute
                           #    URI,localName -> Attribute
                           # in the future: consider lazy generation
                           # of attribute objects this is too tricky
                           # for now because of headaches with
                           # namespaces.

    def cloneNode(self, deep):
        """Return a copy with its own copies of the attribute nodes;
        children are copied as well when deep is true."""
        clone = Node.cloneNode(self, deep)
        clone._attrs = {}
        clone._attrsNS = {}
        for attr in self._attrs.values():
            node = attr.cloneNode(1)
            clone._attrs[node.name] = node
            clone._attrsNS[(node.namespaceURI, node.localName)] = node
            node.ownerElement = clone
        return clone

    def unlink(self):
        """Unlink the attribute nodes, drop the attribute tables, then
        unlink the node itself."""
        # _attrs is None once the tables have been dropped
        if self._attrs is not None:
            for attr in self._attrs.values():
                attr.unlink()
        self._attrs = None
        self._attrsNS = None
        Node.unlink(self)

    def getAttribute(self, attname):
        """Return the value of attribute attname; KeyError if absent."""
        return self._attrs[attname].value

    def getAttributeNS(self, namespaceURI, localName):
        """Return the value of the attribute identified by namespace and
        local name; KeyError if absent."""
        return self._attrsNS[(namespaceURI, localName)].value

    def setAttribute(self, attname, value):
        attr = Attr(attname)
        # for performance
        attr.__dict__["value"] = attr.__dict__["nodeValue"] = value
        self.setAttributeNode(attr)

    def setAttributeNS(self, namespaceURI, qualifiedName, value):
        prefix, localname = _nssplit(qualifiedName)
        # for performance
        attr = Attr(qualifiedName, namespaceURI, localname, prefix)
        attr.__dict__["value"] = attr.__dict__["nodeValue"] = value
        self.setAttributeNode(attr)

    def getAttributeNode(self, attrname):
        """Return the attribute node named attrname, or None."""
        return self._attrs.get(attrname)

    def getAttributeNodeNS(self, namespaceURI, localName):
        """Return the attribute node identified by namespace and local
        name; KeyError if absent."""
        return self._attrsNS[(namespaceURI, localName)]

    def setAttributeNode(self, attr):
        """Attach attr to this element.

        Returns the attribute node it replaced, or None when nothing was
        replaced or attr was already attached here.  Raises ValueError when
        attr belongs to another element.
        """
        if attr.ownerElement not in (None, self):
            raise ValueError("attribute node already owned")
        old = self._attrs.get(attr.name, None)
        # re-attaching the node that is already stored must not unlink it
        if old is not None and old is not attr:
            old.unlink()
        self._attrs[attr.name] = attr
        self._attrsNS[(attr.namespaceURI, attr.localName)] = attr

        # This creates a circular reference, but Element.unlink()
        # breaks the cycle since the references to the attribute
        # dictionaries are tossed.
        attr.ownerElement = self

        if old is not attr:
            # It might have already been part of this node, in which case
            # it doesn't represent a change, and should not be returned.
            return old

    def removeAttribute(self, name):
        """Remove the attribute called name; KeyError if absent."""
        attr = self._attrs[name]
        self.removeAttributeNode(attr)

    def removeAttributeNS(self, namespaceURI, localName):
        """Remove the attribute identified by namespace and local name;
        KeyError if absent."""
        attr = self._attrsNS[(namespaceURI, localName)]
        self.removeAttributeNode(attr)

    def removeAttributeNode(self, node):
        node.unlink()
        del self._attrs[node.name]
        del self._attrsNS[(node.namespaceURI, node.localName)]

    def getElementsByTagName(self, name):
        """Return the descendant elements whose tagName is name ("*"
        matches all), in document order."""
        return _getElementsByTagNameHelper(self, name, [])

    def getElementsByTagNameNS(self, namespaceURI, localName):
        """Return the descendant elements matching namespaceURI and
        localName, in document order."""
        # the helper's list is the result and has to be handed back
        return _getElementsByTagNameNSHelper(self, namespaceURI,
                                             localName, [])

    def __repr__(self):
        return "<DOM Element: %s at %s>" % (self.tagName, id(self))

    def writexml(self, writer):
        """Write the element, its attributes (sorted by name) and its
        children to writer."""
        writer.write("<" + self.tagName)

        attrs = self._get_attributes()
        a_names = attrs.keys()
        a_names.sort()

        for a_name in a_names:
            writer.write(" %s=\"" % a_name)
            _write_data(writer, attrs[a_name].value)
            writer.write("\"")
        if self.childNodes:
            writer.write(">")
            for node in self.childNodes:
                node.writexml(writer)
            writer.write("</%s>" % self.tagName)
        else:
            writer.write("/>")

    def _get_attributes(self):
        return AttributeList(self._attrs, self._attrsNS)
Fred Drake55c38192000-06-29 19:39:57 +0000496
class Comment(Node):
    """Comment node; the comment text is kept in `data` and `nodeValue`."""
    attributes = None
    nodeName = "#comment"
    nodeType = Node.COMMENT_NODE

    def __init__(self, data):
        Node.__init__(self)
        self.data = data
        self.nodeValue = data

    def writexml(self, writer):
        """Write the comment to writer, wrapped in comment delimiters."""
        markup = "<!--%s-->" % self.data
        writer.write(markup)
508
class ProcessingInstruction(Node):
    """Processing-instruction node; `target` doubles as `nodeName` and
    `data` as `nodeValue`."""
    attributes = None
    nodeType = Node.PROCESSING_INSTRUCTION_NODE

    def __init__(self, target, data):
        Node.__init__(self)
        self.target = target
        self.nodeName = target
        self.data = data
        self.nodeValue = data

    def writexml(self, writer):
        """Write the instruction to writer as target, a space, then data."""
        markup = "<?%s %s?>" % (self.target, self.data)
        writer.write(markup)
Fred Drake55c38192000-06-29 19:39:57 +0000520
class Text(Node):
    """Text node; the character data is kept in `data` and `nodeValue`."""
    nodeType = Node.TEXT_NODE
    nodeName = "#text"
    attributes = None

    def __init__(self, data):
        Node.__init__(self)
        self.data = self.nodeValue = data

    def __repr__(self):
        # show at most the first ten characters
        if len(self.data) > 10:
            dotdotdot = "..."
        else:
            dotdotdot = ""
        return "<DOM Text node \"%s%s\">" % (self.data[0:10], dotdotdot)

    def splitText(self, offset):
        """Split this node at offset and return the new node holding the
        tail.

        This node keeps data[:offset].  When the node is a child of its
        parent, the new node is inserted right after it.  Raises ValueError
        when offset is negative or beyond the end of the data.
        """
        if offset < 0 or offset > len(self.data):
            raise ValueError("illegal offset value for splitText()")
        newText = Text(self.data[offset:])
        parent = self.parentNode
        if parent and self in parent.childNodes:
            # a node that was never linked into a tree has no nextSibling
            # attribute at all, so only look it up here and with a default
            following = getattr(self, "nextSibling", None)
            if following is None:
                parent.appendChild(newText)
            else:
                parent.insertBefore(newText, following)
        # data and nodeValue are the same value under two names
        self.data = self.nodeValue = self.data[:offset]
        return newText

    def writexml(self, writer):
        _write_data(writer, self.data)
Fred Drake55c38192000-06-29 19:39:57 +0000552
Fred Drake1f549022000-09-24 05:21:58 +0000553def _nssplit(qualifiedName):
Fred Drake4ccf4a12000-11-21 22:02:22 +0000554 fields = _string.split(qualifiedName, ':', 1)
Paul Prescod73678da2000-07-01 04:58:47 +0000555 if len(fields) == 2:
556 return fields
557 elif len(fields) == 1:
Fred Drake1f549022000-09-24 05:21:58 +0000558 return ('', fields[0])
Paul Prescod73678da2000-07-01 04:58:47 +0000559
Fred Drakef7cf40d2000-12-14 18:16:11 +0000560
class DocumentType(Node):
    """Document type node; `name` holds the local part of the qualified
    name given to the constructor."""
    nodeType = Node.DOCUMENT_TYPE_NODE
    nodeValue = None
    attributes = None
    name = None
    publicId = None
    systemId = None
    internalSubset = ""
    entities = None
    notations = None

    def __init__(self, qualifiedName):
        Node.__init__(self)
        if not qualifiedName:
            # keep the class-level default name of None
            return
        self.name = _nssplit(qualifiedName)[1]
576 self.name = localname
577
578
class DOMImplementation:
    """Factory for Document and DocumentType objects."""

    def hasFeature(self, feature, version):
        """Return true only for the "core" feature (any letter case) at
        version "1.0" or "2.0"."""
        if version in ("1.0", "2.0"):
            return feature.lower() == "core"
        return 0

    def createDocument(self, namespaceURI, qualifiedName, doctype):
        """Create a Document that owns doctype.

        A doctype of None is replaced by a new one built from
        qualifiedName.  Raises ValueError when doctype already has a
        parent, when the "xml" prefix is used with a namespace other than
        the XML namespace, or when a prefix is given without a namespace.
        """
        if doctype:
            if doctype.parentNode is not None:
                raise ValueError("doctype object owned by another DOM tree")
        document = Document()
        if doctype is None:
            doctype = self.createDocumentType(qualifiedName, None, None)
        if qualifiedName:
            prefix = _nssplit(qualifiedName)[0]
            xml_namespace = "http://www.w3.org/XML/1998/namespace"
            if prefix == "xml" and namespaceURI != xml_namespace:
                raise ValueError("illegal use of 'xml' prefix")
            if prefix and not namespaceURI:
                raise ValueError("illegal use of prefix without namespaces")
        doctype.parentNode = document
        document.doctype = doctype
        document.implementation = self
        return document

    def createDocumentType(self, qualifiedName, publicId, systemId):
        """Return a new DocumentType carrying the given identifiers."""
        result = DocumentType(qualifiedName)
        result.systemId = systemId
        result.publicId = publicId
        return result
609
610
class Document(Node):
    """Document node: the root of a DOM tree and the factory for the
    nodes that go into it."""
    nodeType = Node.DOCUMENT_NODE
    nodeName = "#document"
    nodeValue = None
    attributes = None
    doctype = None
    parentNode = None

    implementation = DOMImplementation()

    def appendChild(self, node):
        """Append node to the document and return it.

        Raises TypeError when node is an element and the document already
        has a different document element.
        """
        # Validate before anything is detached, so a rejected node stays
        # where it was.  Re-appending the current document element itself
        # is allowed.
        if node.nodeType == Node.ELEMENT_NODE:
            current = self._get_documentElement()
            if current is not None and current is not node:
                raise TypeError("two document elements disallowed")
        # Node.appendChild() detaches node from its old parent first
        return Node.appendChild(self, node)

    def removeChild(self, oldChild):
        """Detach oldChild from the document and return it.

        Raises ValueError when oldChild is not a child of the document.
        """
        # the base class also repairs the neighbours' sibling links
        Node.removeChild(self, oldChild)
        oldChild.parentNode = None
        # NOTE(review): documentElement is normally computed by
        # _get_documentElement(), so this can presumably only match when
        # it was stored as an instance attribute -- confirm with callers.
        if self.documentElement is oldChild:
            self.documentElement = None

        return oldChild

    def _get_documentElement(self):
        """Return the first child that is an element, or None."""
        for node in self.childNodes:
            if node.nodeType == Node.ELEMENT_NODE:
                return node

    def unlink(self):
        """Unlink the doctype, if any, and then the document itself."""
        if self.doctype is not None:
            self.doctype.unlink()
            self.doctype = None
        Node.unlink(self)

    createElement = Element

    createTextNode = Text

    createComment = Comment

    createProcessingInstruction = ProcessingInstruction

    createAttribute = Attr

    def createElementNS(self, namespaceURI, qualifiedName):
        prefix, localName = _nssplit(qualifiedName)
        return self.createElement(qualifiedName, namespaceURI,
                                  prefix, localName)

    def createAttributeNS(self, namespaceURI, qualifiedName):
        prefix, localName = _nssplit(qualifiedName)
        return self.createAttribute(qualifiedName, namespaceURI,
                                    localName, prefix)

    def getElementsByTagNameNS(self, namespaceURI, localName):
        """Return the elements in the document matching namespaceURI and
        localName, in document order."""
        # the helper needs a result list, which then has to be returned
        return _getElementsByTagNameNSHelper(self, namespaceURI,
                                             localName, [])

    def getElementsByTagName(self, name):
        """Return the elements in the document whose tagName is name ("*"
        matches all), in document order."""
        rc = []
        _getElementsByTagNameHelper(self, name, rc)
        return rc

    def writexml(self, writer):
        for node in self.childNodes:
            node.writexml(writer)
Fred Drake55c38192000-06-29 19:39:57 +0000681
Fred Drake4ccf4a12000-11-21 22:02:22 +0000682def _get_StringIO():
Fred Drakef7cf40d2000-12-14 18:16:11 +0000683 # we can't use cStringIO since it doesn't support Unicode strings
684 from StringIO import StringIO
Fred Drake4ccf4a12000-11-21 22:02:22 +0000685 return StringIO()
686
Fred Drake1f549022000-09-24 05:21:58 +0000687def _doparse(func, args, kwargs):
688 events = apply(func, args, kwargs)
689 toktype, rootNode = events.getEvent()
690 events.expandNode(rootNode)
Fred Drake55c38192000-06-29 19:39:57 +0000691 return rootNode
692
def parse(*args, **kwargs):
    """Parse a file into a DOM by filename or file object."""
    # imported on demand; the arguments are handed to pulldom.parse as given
    from xml.dom.pulldom import parse as pull_parse
    return _doparse(pull_parse, args, kwargs)
Fred Drake55c38192000-06-29 19:39:57 +0000697
def parseString(*args, **kwargs):
    """Parse a file into a DOM from a string."""
    # imported on demand; the arguments are handed to pulldom.parseString
    # as given
    from xml.dom.pulldom import parseString as pull_parse_string
    return _doparse(pull_parse_string, args, kwargs)