blob: 2d3755c69b0a48644af04d01ad024bcf86c7d63a [file] [log] [blame]
Fred Drake1f549022000-09-24 05:21:58 +00001"""\
Fred Drakef7cf40d2000-12-14 18:16:11 +00002minidom.py -- a lightweight DOM implementation.
Fred Drake55c38192000-06-29 19:39:57 +00003
Paul Prescod623511b2000-07-21 22:05:49 +00004parse( "foo.xml" )
5
6parseString( "<foo><bar/></foo>" )
7
Fred Drake55c38192000-06-29 19:39:57 +00008Todo:
9=====
10 * convenience methods for getting elements and text.
11 * more testing
12 * bring some of the writer and linearizer code into conformance with this
13 interface
14 * SAX 2 namespaces
15"""
16
Fred Drake1f549022000-09-24 05:21:58 +000017import string
Fred Drake4ccf4a12000-11-21 22:02:22 +000018_string = string
19del string
20
21# localize the types, and allow support for Unicode values if available:
Fred Drake1f549022000-09-24 05:21:58 +000022import types
Fred Drake4ccf4a12000-11-21 22:02:22 +000023_TupleType = types.TupleType
24try:
25 _StringTypes = (types.StringType, types.UnicodeType)
26except AttributeError:
27 _StringTypes = (types.StringType,)
28del types
29
Fred Drakef7cf40d2000-12-14 18:16:11 +000030import xml.dom
31_Node = xml.dom.Node
32del xml
Fred Drake1f549022000-09-24 05:21:58 +000033
Fred Drake55c38192000-06-29 19:39:57 +000034
class Node(_Node):
    """Common base class for every node type in this module.

    A node owns an ordered ``childNodes`` list and keeps the
    ``parentNode`` / ``previousSibling`` / ``nextSibling`` links of its
    children up to date as they are inserted, replaced and removed.

    Attribute access falls back to ``_get_<name>()`` accessor methods
    through __getattr__, so ``node.firstChild`` and
    ``node._get_firstChild()`` give the same result.
    """
    allnodes = {}           # debug registry: node key -> repr(node.__dict__)
    _debug = 0              # when true, creation and unlinking are logged
    _makeParentNodes = 1    # when true, parentNode links are maintained
    debug = None            # shared log stream, created on first use

    def __init__(self):
        self.childNodes = []
        if Node._debug:
            index = repr(id(self)) + repr(self.__class__)
            Node.allnodes[index] = repr(self.__dict__)
            if Node.debug is None:
                Node.debug = _get_StringIO()
                #open( "debug4.out", "w" )
            Node.debug.write("create %s\n" % index)

    def __getattr__(self, key):
        """Map ``node.name`` to ``node._get_name()`` and vice versa.

        Only called when normal lookup fails.  ``_get_X`` for an existing
        attribute X yields a callable returning that attribute; any other
        name is resolved by calling ``_get_<name>()``.  Raises
        AttributeError when neither form exists.
        """
        if key[0:2] == "__":
            raise AttributeError(key)
        state = self.__dict__
        # getattr should never call getattr!  The "inGetAttr" entry marks
        # a lookup in progress; seeing it here means the nested lookup
        # failed, so clear the marker and give up.
        if state.has_key("inGetAttr"):
            del state["inGetAttr"]
            raise AttributeError(key)

        prefix, attrname = key[:5], key[5:]
        if prefix == "_get_":
            state["inGetAttr"] = 1
            found = hasattr(self, attrname)
            # A failed nested lookup has already removed the marker, so
            # only delete it when it is still there.
            if state.has_key("inGetAttr"):
                del state["inGetAttr"]
            if found:
                return (lambda self=self, attrname=attrname:
                        getattr(self, attrname))
            raise AttributeError(key)

        state["inGetAttr"] = 1
        try:
            func = getattr(self, "_get_" + key)
        except AttributeError:
            if state.has_key("inGetAttr"):
                del state["inGetAttr"]
            raise AttributeError(key)
        if state.has_key("inGetAttr"):
            del state["inGetAttr"]
        return func()

    def __nonzero__(self):
        # A node is always true, even when it has no children.
        return 1

    def toxml(self):
        """Return the XML text produced by this node's writexml()."""
        writer = _get_StringIO()
        self.writexml(writer)
        return writer.getvalue()

    def hasChildNodes(self):
        """Return 1 if the node has at least one child, else 0."""
        if self.childNodes:
            return 1
        else:
            return 0

    def _get_firstChild(self):
        """Return the first child, or None when there are no children."""
        if self.childNodes:
            return self.childNodes[0]
        return None

    def _get_lastChild(self):
        """Return the last child, or None when there are no children."""
        if self.childNodes:
            return self.childNodes[-1]
        return None

    def insertBefore(self, newChild, refChild):
        """Insert newChild before refChild (append when refChild is None).

        Raises ValueError (from list.index) if refChild is not a child of
        this node.  Returns newChild.
        """
        if refChild is None:
            self.appendChild(newChild)
        else:
            index = self.childNodes.index(refChild)
            self.childNodes.insert(index, newChild)
            newChild.nextSibling = refChild
            refChild.previousSibling = newChild
            if index:
                node = self.childNodes[index-1]
                node.nextSibling = newChild
                newChild.previousSibling = node
            else:
                newChild.previousSibling = None
            if self._makeParentNodes:
                newChild.parentNode = self
        return newChild

    def appendChild(self, node):
        """Add node as the last child and return it."""
        if self.childNodes:
            last = self.childNodes[-1]
            node.previousSibling = last
            last.nextSibling = node
        else:
            node.previousSibling = None
        node.nextSibling = None
        self.childNodes.append(node)
        if self._makeParentNodes:
            node.parentNode = self
        return node

    def replaceChild(self, newChild, oldChild):
        """Put newChild in oldChild's position and return oldChild.

        Returns None without doing anything when both arguments are the
        same object.  Raises ValueError (from list.index) if oldChild is
        not a child of this node.
        """
        if newChild is oldChild:
            return
        siblings = self.childNodes
        index = siblings.index(oldChild)
        siblings[index] = newChild
        if self._makeParentNodes:
            newChild.parentNode = self
            oldChild.parentNode = None
        # Take the neighbours from the child list rather than from
        # oldChild's own links, then relink in both directions so the
        # neighbours no longer point at the node that was replaced.
        if index > 0:
            previous = siblings[index - 1]
        else:
            previous = None
        if index + 1 < len(siblings):
            following = siblings[index + 1]
        else:
            following = None
        newChild.previousSibling = previous
        newChild.nextSibling = following
        if previous is not None:
            previous.nextSibling = newChild
        if following is not None:
            following.previousSibling = newChild
        oldChild.previousSibling = None
        oldChild.nextSibling = None
        return oldChild

    def removeChild(self, oldChild):
        """Detach oldChild from this node and return it.

        Raises ValueError (from list.index) if oldChild is not a child of
        this node.
        """
        siblings = self.childNodes
        index = siblings.index(oldChild)
        del siblings[index]
        # Close the gap: the former neighbours become each other's
        # siblings and the removed node keeps no links into the tree.
        if index > 0:
            previous = siblings[index - 1]
        else:
            previous = None
        if index < len(siblings):
            following = siblings[index]
        else:
            following = None
        if previous is not None:
            previous.nextSibling = following
        if following is not None:
            following.previousSibling = previous
        oldChild.previousSibling = None
        oldChild.nextSibling = None
        if self._makeParentNodes:
            oldChild.parentNode = None
        return oldChild

    def normalize(self):
        """Merge adjacent text children and drop empty ones, recursively.

        Discarded text nodes are unlinked.  Element children are
        normalized in turn.
        """
        kept = []
        for child in self.childNodes:
            if child.nodeType == Node.TEXT_NODE:
                data = child.data
                if data and kept and kept[-1].nodeType == child.nodeType:
                    # collapse text node
                    node = kept[-1]
                    node.data = node.nodeValue = node.data + child.data
                    node.nextSibling = child.nextSibling
                    child.unlink()
                elif data:
                    if kept:
                        kept[-1].nextSibling = child
                        child.previousSibling = kept[-1]
                    else:
                        child.previousSibling = None
                    kept.append(child)
                else:
                    # empty text node; discard
                    child.unlink()
            else:
                if kept:
                    kept[-1].nextSibling = child
                    child.previousSibling = kept[-1]
                else:
                    child.previousSibling = None
                kept.append(child)
                if child.nodeType == Node.ELEMENT_NODE:
                    child.normalize()
        if kept:
            # A discarded trailing text node must not remain reachable
            # through the last surviving child.
            kept[-1].nextSibling = None
        self.childNodes[:] = kept

    def cloneNode(self, deep):
        """Return a copy of this node; copy the subtree too when deep.

        The copy is created without calling __init__, from a shallow copy
        of the instance dictionary, and is not attached to any tree.
        """
        import new
        clone = new.instance(self.__class__, self.__dict__.copy())
        if self._makeParentNodes:
            clone.parentNode = None
        # The copied dictionary still refers to the source node's
        # siblings; the clone is detached, so it has none.
        clone.previousSibling = None
        clone.nextSibling = None
        clone.childNodes = []
        if deep:
            for child in self.childNodes:
                clone.appendChild(child.cloneNode(1))
        return clone

    def unlink(self):
        """Break the links of this node and its subtree.

        After the call ``childNodes`` is None.  Calling unlink() again is
        harmless.
        """
        self.parentNode = None
        # childNodes is None once the node has been unlinked.
        if self.childNodes:
            for child in self.childNodes:
                child.unlink()
        self.childNodes = None
        self.previousSibling = None
        self.nextSibling = None
        if Node._debug:
            index = repr(id(self)) + repr(self.__class__)
            if self.debug is not None:
                self.debug.write("Deleting: %s\n" % index)
            if Node.allnodes.has_key(index):
                del Node.allnodes[index]
Fred Drake55c38192000-06-29 19:39:57 +0000205
def _write_data(writer, data):
    "Escape the XML markup characters in data and write it to writer."
    substitute = _string.replace
    # "&" has to be handled first so that the entities introduced by the
    # later substitutions are not escaped a second time.
    for raw, entity in (("&", "&amp;"),
                        ("<", "&lt;"),
                        ("\"", "&quot;"),
                        (">", "&gt;")):
        data = substitute(data, raw, entity)
    writer.write(data)
214
def _getElementsByTagNameHelper(parent, name, rc):
    """Append to rc, in document order, every element below parent whose
    tagName equals name ("*" matches any element); return rc."""
    wildcard = name == "*"
    for child in parent.childNodes:
        if child.nodeType == Node.ELEMENT_NODE:
            if wildcard or child.tagName == name:
                rc.append(child)
        # Descend into every child, whatever its type.
        _getElementsByTagNameHelper(child, name, rc)
    return rc
222
def _getElementsByTagNameNSHelper(parent, nsURI, localName, rc):
    """Collect the elements below parent that match a namespace and name.

    An element matches when its localName equals localName and its
    namespaceURI equals nsURI; "*" is a wildcard for either argument.
    Matches are appended to rc in document order and rc is returned.
    """
    for node in parent.childNodes:
        if node.nodeType == Node.ELEMENT_NODE:
            # Compare against localName, not tagName: tagName is the
            # qualified name and includes the prefix for prefixed
            # elements, so it would never equal a bare local name.
            if ((localName == "*" or node.localName == localName) and
                (nsURI == "*" or node.namespaceURI == nsURI)):
                rc.append(node)
            _getElementsByTagNameNSHelper(node, nsURI, localName, rc)
    return rc
Fred Drake55c38192000-06-29 19:39:57 +0000231
class Attr(Node):
    """An attribute node.

    ``name``/``nodeName`` and ``value``/``nodeValue`` are aliases; the
    __setattr__ hook keeps each pair in step.
    """
    nodeType = Node.ATTRIBUTE_NODE
    attributes = None
    ownerElement = None

    def __init__(self, qName, namespaceURI="", localName=None, prefix=None):
        # Write straight into the instance dictionary so that the
        # __setattr__ hook is not invoked for every field.
        fields = self.__dict__
        fields["localName"] = localName or qName
        fields["name"] = qName
        fields["nodeName"] = qName
        fields["namespaceURI"] = namespaceURI
        fields["prefix"] = prefix
        Node.__init__(self)
        # nodeValue and value are set elsewhere

    def __setattr__(self, name, value):
        fields = self.__dict__
        if name == "value" or name == "nodeValue":
            fields["value"] = value
            fields["nodeValue"] = value
        elif name == "name" or name == "nodeName":
            fields["name"] = value
            fields["nodeName"] = value
        else:
            fields[name] = value

    def cloneNode(self, deep):
        """Return a copy of this attribute that has no owner element."""
        clone = Node.cloneNode(self, deep)
        fields = clone.__dict__
        # Dropping the instance entry exposes the class default (None).
        if fields.has_key("ownerElement"):
            del fields["ownerElement"]
        return clone
261
Fred Drakef7cf40d2000-12-14 18:16:11 +0000262
class NamedNodeMap:
    """The attribute list is a transient interface to the underlying
    dictionaries.  Mutations here will change the underlying element's
    dictionary.

    Two dictionaries are shared with the element: one keyed by attribute
    name and one keyed by (namespaceURI, localName).

    Ordering is imposed artificially and does not reflect the order of
    attributes as found in an input document.
    """

    def __init__(self, attrs, attrsNS):
        self._attrs = attrs
        self._attrsNS = attrsNS

    def __getattr__(self, name):
        # "length" is always computed from the live dictionary so that it
        # can never go stale.
        if name == "length":
            return len(self._attrs)
        raise AttributeError(name)

    def item(self, index):
        """Return the attribute node at index, or None if out of range."""
        try:
            return self[self._attrs.keys()[index]]
        except IndexError:
            return None

    def items(self):
        """Return a list of (name, value) pairs."""
        L = []
        for node in self._attrs.values():
            # Attribute nodes carry nodeName/name; they have no tagName.
            L.append((node.nodeName, node.value))
        return L

    def itemsNS(self):
        """Return a list of ((namespaceURI, localName), value) pairs."""
        L = []
        for node in self._attrs.values():
            L.append(((node.namespaceURI, node.localName), node.value))
        return L

    def keys(self):
        return self._attrs.keys()

    def keysNS(self):
        return self._attrsNS.keys()

    def values(self):
        return self._attrs.values()

    def __len__(self):
        return self.length

    def __cmp__(self, other):
        # Two maps are equal when they are views of the same dictionary.
        if self._attrs is getattr(other, "_attrs", None):
            return 0
        else:
            return cmp(id(self), id(other))

    #FIXME: is it appropriate to return .value?
    def __getitem__(self, attname_or_tuple):
        """Look up a node by name, or by a (namespaceURI, localName)
        tuple.  Raises KeyError when there is no such attribute."""
        if type(attname_or_tuple) is _TupleType:
            return self._attrsNS[attname_or_tuple]
        else:
            return self._attrs[attname_or_tuple]

    # same as set
    def __setitem__(self, attname, value):
        """Store value under attname.

        A string value is wrapped in a new Attr named attname; an Attr
        instance is stored as given.  Raises TypeError for anything else.
        """
        if type(value) in _StringTypes:
            node = Attr(attname)
            node.value = value
        else:
            if not isinstance(value, Attr):
                raise TypeError("value must be a string or Attr object")
            node = value
        self.setNamedItem(node)

    def setNamedItem(self, node):
        """Store node under its name and return the node it replaced
        (None when there was none)."""
        old = self._attrs.get(node.name)
        if old is not None and old is not node:
            # Drop the replaced node's namespace entry too, otherwise a
            # node with a different (namespaceURI, localName) would stay
            # reachable through the second dictionary.
            old_key = (old.namespaceURI, old.localName)
            if self._attrsNS.get(old_key) is old:
                del self._attrsNS[old_key]
            # Only the node being replaced is unlinked; re-setting the
            # same node must leave it usable.
            old.unlink()
        self._attrs[node.name] = node
        self._attrsNS[(node.namespaceURI, node.localName)] = node
        return old

    def setNamedItemNS(self, node):
        return self.setNamedItem(node)

    def __delitem__(self, attname_or_tuple):
        """Remove and unlink the node found under the given key."""
        node = self[attname_or_tuple]
        node.unlink()
        del self._attrs[node.name]
        del self._attrsNS[(node.namespaceURI, node.localName)]
        # No "length" is stored here: an instance attribute would hide
        # the computed value and go stale on the next change.

AttributeList = NamedNodeMap
354
Fred Drake1f549022000-09-24 05:21:58 +0000355
class Element(Node):
    """An element node with its attributes.

    Attribute nodes are kept in two dictionaries: ``_attrs`` keyed by
    attribute name and ``_attrsNS`` keyed by (namespaceURI, localName).
    """
    nodeType = Node.ELEMENT_NODE
    nextSibling = None
    previousSibling = None

    def __init__(self, tagName, namespaceURI="", prefix="",
                 localName=None):
        Node.__init__(self)
        self.tagName = self.nodeName = tagName
        self.localName = localName or tagName
        self.prefix = prefix
        self.namespaceURI = namespaceURI
        self.nodeValue = None

        self._attrs = {}   # attributes are double-indexed:
        self._attrsNS = {} #    tagName -> Attribute
                           #    URI,localName -> Attribute
                           # in the future: consider lazy generation
                           # of attribute objects this is too tricky
                           # for now because of headaches with
                           # namespaces.

    def cloneNode(self, deep):
        """Return a copy of the element with copies of its attributes."""
        clone = Node.cloneNode(self, deep)
        clone._attrs = {}
        clone._attrsNS = {}
        for attr in self._attrs.values():
            node = attr.cloneNode(1)
            clone._attrs[node.name] = node
            clone._attrsNS[(node.namespaceURI, node.localName)] = node
            node.ownerElement = clone
        return clone

    def unlink(self):
        """Unlink the attributes, then the element and its subtree.

        Calling unlink() again is harmless.
        """
        # _attrs is None once the element has been unlinked.
        if self._attrs is not None:
            for attr in self._attrs.values():
                attr.unlink()
        self._attrs = None
        self._attrsNS = None
        Node.unlink(self)

    def getAttribute(self, attname):
        """Return the value of the named attribute.

        Raises KeyError when the element has no such attribute.
        """
        return self._attrs[attname].value

    def getAttributeNS(self, namespaceURI, localName):
        """Return the value of the attribute with the given namespace and
        local name.  Raises KeyError when there is none."""
        return self._attrsNS[(namespaceURI, localName)].value

    def setAttribute(self, attname, value):
        attr = Attr(attname)
        # for performance
        attr.__dict__["value"] = attr.__dict__["nodeValue"] = value
        self.setAttributeNode(attr)

    def setAttributeNS(self, namespaceURI, qualifiedName, value):
        prefix, localname = _nssplit(qualifiedName)
        # for performance
        attr = Attr(qualifiedName, namespaceURI, localname, prefix)
        attr.__dict__["value"] = attr.__dict__["nodeValue"] = value
        self.setAttributeNode(attr)

    def getAttributeNode(self, attrname):
        """Return the named attribute node, or None when absent."""
        return self._attrs.get(attrname)

    def getAttributeNodeNS(self, namespaceURI, localName):
        """Return the attribute node with the given namespace and local
        name.  Raises KeyError when there is none."""
        return self._attrsNS[(namespaceURI, localName)]

    def setAttributeNode(self, attr):
        """Attach attr to this element.

        Returns the attribute node that was replaced, or None when
        nothing was replaced or attr was already attached here.  Raises
        ValueError if attr belongs to another element.
        """
        if attr.ownerElement not in (None, self):
            raise ValueError("attribute node already owned")
        old = self._attrs.get(attr.name, None)
        if old is not None and old is not attr:
            # Remove the replaced node's namespace entry as well; it may
            # be stored under a different (namespaceURI, localName).
            old_key = (old.namespaceURI, old.localName)
            if self._attrsNS.get(old_key) is old:
                del self._attrsNS[old_key]
            # Never unlink attr itself: re-setting a node that is already
            # attached here must leave it intact.
            old.unlink()
        self._attrs[attr.name] = attr
        self._attrsNS[(attr.namespaceURI, attr.localName)] = attr

        # This creates a circular reference, but Element.unlink()
        # breaks the cycle since the references to the attribute
        # dictionaries are tossed.
        attr.ownerElement = self

        if old is not attr:
            # It might have already been part of this node, in which case
            # it doesn't represent a change, and should not be returned.
            return old

    def removeAttribute(self, name):
        """Remove the named attribute.  Raises KeyError when absent."""
        attr = self._attrs[name]
        self.removeAttributeNode(attr)

    def removeAttributeNS(self, namespaceURI, localName):
        """Remove the attribute with the given namespace and local name.
        Raises KeyError when absent."""
        attr = self._attrsNS[(namespaceURI, localName)]
        self.removeAttributeNode(attr)

    def removeAttributeNode(self, node):
        """Unlink node and remove it from both attribute dictionaries."""
        node.unlink()
        del self._attrs[node.name]
        del self._attrsNS[(node.namespaceURI, node.localName)]

    def getElementsByTagName(self, name):
        """Return a list of the descendant elements whose tagName is
        name ("*" matches every element)."""
        return _getElementsByTagNameHelper(self, name, [])

    def getElementsByTagNameNS(self, namespaceURI, localName):
        """Return a list of the descendant elements matching the given
        namespace URI and local name ("*" is a wildcard for either)."""
        return _getElementsByTagNameNSHelper(self, namespaceURI,
                                             localName, [])

    def __repr__(self):
        return "<DOM Element: %s at %s>" % (self.tagName, id(self))

    def writexml(self, writer):
        """Write the element, its attributes and its children to writer.

        Attributes are written sorted by name; an element without
        children is written as an empty-element tag.
        """
        writer.write("<" + self.tagName)

        attrs = self._get_attributes()
        a_names = attrs.keys()
        a_names.sort()

        for a_name in a_names:
            writer.write(" %s=\"" % a_name)
            _write_data(writer, attrs[a_name].value)
            writer.write("\"")
        if self.childNodes:
            writer.write(">")
            for node in self.childNodes:
                node.writexml(writer)
            writer.write("</%s>" % self.tagName)
        else:
            writer.write("/>")

    def _get_attributes(self):
        # Accessor behind the "attributes" attribute: a fresh view of the
        # two attribute dictionaries.
        return AttributeList(self._attrs, self._attrsNS)
Fred Drake55c38192000-06-29 19:39:57 +0000483
class Comment(Node):
    """A comment node; the text is held in ``data`` and ``nodeValue``."""
    nodeType = Node.COMMENT_NODE
    nodeName = "#comment"
    attributes = None

    def __init__(self, data):
        Node.__init__(self)
        self.data = data
        self.nodeValue = data

    def writexml(self, writer):
        # The text is written as stored, without escaping.
        markup = "<!--%s-->" % self.data
        writer.write(markup)
495
class ProcessingInstruction(Node):
    """A processing instruction node.

    ``target`` doubles as ``nodeName`` and ``data`` as ``nodeValue``.
    """
    nodeType = Node.PROCESSING_INSTRUCTION_NODE
    attributes = None

    def __init__(self, target, data):
        Node.__init__(self)
        self.target = target
        self.nodeName = target
        self.data = data
        self.nodeValue = data

    def writexml(self, writer):
        # Target and data are written as stored, separated by one space.
        markup = "<?%s %s?>" % (self.target, self.data)
        writer.write(markup)
Fred Drake55c38192000-06-29 19:39:57 +0000507
class Text(Node):
    """A text node; the characters are held in ``data``/``nodeValue``."""
    nodeType = Node.TEXT_NODE
    nodeName = "#text"
    attributes = None

    def __init__(self, data):
        Node.__init__(self)
        self.data = self.nodeValue = data

    def __repr__(self):
        # Show at most the first ten characters of the text.
        if len(self.data) > 10:
            dotdotdot = "..."
        else:
            dotdotdot = ""
        return "<DOM Text node \"%s%s\">" % (self.data[0:10], dotdotdot)

    def splitText(self, offset):
        """Split this node in two at offset and return the new node.

        This node keeps the text before offset; the returned node holds
        the rest.  When this node is a child of a parent, the new node is
        inserted directly after it.  Raises ValueError when offset is
        negative or larger than the length of the text.
        """
        if offset < 0 or offset > len(self.data):
            raise ValueError("illegal offset value for splitText()")
        newText = Text(self.data[offset:])
        # A text node that was never inserted into a tree has neither a
        # parentNode nor a nextSibling attribute; treat both as None.
        parent = getattr(self, "parentNode", None)
        if parent is not None and self in parent.childNodes:
            # insertBefore() appends when the reference node is None.
            parent.insertBefore(newText, getattr(self, "nextSibling", None))
        # data and nodeValue are aliases and have to change together.
        self.data = self.nodeValue = self.data[:offset]
        return newText

    def writexml(self, writer):
        _write_data(writer, self.data)
Fred Drake55c38192000-06-29 19:39:57 +0000539
def _nssplit(qualifiedName):
    """Split "prefix:local" at the first colon into its two parts.

    Returns the two-item list from the split when there is a colon, and
    the tuple ('', qualifiedName) when there is none.
    """
    parts = _string.split(qualifiedName, ':', 1)
    count = len(parts)
    if count == 1:
        return ('', parts[0])
    if count == 2:
        return parts
Paul Prescod73678da2000-07-01 04:58:47 +0000546
Fred Drakef7cf40d2000-12-14 18:16:11 +0000547
class DocumentType(Node):
    """A document type node.

    Every field defaults to the class-level value below; only ``name``
    is filled in by the constructor.
    """
    nodeType = Node.DOCUMENT_TYPE_NODE
    nodeValue = None
    attributes = None
    name = None
    publicId = None
    systemId = None
    internalSubset = ""
    entities = None
    notations = None

    def __init__(self, qualifiedName):
        Node.__init__(self)
        if not qualifiedName:
            return
        # Only the part after the prefix is kept as the name.
        pieces = _nssplit(qualifiedName)
        prefix, localname = pieces
        self.name = localname
564
565
class DOMImplementation:
    """Factory for Document and DocumentType objects."""

    def hasFeature(self, feature, version):
        """Return true only for the "core" feature (compared without
        regard to case) in version "1.0" or "2.0"."""
        if version not in ("1.0", "2.0"):
            return 0
        feature = _string.lower(feature)
        return feature == "core"

    def createDocument(self, namespaceURI, qualifiedName, doctype):
        """Create a Document that uses the given doctype.

        A new DocumentType is created when doctype is None.  No document
        element is created; qualifiedName and namespaceURI are only
        validated.  Raises ValueError when doctype already belongs to a
        tree, when the "xml" prefix is used with the wrong namespace, or
        when a prefix is given without a namespace.
        """
        # A freshly created DocumentType has no parentNode attribute at
        # all, so read it with a default instead of assuming it exists.
        if (doctype is not None
            and getattr(doctype, "parentNode", None) is not None):
            raise ValueError("doctype object owned by another DOM tree")
        doc = Document()
        if doctype is None:
            doctype = self.createDocumentType(qualifiedName, None, None)
        if qualifiedName:
            prefix, localname = _nssplit(qualifiedName)
            if prefix == "xml" \
               and namespaceURI != "http://www.w3.org/XML/1998/namespace":
                raise ValueError("illegal use of 'xml' prefix")
            if prefix and not namespaceURI:
                raise ValueError("illegal use of prefix without namespaces")
        doctype.parentNode = doc
        doc.doctype = doctype
        doc.implementation = self
        return doc

    def createDocumentType(self, qualifiedName, publicId, systemId):
        """Return a new DocumentType with the given name and identifiers."""
        doctype = DocumentType(qualifiedName)
        doctype.publicId = publicId
        doctype.systemId = systemId
        return doctype
596
597
class Document(Node):
    """The root of a DOM tree; also the factory for the other node types."""
    nodeType = Node.DOCUMENT_NODE
    nodeName = "#document"
    nodeValue = None
    attributes = None
    doctype = None
    parentNode = None

    implementation = DOMImplementation()

    def appendChild(self, node):
        """Append node; a document may hold only one element child.

        Raises TypeError when node is an element and the document already
        has a document element.
        """
        if node.nodeType == Node.ELEMENT_NODE \
           and self._get_documentElement():
            raise TypeError("two document elements disallowed")
        return Node.appendChild(self, node)

    def _get_documentElement(self):
        """Return the first element child, or None when there is none."""
        for node in self.childNodes:
            if node.nodeType == Node.ELEMENT_NODE:
                return node
        return None

    def unlink(self):
        """Unlink the doctype, if any, and then the rest of the tree."""
        if self.doctype is not None:
            self.doctype.unlink()
            self.doctype = None
        Node.unlink(self)

    # The factory "methods" are the node classes themselves; calling one
    # through a document simply constructs a node of that class.
    createElement = Element

    createTextNode = Text

    createComment = Comment

    createProcessingInstruction = ProcessingInstruction

    createAttribute = Attr

    def createElementNS(self, namespaceURI, qualifiedName):
        prefix, localName = _nssplit(qualifiedName)
        return self.createElement(qualifiedName, namespaceURI,
                                  prefix, localName)

    def createAttributeNS(self, namespaceURI, qualifiedName):
        prefix, localName = _nssplit(qualifiedName)
        return self.createAttribute(qualifiedName, namespaceURI,
                                    localName, prefix)

    def getElementsByTagNameNS(self, namespaceURI, localName):
        """Return a list of the elements in the document matching the
        given namespace URI and local name ("*" is a wildcard)."""
        # The helper needs a list to fill, and its result is the answer.
        return _getElementsByTagNameNSHelper(self, namespaceURI,
                                             localName, [])

    def getElementsByTagName(self, name):
        """Return a list of the elements in the document whose tagName is
        name ("*" matches every element)."""
        rc = []
        _getElementsByTagNameHelper(self, name, rc)
        return rc

    def writexml(self, writer):
        """Write every child of the document to writer, in order."""
        for node in self.childNodes:
            node.writexml(writer)
Fred Drake55c38192000-06-29 19:39:57 +0000656
Fred Drake4ccf4a12000-11-21 22:02:22 +0000657def _get_StringIO():
Fred Drakef7cf40d2000-12-14 18:16:11 +0000658 # we can't use cStringIO since it doesn't support Unicode strings
659 from StringIO import StringIO
Fred Drake4ccf4a12000-11-21 22:02:22 +0000660 return StringIO()
661
Fred Drake1f549022000-09-24 05:21:58 +0000662def _doparse(func, args, kwargs):
663 events = apply(func, args, kwargs)
664 toktype, rootNode = events.getEvent()
665 events.expandNode(rootNode)
Fred Drake55c38192000-06-29 19:39:57 +0000666 return rootNode
667
def parse(*args, **kwargs):
    """Parse a file into a DOM by filename or file object.

    All arguments are handed on unchanged to xml.dom.pulldom.parse().
    """
    from xml.dom.pulldom import parse as event_source
    return _doparse(event_source, args, kwargs)
Fred Drake55c38192000-06-29 19:39:57 +0000672
def parseString(*args, **kwargs):
    """Parse a file into a DOM from a string.

    All arguments are handed on unchanged to
    xml.dom.pulldom.parseString().
    """
    from xml.dom.pulldom import parseString as event_source
    return _doparse(event_source, args, kwargs)