blob: 65af4c72315dab1c2d7165389f457b576628cda4 [file] [log] [blame]
"""\
minidom.py -- a lightweight DOM implementation.

Typical use:

    parse("foo.xml")

    parseString("<foo><bar/></foo>")

Todo:
=====
 * convenience methods for getting elements and text.
 * more testing
 * bring some of the writer and linearizer code into conformance with this
   interface
 * SAX 2 namespaces
"""
16
Fred Drake1f549022000-09-24 05:21:58 +000017import string
Fred Drake4ccf4a12000-11-21 22:02:22 +000018_string = string
19del string
20
Andrew M. Kuchling291ed4f2000-12-31 03:50:23 +000021from xml.dom import HierarchyRequestErr
22
Fred Drake4ccf4a12000-11-21 22:02:22 +000023# localize the types, and allow support for Unicode values if available:
Fred Drake1f549022000-09-24 05:21:58 +000024import types
Fred Drake4ccf4a12000-11-21 22:02:22 +000025_TupleType = types.TupleType
26try:
27 _StringTypes = (types.StringType, types.UnicodeType)
28except AttributeError:
29 _StringTypes = (types.StringType,)
30del types
31
Fred Drakef7cf40d2000-12-14 18:16:11 +000032import xml.dom
33_Node = xml.dom.Node
34del xml
Fred Drake1f549022000-09-24 05:21:58 +000035
Fred Drake55c38192000-06-29 19:39:57 +000036
class Node(_Node):
    """Base class shared by every minidom node type.

    Maintains the child list together with the parent and sibling links,
    and supplies normalisation, cloning and unlinking.  ``__getattr__``
    maps ``node.foo`` onto ``node._get_foo()`` and synthesises
    ``node._get_foo`` as a callable returning ``node.foo``.
    """
    allnodes = {}           # debug registry: id+class repr -> repr(__dict__)
    _debug = 0              # when true, creation and unlinking are logged
    _makeParentNodes = 1    # when true, parentNode links are maintained
    debug = None            # log stream, created on first use in debug mode
    childNodeTypes = ()     # nodeType values accepted as children

    def __init__(self):
        self.childNodes = []
        self.parentNode = None
        if Node._debug:
            index = repr(id(self)) + repr(self.__class__)
            Node.allnodes[index] = repr(self.__dict__)
            if Node.debug is None:
                Node.debug = _get_StringIO()
            Node.debug.write("create %s\n" % index)

    def __getattr__(self, key):
        """Resolve ``foo`` via ``_get_foo()`` and ``_get_foo`` via ``foo``.

        Raises AttributeError naming *key* when neither form exists.
        """
        if key[0:2] == "__":
            raise AttributeError(key)
        state = self.__dict__
        # getattr should never call getattr!  If the flag is set, this call
        # was triggered by one of the probes below, so the name is missing.
        if state.get("inGetAttr") is not None:
            del state["inGetAttr"]
            raise AttributeError(key)

        prefix, attrname = key[:5], key[5:]
        is_getter = prefix == "_get_"
        func = None
        state["inGetAttr"] = 1
        if is_getter:
            found = hasattr(self, attrname)
        else:
            try:
                func = getattr(self, "_get_" + key)
                found = 1
            except AttributeError:
                found = 0
        # A failed probe has already removed the flag; a successful one has
        # not.  Clearing it here keeps the error below about *key* rather
        # than about the bookkeeping attribute.
        if state.get("inGetAttr") is not None:
            del state["inGetAttr"]
        if not found:
            raise AttributeError(key)
        if is_getter:
            return (lambda self=self, attrname=attrname:
                    getattr(self, attrname))
        return func()

    def __nonzero__(self):
        # A node is always true, even when it has no children.
        return 1

    def toxml(self):
        """Return the text produced by this node's writexml()."""
        writer = _get_StringIO()
        self.writexml(writer)
        return writer.getvalue()

    def hasChildNodes(self):
        """Return 1 if the node has at least one child, else 0."""
        if self.childNodes:
            return 1
        else:
            return 0

    def _get_firstChild(self):
        if self.childNodes:
            return self.childNodes[0]

    def _get_lastChild(self):
        if self.childNodes:
            return self.childNodes[-1]

    def _checkChildType(self, child):
        # Raise HierarchyRequestErr unless child's type is allowed here.
        if child.nodeType not in self.childNodeTypes:
            raise HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(child), repr(self)))

    def insertBefore(self, newChild, refChild):
        """Insert newChild before refChild (append when refChild is None).

        Returns newChild.  Raises HierarchyRequestErr for a disallowed node
        type and ValueError when refChild is not a child of this node; in
        both cases the tree is left unmodified.
        """
        self._checkChildType(newChild)
        if refChild is None:
            # appendChild() detaches newChild from any previous parent.
            self.appendChild(newChild)
        elif newChild is refChild:
            # Inserting a node before itself is a no-op; detaching it first
            # would make the index lookup below fail and lose the node.
            return newChild
        else:
            # Validate refChild before touching newChild's old parent.
            self.childNodes.index(refChild)
            if newChild.parentNode is not None:
                newChild.parentNode.removeChild(newChild)
            # Recomputed: the detach may have shifted our own child list.
            index = self.childNodes.index(refChild)
            self.childNodes.insert(index, newChild)
            newChild.nextSibling = refChild
            refChild.previousSibling = newChild
            if index:
                node = self.childNodes[index-1]
                node.nextSibling = newChild
                newChild.previousSibling = node
            else:
                newChild.previousSibling = None
        if self._makeParentNodes:
            newChild.parentNode = self
        return newChild

    def appendChild(self, node):
        """Append node as the last child and return it.

        The node is first removed from its current parent, if any.
        Raises HierarchyRequestErr for a disallowed node type.
        """
        self._checkChildType(node)
        if node.parentNode is not None:
            node.parentNode.removeChild(node)
        if self.childNodes:
            last = self.lastChild
            node.previousSibling = last
            last.nextSibling = node
        else:
            node.previousSibling = None
        node.nextSibling = None
        self.childNodes.append(node)
        if self._makeParentNodes:
            node.parentNode = self
        return node

    def replaceChild(self, newChild, oldChild):
        """Put newChild in oldChild's place and return oldChild.

        Replacing a child with itself changes nothing and returns None.
        Raises HierarchyRequestErr for a disallowed node type and
        ValueError when oldChild is not a child of this node.
        """
        self._checkChildType(newChild)
        if newChild is oldChild:
            # Must be tested before detaching, or the child would be lost.
            return
        # Validate oldChild before touching newChild's old parent.
        self.childNodes.index(oldChild)
        if newChild.parentNode is not None:
            newChild.parentNode.removeChild(newChild)
        # Recomputed: the detach may have shifted our own child list.
        index = self.childNodes.index(oldChild)
        self.childNodes[index] = newChild
        if self._makeParentNodes:
            newChild.parentNode = self
            oldChild.parentNode = None
        newChild.nextSibling = oldChild.nextSibling
        newChild.previousSibling = oldChild.previousSibling
        oldChild.nextSibling = None
        oldChild.previousSibling = None
        if newChild.previousSibling is not None:
            newChild.previousSibling.nextSibling = newChild
        if newChild.nextSibling is not None:
            newChild.nextSibling.previousSibling = newChild
        return oldChild

    def removeChild(self, oldChild):
        """Detach oldChild, relink its neighbours and return it.

        Raises ValueError when oldChild is not a child of this node.
        """
        self.childNodes.remove(oldChild)
        if oldChild.nextSibling is not None:
            oldChild.nextSibling.previousSibling = oldChild.previousSibling
        if oldChild.previousSibling is not None:
            oldChild.previousSibling.nextSibling = oldChild.nextSibling
        oldChild.nextSibling = oldChild.previousSibling = None

        if self._makeParentNodes:
            oldChild.parentNode = None
        return oldChild

    def normalize(self):
        """Merge adjacent text children and drop empty ones, recursively.

        Element children are normalised in turn; sibling links of the
        surviving children are rebuilt.
        """
        L = []
        for child in self.childNodes:
            if child.nodeType == Node.TEXT_NODE:
                data = child.data
                if data and L and L[-1].nodeType == child.nodeType:
                    # collapse text node
                    node = L[-1]
                    node.data = node.nodeValue = node.data + data
                    node.nextSibling = child.nextSibling
                    child.unlink()
                    continue
                if not data:
                    # empty text node; discard
                    child.unlink()
                    continue
            if L:
                L[-1].nextSibling = child
                child.previousSibling = L[-1]
            else:
                child.previousSibling = None
            L.append(child)
            if child.nodeType == Node.ELEMENT_NODE:
                child.normalize()
        if L:
            # Trailing empty text nodes may have been discarded after the
            # last kept node; do not leave it pointing at an unlinked node.
            L[-1].nextSibling = None
        self.childNodes[:] = L

    def cloneNode(self, deep):
        """Return a copy of this node; copy descendants too when deep."""
        import new
        clone = new.instance(self.__class__, self.__dict__.copy())
        if self._makeParentNodes:
            clone.parentNode = None
        # The copied __dict__ may carry the original's sibling links; the
        # clone is not part of any child list yet.
        clone.previousSibling = None
        clone.nextSibling = None
        clone.childNodes = []
        if deep:
            for child in self.childNodes:
                clone.appendChild(child.cloneNode(1))
        return clone

    def unlink(self):
        """Drop the links to parent, children and siblings, recursively.

        Safe to call again on a node that is already unlinked.
        """
        self.parentNode = None
        children = self.childNodes
        if children:
            for child in children:
                child.unlink()
        self.childNodes = None
        self.previousSibling = None
        self.nextSibling = None
        if Node._debug:
            index = repr(id(self)) + repr(self.__class__)
            self.debug.write("Deleting: %s\n" % index)
            try:
                del Node.allnodes[index]
            except KeyError:
                # Already unlinked, or created before debugging was on.
                pass
Fred Drake55c38192000-06-29 19:39:57 +0000234
Fred Drake1f549022000-09-24 05:21:58 +0000235def _write_data(writer, data):
Fred Drake55c38192000-06-29 19:39:57 +0000236 "Writes datachars to writer."
Fred Drake4ccf4a12000-11-21 22:02:22 +0000237 replace = _string.replace
238 data = replace(data, "&", "&amp;")
239 data = replace(data, "<", "&lt;")
240 data = replace(data, "\"", "&quot;")
241 data = replace(data, ">", "&gt;")
Fred Drake55c38192000-06-29 19:39:57 +0000242 writer.write(data)
243
Fred Drake1f549022000-09-24 05:21:58 +0000244def _getElementsByTagNameHelper(parent, name, rc):
Fred Drake55c38192000-06-29 19:39:57 +0000245 for node in parent.childNodes:
Fred Drake1f549022000-09-24 05:21:58 +0000246 if node.nodeType == Node.ELEMENT_NODE and \
247 (name == "*" or node.tagName == name):
248 rc.append(node)
249 _getElementsByTagNameHelper(node, name, rc)
Fred Drake55c38192000-06-29 19:39:57 +0000250 return rc
251
Fred Drake1f549022000-09-24 05:21:58 +0000252def _getElementsByTagNameNSHelper(parent, nsURI, localName, rc):
Fred Drake55c38192000-06-29 19:39:57 +0000253 for node in parent.childNodes:
Fred Drake1f549022000-09-24 05:21:58 +0000254 if node.nodeType == Node.ELEMENT_NODE:
255 if ((localName == "*" or node.tagName == localName) and
256 (nsURI == "*" or node.namespaceURI == nsURI)):
257 rc.append(node)
Fred Drakef7cf40d2000-12-14 18:16:11 +0000258 _getElementsByTagNameNSHelper(node, nsURI, localName, rc)
259 return rc
Fred Drake55c38192000-06-29 19:39:57 +0000260
class Attr(Node):
    """Attribute node.

    ``name``/``nodeName`` and ``value``/``nodeValue`` are kept in step by
    ``__setattr__``: assigning to either member of a pair updates both.
    """
    nodeType = Node.ATTRIBUTE_NODE
    attributes = None
    ownerElement = None
    childNodeTypes = (Node.TEXT_NODE, Node.ENTITY_REFERENCE_NODE)

    def __init__(self, qName, namespaceURI="", localName=None, prefix=None):
        # Store straight into the instance dictionary so the custom
        # __setattr__ is not invoked for every field.
        fields = self.__dict__
        fields["prefix"] = prefix
        fields["namespaceURI"] = namespaceURI
        fields["name"] = qName
        fields["nodeName"] = qName
        fields["localName"] = localName or qName
        Node.__init__(self)
        # nodeValue and value are set elsewhere

    def __setattr__(self, name, value):
        fields = self.__dict__
        if name in ("value", "nodeValue"):
            fields["nodeValue"] = value
            fields["value"] = value
            return
        if name in ("name", "nodeName"):
            fields["nodeName"] = value
            fields["name"] = value
            return
        fields[name] = value

    def cloneNode(self, deep):
        """Return a copy of the attribute that belongs to no element."""
        duplicate = Node.cloneNode(self, deep)
        # Removing the instance-level entry lets the class default
        # (ownerElement = None) show through again.
        try:
            del duplicate.__dict__["ownerElement"]
        except KeyError:
            pass
        return duplicate
291
Fred Drakef7cf40d2000-12-14 18:16:11 +0000292
class NamedNodeMap:
    """The attribute list is a transient interface to the underlying
    dictionaries.  Mutations here will change the underlying element's
    dictionary.

    Ordering is imposed artificially and does not reflect the order of
    attributes as found in an input document.
    """

    def __init__(self, attrs, attrsNS):
        self._attrs = attrs        # qualified name -> attribute node
        self._attrsNS = attrsNS    # (namespaceURI, localName) -> attribute node

    def __getattr__(self, name):
        # "length" is always computed from the live dictionary so that it
        # can never go stale.
        if name == "length":
            return len(self._attrs)
        raise AttributeError(name)

    def item(self, index):
        """Return the attribute node at position index, or None."""
        try:
            return self[list(self._attrs.keys())[index]]
        except IndexError:
            return None

    def items(self):
        """Return a list of (qualified name, value) pairs."""
        L = []
        for node in self._attrs.values():
            # Attribute nodes carry nodeName/name; they have no tagName.
            L.append((node.nodeName, node.value))
        return L

    def itemsNS(self):
        """Return a list of ((namespaceURI, localName), value) pairs."""
        L = []
        for node in self._attrs.values():
            # Attribute nodes store the namespace as namespaceURI.
            L.append(((node.namespaceURI, node.localName), node.value))
        return L

    def keys(self):
        return self._attrs.keys()

    def keysNS(self):
        return self._attrsNS.keys()

    def values(self):
        return self._attrs.values()

    def __len__(self):
        return self.length

    def __cmp__(self, other):
        # Two maps are equal when they wrap the same name dictionary.
        if self._attrs is getattr(other, "_attrs", None):
            return 0
        else:
            return cmp(id(self), id(other))

    #FIXME: is it appropriate to return .value?
    def __getitem__(self, attname_or_tuple):
        """Look up a node by qualified name or (namespaceURI, localName)."""
        if type(attname_or_tuple) is _TupleType:
            return self._attrsNS[attname_or_tuple]
        else:
            return self._attrs[attname_or_tuple]

    # same as set
    def __setitem__(self, attname, value):
        """Store a string value or an Attr node.

        A string is wrapped in a new Attr named attname; an Attr node is
        stored under its own name.  Raises TypeError for anything else.
        """
        if type(value) in _StringTypes:
            node = Attr(attname)
            node.value = value
        else:
            if not isinstance(value, Attr):
                raise TypeError("value must be a string or Attr object")
            node = value
        self.setNamedItem(node)

    def setNamedItem(self, node):
        """Store node under its name; return the node previously there."""
        old = self._attrs.get(node.name)
        if old is not None and old is not node:
            # Drop the replaced node from the namespace index as well, in
            # case it was registered under a different key than the new one.
            stale = (old.namespaceURI, old.localName)
            if self._attrsNS.get(stale) is old:
                del self._attrsNS[stale]
            # Never unlink `node` itself when it is merely being re-set.
            old.unlink()
        self._attrs[node.name] = node
        self._attrsNS[(node.namespaceURI, node.localName)] = node
        return old

    def setNamedItemNS(self, node):
        return self.setNamedItem(node)

    def __delitem__(self, attname_or_tuple):
        """Remove and unlink the node found under the given key."""
        node = self[attname_or_tuple]
        node.unlink()
        del self._attrs[node.name]
        del self._attrsNS[(node.namespaceURI, node.localName)]
        # No cached "length" is stored here: an instance attribute would
        # shadow __getattr__ and stop tracking later changes.

# Alias for the same class; Element._get_attributes() uses this name.
AttributeList = NamedNodeMap
384
Fred Drake1f549022000-09-24 05:21:58 +0000385
class Element(Node):
    """Element node.

    Attributes are double-indexed: ``_attrs`` maps the qualified name to
    the attribute node and ``_attrsNS`` maps (namespaceURI, localName) to
    the same node.
    """
    nodeType = Node.ELEMENT_NODE
    nextSibling = None
    previousSibling = None
    childNodeTypes = (Node.ELEMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE,
                      Node.COMMENT_NODE, Node.TEXT_NODE,
                      Node.CDATA_SECTION_NODE, Node.ENTITY_REFERENCE_NODE)

    def __init__(self, tagName, namespaceURI="", prefix="",
                 localName=None):
        Node.__init__(self)
        self.tagName = self.nodeName = tagName
        self.localName = localName or tagName
        self.prefix = prefix
        self.namespaceURI = namespaceURI
        self.nodeValue = None

        self._attrs = {}   # attributes are double-indexed:
        self._attrsNS = {} #    tagName -> Attribute
                           #    URI,localName -> Attribute
                           # in the future: consider lazy generation
                           # of attribute objects this is too tricky
                           # for now because of headaches with
                           # namespaces.

    def cloneNode(self, deep):
        """Return a copy of the element with copies of its attributes."""
        clone = Node.cloneNode(self, deep)
        clone._attrs = {}
        clone._attrsNS = {}
        for attr in self._attrs.values():
            node = attr.cloneNode(1)
            clone._attrs[node.name] = node
            clone._attrsNS[(node.namespaceURI, node.localName)] = node
            node.ownerElement = clone
        return clone

    def unlink(self):
        """Unlink the attribute nodes, then the element itself."""
        # _attrs is None once the attributes have been released.
        if self._attrs is not None:
            for attr in self._attrs.values():
                attr.unlink()
        self._attrs = None
        self._attrsNS = None
        Node.unlink(self)

    def getAttribute(self, attname):
        """Return the attribute's value; KeyError if it is not set."""
        return self._attrs[attname].value

    def getAttributeNS(self, namespaceURI, localName):
        """Return the attribute's value; KeyError if it is not set."""
        return self._attrsNS[(namespaceURI, localName)].value

    def setAttribute(self, attname, value):
        attr = Attr(attname)
        # for performance
        attr.__dict__["value"] = attr.__dict__["nodeValue"] = value
        self.setAttributeNode(attr)

    def setAttributeNS(self, namespaceURI, qualifiedName, value):
        prefix, localname = _nssplit(qualifiedName)
        # for performance
        attr = Attr(qualifiedName, namespaceURI, localname, prefix)
        attr.__dict__["value"] = attr.__dict__["nodeValue"] = value
        self.setAttributeNode(attr)

    def getAttributeNode(self, attrname):
        """Return the attribute node, or None if it is not set."""
        return self._attrs.get(attrname)

    def getAttributeNodeNS(self, namespaceURI, localName):
        """Return the attribute node, or None if it is not set."""
        # Mirrors getAttributeNode(): a missing attribute yields None
        # rather than a KeyError.
        return self._attrsNS.get((namespaceURI, localName))

    def setAttributeNode(self, attr):
        """Attach attr to this element.

        Returns the node it replaced, or None when nothing was replaced
        or attr was already attached here.  Raises ValueError when attr
        belongs to a different element.
        """
        if attr.ownerElement not in (None, self):
            raise ValueError("attribute node already owned")
        old = self._attrs.get(attr.name, None)
        if old is not None and old is not attr:
            # Drop the replaced node from the namespace index as well, in
            # case it was registered under a different key than attr's.
            stale = (old.namespaceURI, old.localName)
            if self._attrsNS.get(stale) is old:
                del self._attrsNS[stale]
            # attr itself must not be unlinked when it is merely re-set.
            old.unlink()
        self._attrs[attr.name] = attr
        self._attrsNS[(attr.namespaceURI, attr.localName)] = attr

        # This creates a circular reference, but Element.unlink()
        # breaks the cycle since the references to the attribute
        # dictionaries are tossed.
        attr.ownerElement = self

        if old is not attr:
            # It might have already been part of this node, in which case
            # it doesn't represent a change, and should not be returned.
            return old

    def removeAttribute(self, name):
        """Remove the named attribute; KeyError if it is not set."""
        attr = self._attrs[name]
        self.removeAttributeNode(attr)

    def removeAttributeNS(self, namespaceURI, localName):
        """Remove the attribute; KeyError if it is not set."""
        attr = self._attrsNS[(namespaceURI, localName)]
        self.removeAttributeNode(attr)

    def removeAttributeNode(self, node):
        """Unlink node and remove it from both attribute indexes."""
        node.unlink()
        del self._attrs[node.name]
        del self._attrsNS[(node.namespaceURI, node.localName)]

    def hasAttribute(self, name):
        return self._attrs.has_key(name)

    def hasAttributeNS(self, namespaceURI, localName):
        return self._attrsNS.has_key((namespaceURI, localName))

    def getElementsByTagName(self, name):
        """Return a list of descendant elements with the given tag name."""
        return _getElementsByTagNameHelper(self, name, [])

    def getElementsByTagNameNS(self, namespaceURI, localName):
        """Return a list of descendant elements matching the namespace
        URI and local name."""
        return _getElementsByTagNameNSHelper(self, namespaceURI,
                                             localName, [])

    def __repr__(self):
        return "<DOM Element: %s at %s>" % (self.tagName, id(self))

    def writexml(self, writer):
        """Write the element, its attributes (sorted by name) and its
        children to writer."""
        writer.write("<" + self.tagName)

        attrs = self._get_attributes()
        a_names = list(attrs.keys())
        a_names.sort()

        for a_name in a_names:
            writer.write(" %s=\"" % a_name)
            _write_data(writer, attrs[a_name].value)
            writer.write("\"")
        if self.childNodes:
            writer.write(">")
            for node in self.childNodes:
                node.writexml(writer)
            writer.write("</%s>" % self.tagName)
        else:
            writer.write("/>")

    def _get_attributes(self):
        return AttributeList(self._attrs, self._attrsNS)
Fred Drake55c38192000-06-29 19:39:57 +0000522
class Comment(Node):
    """Comment node; ``data`` is the text written between the comment
    delimiters."""
    nodeType = Node.COMMENT_NODE
    nodeName = "#comment"
    attributes = None
    childNodeTypes = ()

    def __init__(self, data):
        Node.__init__(self)
        self.data = data
        self.nodeValue = data

    def writexml(self, writer):
        # The text is written as-is, without escaping.
        markup = "<!--%s-->" % self.data
        writer.write(markup)
535
class ProcessingInstruction(Node):
    """Processing instruction node made of a target and its data."""
    nodeType = Node.PROCESSING_INSTRUCTION_NODE
    attributes = None
    childNodeTypes = ()

    def __init__(self, target, data):
        Node.__init__(self)
        # The target doubles as the node name, the data as the node value.
        self.target = target
        self.nodeName = target
        self.data = data
        self.nodeValue = data

    def writexml(self, writer):
        pieces = (self.target, self.data)
        writer.write("<?%s %s?>" % pieces)
Fred Drake55c38192000-06-29 19:39:57 +0000548
class Text(Node):
    """Text node; ``data`` and ``nodeValue`` both hold the character data."""
    nodeType = Node.TEXT_NODE
    nodeName = "#text"
    attributes = None
    childNodeTypes = ()

    def __init__(self, data):
        if type(data) not in _StringTypes:
            raise TypeError("node contents must be a string")
        Node.__init__(self)
        self.data = self.nodeValue = data

    def __repr__(self):
        if len(self.data) > 10:
            dotdotdot = "..."
        else:
            dotdotdot = ""
        return "<DOM Text node \"%s%s\">" % (self.data[0:10], dotdotdot)

    def splitText(self, offset):
        """Split this node at offset and return the new trailing node.

        This node keeps the text before offset.  When it has a parent, the
        new node is inserted directly after it.  Raises ValueError when
        offset lies outside the text.
        """
        if offset < 0 or offset > len(self.data):
            raise ValueError("illegal offset value for splitText()")
        newText = Text(self.data[offset:])
        parent = self.parentNode
        if parent and self in parent.childNodes:
            # Read nextSibling only here: a node that was never added to a
            # parent has no sibling attributes at all.
            next = self.nextSibling
            if next is None:
                parent.appendChild(newText)
            else:
                parent.insertBefore(newText, next)
        # data and nodeValue must stay in step.
        self.data = self.nodeValue = self.data[:offset]
        return newText

    def writexml(self, writer):
        _write_data(writer, self.data)
Fred Drake55c38192000-06-29 19:39:57 +0000583
Fred Drake1f549022000-09-24 05:21:58 +0000584def _nssplit(qualifiedName):
Fred Drake4ccf4a12000-11-21 22:02:22 +0000585 fields = _string.split(qualifiedName, ':', 1)
Paul Prescod73678da2000-07-01 04:58:47 +0000586 if len(fields) == 2:
587 return fields
588 elif len(fields) == 1:
Fred Drake1f549022000-09-24 05:21:58 +0000589 return ('', fields[0])
Paul Prescod73678da2000-07-01 04:58:47 +0000590
Fred Drakef7cf40d2000-12-14 18:16:11 +0000591
class DocumentType(Node):
    """Document type node; ``name`` is the local part of the qualified
    name given to the constructor."""
    nodeType = Node.DOCUMENT_TYPE_NODE
    nodeValue = None
    attributes = None
    name = None
    publicId = None
    systemId = None
    internalSubset = ""
    entities = None
    notations = None

    def __init__(self, qualifiedName):
        Node.__init__(self)
        # An empty or missing qualified name leaves the class default
        # (name = None) in effect.
        if not qualifiedName:
            return
        pieces = _nssplit(qualifiedName)
        self.name = pieces[1]
608
609
class DOMImplementation:
    """Factory for Document and DocumentType objects."""

    def hasFeature(self, feature, version):
        """Report whether the feature/version pair is supported.

        Only the "core" feature (compared case-insensitively) in versions
        "1.0" and "2.0" is recognised.
        """
        if version in ("1.0", "2.0"):
            return feature.lower() == "core"
        return 0

    def createDocument(self, namespaceURI, qualifiedName, doctype):
        """Return a new Document that owns doctype.

        A DocumentType is created from qualifiedName when doctype is None.
        Raises ValueError when doctype already has a parent, when the
        "xml" prefix is used with the wrong namespace, or when a prefix is
        given without a namespace URI.
        """
        if doctype and doctype.parentNode is not None:
            raise ValueError("doctype object owned by another DOM tree")
        document = Document()
        if doctype is None:
            doctype = self.createDocumentType(qualifiedName, None, None)
        if qualifiedName:
            prefix, localname = _nssplit(qualifiedName)
            if prefix == "xml":
                if namespaceURI != "http://www.w3.org/XML/1998/namespace":
                    raise ValueError("illegal use of 'xml' prefix")
            if prefix and not namespaceURI:
                raise ValueError("illegal use of prefix without namespaces")
        doctype.parentNode = document
        document.doctype = doctype
        document.implementation = self
        return document

    def createDocumentType(self, qualifiedName, publicId, systemId):
        """Return a DocumentType carrying the given public/system ids."""
        result = DocumentType(qualifiedName)
        result.publicId = publicId
        result.systemId = systemId
        return result
640
641
class Document(Node):
    """Document node: the root of a DOM tree and the factory for its nodes.

    At most one element child (the document element) is allowed.
    """
    nodeType = Node.DOCUMENT_NODE
    nodeName = "#document"
    nodeValue = None
    attributes = None
    doctype = None
    parentNode = None

    implementation = DOMImplementation()
    childNodeTypes = (Node.ELEMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE,
                      Node.COMMENT_NODE, Node.DOCUMENT_TYPE_NODE)

    def appendChild(self, node):
        """Append node and return it.

        Raises HierarchyRequestErr for a disallowed node type and
        TypeError when a second document element would result.  Both
        checks run before node is detached from its previous parent, so a
        rejected call leaves every tree untouched.
        """
        if node.nodeType not in self.childNodeTypes:
            raise HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(node), repr(self)))
        if node.nodeType == Node.ELEMENT_NODE:
            current = self._get_documentElement()
            # Re-appending the existing document element is allowed; it
            # is simply moved to the end of the child list.
            if current is not None and current is not node:
                raise TypeError("two document elements disallowed")
        # Node.appendChild() detaches node from its old parent.
        return Node.appendChild(self, node)

    def removeChild(self, oldChild):
        """Detach oldChild, relink its neighbours and return it.

        Raises ValueError when oldChild is not a child of this document.
        """
        self.childNodes.remove(oldChild)
        # Close the gap in the sibling chain.  getattr() with a default is
        # used in case the child carries no sibling attributes.
        before = getattr(oldChild, "previousSibling", None)
        after = getattr(oldChild, "nextSibling", None)
        if after is not None:
            after.previousSibling = before
        if before is not None:
            before.nextSibling = after
        oldChild.nextSibling = oldChild.previousSibling = None
        oldChild.parentNode = None
        # NOTE(review): with oldChild already out of childNodes, this can
        # only match a documentElement stored directly on the instance.
        if self.documentElement is oldChild:
            self.documentElement = None

        return oldChild

    def _get_documentElement(self):
        # The first element child, or None when there is none.
        for node in self.childNodes:
            if node.nodeType == Node.ELEMENT_NODE:
                return node

    def unlink(self):
        """Unlink the doctype, if any, then the rest of the tree."""
        if self.doctype is not None:
            self.doctype.unlink()
            self.doctype = None
        Node.unlink(self)

    # Node factories.  These are the node classes themselves, stored as
    # class attributes.
    createElement = Element

    createTextNode = Text

    createComment = Comment

    createProcessingInstruction = ProcessingInstruction

    createAttribute = Attr

    def createElementNS(self, namespaceURI, qualifiedName):
        prefix, localName = _nssplit(qualifiedName)
        return self.createElement(qualifiedName, namespaceURI,
                                  prefix, localName)

    def createAttributeNS(self, namespaceURI, qualifiedName):
        prefix, localName = _nssplit(qualifiedName)
        return self.createAttribute(qualifiedName, namespaceURI,
                                    localName, prefix)

    def getElementsByTagNameNS(self, namespaceURI, localName):
        """Return a list of elements matching the namespace URI and local
        name."""
        return _getElementsByTagNameNSHelper(self, namespaceURI,
                                             localName, [])

    def getElementsByTagName(self, name):
        """Return a list of elements with the given tag name."""
        rc = []
        _getElementsByTagNameHelper(self, name, rc)
        return rc

    def writexml(self, writer):
        for node in self.childNodes:
            node.writexml(writer)
Fred Drake55c38192000-06-29 19:39:57 +0000717
Fred Drake4ccf4a12000-11-21 22:02:22 +0000718def _get_StringIO():
Fred Drakef7cf40d2000-12-14 18:16:11 +0000719 # we can't use cStringIO since it doesn't support Unicode strings
720 from StringIO import StringIO
Fred Drake4ccf4a12000-11-21 22:02:22 +0000721 return StringIO()
722
Fred Drake1f549022000-09-24 05:21:58 +0000723def _doparse(func, args, kwargs):
724 events = apply(func, args, kwargs)
725 toktype, rootNode = events.getEvent()
726 events.expandNode(rootNode)
Fred Drake55c38192000-06-29 19:39:57 +0000727 return rootNode
728
def parse(*args, **kwargs):
    """Build a DOM from a file; arguments are passed to pulldom.parse()."""
    import xml.dom.pulldom
    source = xml.dom.pulldom.parse
    return _doparse(source, args, kwargs)
Fred Drake55c38192000-06-29 19:39:57 +0000733
def parseString(*args, **kwargs):
    """Build a DOM from a string; arguments are passed to
    pulldom.parseString()."""
    import xml.dom.pulldom
    source = xml.dom.pulldom.parseString
    return _doparse(source, args, kwargs)