blob: 3d6dce4011e93eddb9dbc898515bc354aeba00d7 [file] [log] [blame]
Fred Drake1f549022000-09-24 05:21:58 +00001"""\
Fred Drakef7cf40d2000-12-14 18:16:11 +00002minidom.py -- a lightweight DOM implementation.
Fred Drake55c38192000-06-29 19:39:57 +00003
Guido van Rossum9e1fe1e2001-02-05 19:17:50 +00004parse("foo.xml")
Paul Prescod623511b2000-07-21 22:05:49 +00005
Guido van Rossum9e1fe1e2001-02-05 19:17:50 +00006parseString("<foo><bar/></foo>")
Paul Prescod623511b2000-07-21 22:05:49 +00007
Fred Drake55c38192000-06-29 19:39:57 +00008Todo:
9=====
10 * convenience methods for getting elements and text.
11 * more testing
12 * bring some of the writer and linearizer code into conformance with this
13 interface
14 * SAX 2 namespaces
15"""
16
Fred Drake1f549022000-09-24 05:21:58 +000017import string
Fred Drake4ccf4a12000-11-21 22:02:22 +000018_string = string
19del string
20
Andrew M. Kuchling291ed4f2000-12-31 03:50:23 +000021from xml.dom import HierarchyRequestErr
22
Fred Drake4ccf4a12000-11-21 22:02:22 +000023# localize the types, and allow support for Unicode values if available:
Fred Drake1f549022000-09-24 05:21:58 +000024import types
Fred Drake4ccf4a12000-11-21 22:02:22 +000025_TupleType = types.TupleType
26try:
27 _StringTypes = (types.StringType, types.UnicodeType)
28except AttributeError:
29 _StringTypes = (types.StringType,)
30del types
31
Fred Drakef7cf40d2000-12-14 18:16:11 +000032import xml.dom
33_Node = xml.dom.Node
Fred Drake55c38192000-06-29 19:39:57 +000034
class Node(_Node):
    """Common base class for all minidom node types.

    Provides the child-list bookkeeping (parent and sibling links), the
    ``_get_<name>`` accessor protocol implemented in __getattr__, and the
    minidom-specific unlink() used to break reference cycles.
    """

    # Registry of created nodes; only populated while _debug is true.
    allnodes = {}
    _debug = 0
    # When true, the child-manipulating methods maintain parentNode links.
    _makeParentNodes = 1
    # Log stream for debug output; created on first use when _debug is true.
    debug = None
    # nodeType values accepted as children; subclasses override this.
    childNodeTypes = ()

    def __init__(self):
        """Start with an empty child list and no parent."""
        self.childNodes = []
        self.parentNode = None
        if Node._debug:
            index = repr(id(self)) + repr(self.__class__)
            Node.allnodes[index] = repr(self.__dict__)
            if Node.debug is None:
                Node.debug = _get_StringIO()
                #open("debug4.out", "w")
            Node.debug.write("create %s\n" % index)

    def __getattr__(self, key):
        """Bridge plain attributes and ``_get_<name>()`` accessors.

        Looking up a missing attribute ``foo`` calls ``self._get_foo()``
        when that accessor can be found; looking up a missing
        ``_get_foo`` returns a function that fetches the existing
        attribute ``foo``.  The ``inGetAttr`` instance flag stops the two
        directions from calling each other indefinitely.

        Raises AttributeError when neither form can be resolved.
        """
        if key[0:2] == "__":
            raise AttributeError(key)
        # getattr should never call getattr!
        if self.__dict__.has_key("inGetAttr"):
            del self.inGetAttr
            raise AttributeError(key)

        prefix, attrname = key[:5], key[5:]
        if prefix == "_get_":
            self.inGetAttr = 1
            if hasattr(self, attrname):
                del self.inGetAttr
                return (lambda self=self, attrname=attrname:
                        getattr(self, attrname))
            else:
                del self.inGetAttr
                raise AttributeError(key)
        else:
            self.inGetAttr = 1
            try:
                func = getattr(self, "_get_" + key)
            except AttributeError:
                raise AttributeError(key)
            del self.inGetAttr
            return func()

    def __nonzero__(self):
        """Nodes are always true, even when they have no children."""
        return 1

    def toxml(self):
        """Return the text produced by this node's writexml()."""
        writer = _get_StringIO()
        self.writexml(writer)
        return writer.getvalue()

    def hasChildNodes(self):
        """Return 1 if this node has at least one child, else 0."""
        if self.childNodes:
            return 1
        else:
            return 0

    def _get_firstChild(self):
        """Return the first child, or None when there are no children."""
        if self.childNodes:
            return self.childNodes[0]

    def _get_lastChild(self):
        """Return the last child, or None when there are no children."""
        if self.childNodes:
            return self.childNodes[-1]

    def insertBefore(self, newChild, refChild):
        """Insert newChild immediately before refChild and return it.

        A refChild of None appends newChild instead.  newChild is first
        detached from its current parent, if it has one.

        Raises HierarchyRequestErr if newChild's nodeType is not listed
        in childNodeTypes.
        """
        if newChild.nodeType not in self.childNodeTypes:
            raise HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(newChild), repr(self)))
        if newChild.parentNode is not None:
            newChild.parentNode.removeChild(newChild)
        if refChild is None:
            self.appendChild(newChild)
        else:
            index = self.childNodes.index(refChild)
            self.childNodes.insert(index, newChild)
            newChild.nextSibling = refChild
            refChild.previousSibling = newChild
            if index:
                node = self.childNodes[index-1]
                node.nextSibling = newChild
                newChild.previousSibling = node
            else:
                newChild.previousSibling = None
            if self._makeParentNodes:
                newChild.parentNode = self
        return newChild

    def appendChild(self, node):
        """Add node as the last child and return it.

        The node is first detached from its current parent, if it has
        one.

        Raises HierarchyRequestErr if node's nodeType is not listed in
        childNodeTypes.
        """
        if node.nodeType not in self.childNodeTypes:
            raise HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(node), repr(self)))
        if node.parentNode is not None:
            node.parentNode.removeChild(node)
        if self.childNodes:
            # Read the list directly; the list is known to be non-empty
            # here, so the accessor lookup via __getattr__ is not needed.
            last = self.childNodes[-1]
            node.previousSibling = last
            last.nextSibling = node
        else:
            node.previousSibling = None
        node.nextSibling = None
        self.childNodes.append(node)
        if self._makeParentNodes:
            node.parentNode = self
        return node

    def replaceChild(self, newChild, oldChild):
        """Put newChild in oldChild's position and return oldChild.

        Replacing a child with itself is a no-op that returns None.
        newChild is detached from its current parent before being
        inserted.

        Raises HierarchyRequestErr if newChild's nodeType is not listed
        in childNodeTypes.
        """
        if newChild.nodeType not in self.childNodeTypes:
            raise HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(newChild), repr(self)))
        # Test for identity before detaching: detaching first would
        # remove the child from the tree and then return without
        # putting it back.
        if newChild is oldChild:
            return
        if newChild.parentNode is not None:
            newChild.parentNode.removeChild(newChild)
        index = self.childNodes.index(oldChild)
        self.childNodes[index] = newChild
        if self._makeParentNodes:
            newChild.parentNode = self
            oldChild.parentNode = None
        newChild.nextSibling = oldChild.nextSibling
        newChild.previousSibling = oldChild.previousSibling
        oldChild.nextSibling = None
        oldChild.previousSibling = None
        if newChild.previousSibling is not None:
            newChild.previousSibling.nextSibling = newChild
        if newChild.nextSibling is not None:
            newChild.nextSibling.previousSibling = newChild
        return oldChild

    def removeChild(self, oldChild):
        """Detach oldChild from this node and return it.

        The neighbouring siblings are linked to each other and
        oldChild's own sibling links are cleared.
        """
        self.childNodes.remove(oldChild)
        if oldChild.nextSibling is not None:
            oldChild.nextSibling.previousSibling = oldChild.previousSibling
        if oldChild.previousSibling is not None:
            oldChild.previousSibling.nextSibling = oldChild.nextSibling
        oldChild.nextSibling = oldChild.previousSibling = None

        if self._makeParentNodes:
            oldChild.parentNode = None
        return oldChild

    def normalize(self):
        """Merge adjacent text children and drop empty ones, recursively.

        Discarded text nodes are unlinked.  Element children are
        normalized in turn.
        """
        L = []
        for child in self.childNodes:
            if child.nodeType == Node.TEXT_NODE:
                data = child.data
                if data and L and L[-1].nodeType == child.nodeType:
                    # collapse text node
                    node = L[-1]
                    node.data = node.nodeValue = node.data + child.data
                    node.nextSibling = child.nextSibling
                    child.unlink()
                elif data:
                    if L:
                        L[-1].nextSibling = child
                        child.previousSibling = L[-1]
                    else:
                        child.previousSibling = None
                    L.append(child)
                else:
                    # empty text node; discard
                    child.unlink()
            else:
                if L:
                    L[-1].nextSibling = child
                    child.previousSibling = L[-1]
                else:
                    child.previousSibling = None
                L.append(child)
                if child.nodeType == Node.ELEMENT_NODE:
                    child.normalize()
        if L:
            # The last survivor may still point at a text node that was
            # merged or discarded after it; it is now the final child.
            L[-1].nextSibling = None
        self.childNodes[:] = L

    def cloneNode(self, deep):
        """Return a copy of this node; copy the subtree too if deep is true.

        The copy is built from a copy of the instance dictionary, has no
        parent, and starts with an empty child list.
        """
        import new
        clone = new.instance(self.__class__, self.__dict__.copy())
        if self._makeParentNodes:
            clone.parentNode = None
        clone.childNodes = []
        if deep:
            for child in self.childNodes:
                clone.appendChild(child.cloneNode(1))
        return clone

    # DOM Level 3 (Working Draft 2001-Jan-26)

    def isSameNode(self, other):
        """Return true if other is this very node object."""
        return self is other

    # minidom-specific API:

    def unlink(self):
        """Recursively clear parent, child and sibling references.

        The node is not usable as a tree member afterwards: childNodes
        is set to None.
        """
        self.parentNode = None
        for child in self.childNodes:
            child.unlink()
        self.childNodes = None
        self.previousSibling = None
        self.nextSibling = None
        if Node._debug:
            index = repr(id(self)) + repr(self.__class__)
            self.debug.write("Deleting: %s\n" % index)
            del Node.allnodes[index]
Fred Drake55c38192000-06-29 19:39:57 +0000239
def _write_data(writer, data):
    "Escape the XML-significant characters in data and write it to writer."
    # The ampersand must be handled first so that the entities produced
    # by the later substitutions are not escaped a second time.
    substitutions = (
        ("&", "&amp;"),
        ("<", "&lt;"),
        ("\"", "&quot;"),
        (">", "&gt;"),
    )
    for raw, entity in substitutions:
        data = data.replace(raw, entity)
    writer.write(data)
248
def _getElementsByTagNameHelper(parent, name, rc):
    """Collect descendant elements of parent named name into rc.

    A name of "*" matches every element.  Matches are appended to rc in
    document order, and rc is returned.
    """
    for child in parent.childNodes:
        if child.nodeType == Node.ELEMENT_NODE:
            if name == "*" or child.tagName == name:
                rc.append(child)
        # Descend into every child, whether or not it matched.
        _getElementsByTagNameHelper(child, name, rc)
    return rc
256
def _getElementsByTagNameNSHelper(parent, nsURI, localName, rc):
    """Collect descendant elements matching a namespace URI and local name.

    Either criterion may be "*" to match anything.  Matches are appended
    to rc in document order, and rc is returned.
    """
    for node in parent.childNodes:
        if node.nodeType == Node.ELEMENT_NODE:
            # Compare against localName, not tagName: tagName is the
            # qualified name and includes any prefix, so a prefixed
            # element would never match its own local name.
            if ((localName == "*" or node.localName == localName) and
                (nsURI == "*" or node.namespaceURI == nsURI)):
                rc.append(node)
            _getElementsByTagNameNSHelper(node, nsURI, localName, rc)
    return rc
Fred Drake55c38192000-06-29 19:39:57 +0000265
class Attr(Node):
    """An attribute node.

    ``name``/``nodeName`` and ``value``/``nodeValue`` are kept as paired
    aliases: assigning to either member of a pair updates both.
    """

    nodeType = Node.ATTRIBUTE_NODE
    attributes = None
    ownerElement = None
    childNodeTypes = (Node.TEXT_NODE, Node.ENTITY_REFERENCE_NODE)

    def __init__(self, qName, namespaceURI="", localName=None, prefix=None):
        # Store straight into the instance dictionary so that the custom
        # __setattr__ below is skipped, for performance.
        state = self.__dict__
        state["localName"] = localName or qName
        state["nodeName"] = qName
        state["name"] = qName
        state["namespaceURI"] = namespaceURI
        state["prefix"] = prefix
        Node.__init__(self)
        # nodeValue and value are set elsewhere

    def __setattr__(self, name, value):
        """Store an attribute, keeping the aliased pairs in step."""
        state = self.__dict__
        if name == "value" or name == "nodeValue":
            state["value"] = value
            state["nodeValue"] = value
        elif name == "name" or name == "nodeName":
            state["name"] = value
            state["nodeName"] = value
        else:
            state[name] = value

    def cloneNode(self, deep):
        """Return a copy of this attribute that is not owned by an element."""
        duplicate = Node.cloneNode(self, deep)
        # Drop any copied instance-level owner so that the class default
        # (None) shows through on the copy.
        try:
            del duplicate.__dict__["ownerElement"]
        except KeyError:
            pass
        return duplicate
296
Fred Drakef7cf40d2000-12-14 18:16:11 +0000297
class NamedNodeMap:
    """The attribute list is a transient interface to the underlying
    dictionaries.  Mutations here will change the underlying element's
    dictionary.

    Ordering is imposed artificially and does not reflect the order of
    attributes as found in an input document.
    """

    def __init__(self, attrs, attrsNS):
        """Wrap the name-keyed and (namespaceURI, localName)-keyed dicts."""
        self._attrs = attrs
        self._attrsNS = attrsNS

    def __getattr__(self, name):
        """Compute ``length`` on demand from the underlying dictionary."""
        if name == "length":
            return len(self._attrs)
        raise AttributeError(name)

    def item(self, index):
        """Return the attribute node at position index, or None if the
        index is out of range."""
        try:
            return self[list(self._attrs.keys())[index]]
        except IndexError:
            return None

    def items(self):
        """Return a list of (nodeName, value) pairs."""
        L = []
        for node in self._attrs.values():
            L.append((node.nodeName, node.value))
        return L

    def itemsNS(self):
        """Return a list of ((namespaceURI, localName), value) pairs."""
        L = []
        for node in self._attrs.values():
            # Attribute nodes store their namespace as namespaceURI;
            # they have no attribute called URI.
            L.append(((node.namespaceURI, node.localName), node.value))
        return L

    def keys(self):
        """Return the attribute names."""
        return self._attrs.keys()

    def keysNS(self):
        """Return the (namespaceURI, localName) keys."""
        return self._attrsNS.keys()

    def values(self):
        """Return the attribute nodes."""
        return self._attrs.values()

    def get(self, name, value = None):
        """Return the attribute node called name, or value if absent."""
        return self._attrs.get(name, value)

    def __len__(self):
        return self.length

    def __cmp__(self, other):
        """Maps that share the same underlying dictionary compare equal."""
        if self._attrs is getattr(other, "_attrs", None):
            return 0
        else:
            return cmp(id(self), id(other))

    #FIXME: is it appropriate to return .value?
    def __getitem__(self, attname_or_tuple):
        """Look up a node by name, or by a (namespaceURI, localName) tuple."""
        if type(attname_or_tuple) is _TupleType:
            return self._attrsNS[attname_or_tuple]
        else:
            return self._attrs[attname_or_tuple]

    # same as set
    def __setitem__(self, attname, value):
        """Store value under attname.

        A string value is wrapped in a new Attr node; an Attr instance
        is stored as given.  Raises TypeError for anything else.
        """
        if type(value) in _StringTypes:
            node = Attr(attname)
            node.value = value
        else:
            if not isinstance(value, Attr):
                raise TypeError("value must be a string or Attr object")
            node = value
        self.setNamedItem(node)

    def setNamedItem(self, node):
        """Store node under its name and return the node it replaced,
        or None if there was none."""
        old = self._attrs.get(node.name)
        # Storing a node that is already in the map must not unlink it.
        if old and old is not node:
            old.unlink()
        self._attrs[node.name] = node
        self._attrsNS[(node.namespaceURI, node.localName)] = node
        return old

    def setNamedItemNS(self, node):
        """Same as setNamedItem(); both indexes are always updated."""
        return self.setNamedItem(node)

    def __delitem__(self, attname_or_tuple):
        """Unlink the addressed node and remove it from both indexes."""
        node = self[attname_or_tuple]
        node.unlink()
        del self._attrs[node.name]
        del self._attrsNS[(node.namespaceURI, node.localName)]
        # No length is stored here: an instance attribute would shadow
        # the value computed in __getattr__ and go stale on later changes.
390
391AttributeList = NamedNodeMap
392
Fred Drake1f549022000-09-24 05:21:58 +0000393
class Element(Node):
    """An element node with a double-indexed set of attribute nodes."""

    nodeType = Node.ELEMENT_NODE
    nextSibling = None
    previousSibling = None
    childNodeTypes = (Node.ELEMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE,
                      Node.COMMENT_NODE, Node.TEXT_NODE,
                      Node.CDATA_SECTION_NODE, Node.ENTITY_REFERENCE_NODE)

    def __init__(self, tagName, namespaceURI="", prefix="",
                 localName=None):
        """Create an element; localName defaults to tagName."""
        Node.__init__(self)
        self.tagName = self.nodeName = tagName
        self.localName = localName or tagName
        self.prefix = prefix
        self.namespaceURI = namespaceURI
        self.nodeValue = None

        self._attrs = {}   # attributes are double-indexed:
        self._attrsNS = {} #    tagName -> Attribute
                           #    URI,localName -> Attribute
                           # in the future: consider lazy generation
                           # of attribute objects this is too tricky
                           # for now because of headaches with
                           # namespaces.

    def cloneNode(self, deep):
        """Return a copy of this element with its own copies of the
        attribute nodes; children are copied too if deep is true."""
        clone = Node.cloneNode(self, deep)
        clone._attrs = {}
        clone._attrsNS = {}
        for attr in self._attrs.values():
            node = attr.cloneNode(1)
            clone._attrs[node.name] = node
            clone._attrsNS[(node.namespaceURI, node.localName)] = node
            node.ownerElement = clone
        return clone

    def unlink(self):
        """Unlink the attribute nodes, drop both attribute indexes, then
        unlink the element itself."""
        for attr in self._attrs.values():
            attr.unlink()
        self._attrs = None
        self._attrsNS = None
        Node.unlink(self)

    def getAttribute(self, attname):
        """Return the value of the named attribute, or "" if absent."""
        try:
            return self._attrs[attname].value
        except KeyError:
            return ""

    def getAttributeNS(self, namespaceURI, localName):
        """Return the value of the attribute identified by namespace URI
        and local name, or "" if absent."""
        try:
            return self._attrsNS[(namespaceURI, localName)].value
        except KeyError:
            return ""

    def setAttribute(self, attname, value):
        """Set the named attribute to value, replacing any existing one."""
        attr = Attr(attname)
        # for performance
        attr.__dict__["value"] = attr.__dict__["nodeValue"] = value
        self.setAttributeNode(attr)

    def setAttributeNS(self, namespaceURI, qualifiedName, value):
        """Set a namespaced attribute from its qualified name and value."""
        prefix, localname = _nssplit(qualifiedName)
        # for performance
        attr = Attr(qualifiedName, namespaceURI, localname, prefix)
        attr.__dict__["value"] = attr.__dict__["nodeValue"] = value
        self.setAttributeNode(attr)

    def getAttributeNode(self, attrname):
        """Return the named attribute node, or None if absent."""
        return self._attrs.get(attrname)

    def getAttributeNodeNS(self, namespaceURI, localName):
        """Return the attribute node identified by namespace URI and
        local name, or None if absent."""
        return self._attrsNS.get((namespaceURI, localName))

    def setAttributeNode(self, attr):
        """Attach attr to this element.

        Returns the node that was replaced, or None if there was none or
        if attr was already attached to this element.

        Raises xml.dom.InuseAttributeErr if attr belongs to a different
        element.
        """
        if attr.ownerElement not in (None, self):
            raise xml.dom.InuseAttributeErr("attribute node already owned")
        old = self._attrs.get(attr.name, None)
        # Re-attaching the node that is already stored must not unlink
        # it: unlink() would clear the child list of a live attribute.
        if old and old is not attr:
            old.unlink()
        self._attrs[attr.name] = attr
        self._attrsNS[(attr.namespaceURI, attr.localName)] = attr

        # This creates a circular reference, but Element.unlink()
        # breaks the cycle since the references to the attribute
        # dictionaries are tossed.
        attr.ownerElement = self

        if old is not attr:
            # It might have already been part of this node, in which case
            # it doesn't represent a change, and should not be returned.
            return old

    def removeAttribute(self, name):
        """Remove the named attribute; raises KeyError if it is absent."""
        attr = self._attrs[name]
        self.removeAttributeNode(attr)

    def removeAttributeNS(self, namespaceURI, localName):
        """Remove the attribute identified by namespace URI and local
        name; raises KeyError if it is absent."""
        attr = self._attrsNS[(namespaceURI, localName)]
        self.removeAttributeNode(attr)

    def removeAttributeNode(self, node):
        """Unlink node and remove it from both attribute indexes."""
        node.unlink()
        del self._attrs[node.name]
        del self._attrsNS[(node.namespaceURI, node.localName)]

    def hasAttribute(self, name):
        """Return true if an attribute called name is present."""
        return self._attrs.has_key(name)

    def hasAttributeNS(self, namespaceURI, localName):
        """Return true if the attribute identified by namespace URI and
        local name is present."""
        return self._attrsNS.has_key((namespaceURI, localName))

    def getElementsByTagName(self, name):
        """Return a list of descendant elements called name ("*" = all)."""
        return _getElementsByTagNameHelper(self, name, [])

    def getElementsByTagNameNS(self, namespaceURI, localName):
        """Return a list of descendant elements matching the namespace
        URI and local name; either may be "*" to match anything."""
        # The result list has to be returned; without the return
        # statement callers always received None.
        return _getElementsByTagNameNSHelper(self, namespaceURI,
                                             localName, [])

    def __repr__(self):
        return "<DOM Element: %s at %s>" % (self.tagName, id(self))

    def writexml(self, writer):
        """Write this element, its attributes (sorted by name) and its
        children to writer."""
        writer.write("<" + self.tagName)

        attrs = self._get_attributes()
        a_names = list(attrs.keys())
        a_names.sort()

        for a_name in a_names:
            writer.write(" %s=\"" % a_name)
            _write_data(writer, attrs[a_name].value)
            writer.write("\"")
        if self.childNodes:
            writer.write(">")
            for node in self.childNodes:
                node.writexml(writer)
            writer.write("</%s>" % self.tagName)
        else:
            writer.write("/>")

    def _get_attributes(self):
        """Return a fresh map object over the attribute dictionaries."""
        return AttributeList(self._attrs, self._attrsNS)

    def hasAttributes(self):
        """Return 1 if the element has any attributes, else 0."""
        if self._attrs or self._attrsNS:
            return 1
        else:
            return 0
542
class Comment(Node):
    """A comment node; its text is held in both ``data`` and ``nodeValue``."""

    nodeType = Node.COMMENT_NODE
    nodeName = "#comment"
    attributes = None
    # Comments never have children.
    childNodeTypes = ()

    def __init__(self, data):
        Node.__init__(self)
        self.data = data
        self.nodeValue = data

    def writexml(self, writer):
        """Write the comment, wrapped in its delimiters, to writer."""
        text = "<!--%s-->" % self.data
        writer.write(text)
555
class ProcessingInstruction(Node):
    """A processing instruction node.

    The target is also exposed as ``nodeName`` and the data as
    ``nodeValue``.
    """

    nodeType = Node.PROCESSING_INSTRUCTION_NODE
    attributes = None
    # Processing instructions never have children.
    childNodeTypes = ()

    def __init__(self, target, data):
        Node.__init__(self)
        self.target = target
        self.nodeName = target
        self.data = data
        self.nodeValue = data

    def writexml(self, writer):
        """Write the instruction, wrapped in its delimiters, to writer."""
        text = "<?%s %s?>" % (self.target, self.data)
        writer.write(text)
Fred Drake55c38192000-06-29 19:39:57 +0000568
class Text(Node):
    """A text node; its content is held in both ``data`` and ``nodeValue``."""

    nodeType = Node.TEXT_NODE
    nodeName = "#text"
    attributes = None
    # Text nodes never have children.
    childNodeTypes = ()

    def __init__(self, data):
        """Create a text node; raises TypeError if data is not a string."""
        if type(data) not in _StringTypes:
            raise TypeError("node contents must be a string")
        Node.__init__(self)
        self.data = self.nodeValue = data

    def __repr__(self):
        if len(self.data) > 10:
            dotdotdot = "..."
        else:
            dotdotdot = ""
        return "<DOM Text node \"%s%s\">" % (self.data[0:10], dotdotdot)

    def splitText(self, offset):
        """Split this node at offset and return the new trailing node.

        This node keeps the text before offset.  If this node is in a
        tree, the new node is inserted directly after it.

        Raises xml.dom.IndexSizeErr if offset is negative or greater
        than the length of the data.
        """
        if offset < 0 or offset > len(self.data):
            raise xml.dom.IndexSizeErr("illegal offset value")
        newText = Text(self.data[offset:])
        parent = self.parentNode
        if parent and self in parent.childNodes:
            # Read the sibling link only here: a node that was never
            # inserted into a tree has no nextSibling attribute at all.
            following = self.nextSibling
            if following is None:
                parent.appendChild(newText)
            else:
                parent.insertBefore(newText, following)
        # Keep nodeValue in step with data, as the constructor does.
        self.data = self.nodeValue = self.data[:offset]
        return newText

    def writexml(self, writer):
        """Write the escaped text to writer."""
        _write_data(writer, self.data)
Fred Drake55c38192000-06-29 19:39:57 +0000603
def _nssplit(qualifiedName):
    """Split a qualified name into a (prefix, localName) tuple.

    The split happens at the first colon.  A name without a colon gets
    an empty-string prefix.  The result is always a tuple, whichever
    form the name takes.
    """
    fields = qualifiedName.split(':', 1)
    if len(fields) == 2:
        return tuple(fields)
    # Splitting always yields at least one field, so this is the
    # no-colon case.
    return ('', fields[0])
Paul Prescod73678da2000-07-01 04:58:47 +0000610
Fred Drakef7cf40d2000-12-14 18:16:11 +0000611
class DocumentType(Node):
    """A document type node.

    Every field defaults to an empty value at class level; only
    ``name`` is filled in by the constructor.
    """

    nodeType = Node.DOCUMENT_TYPE_NODE
    nodeValue = None
    attributes = None
    name = None
    publicId = None
    systemId = None
    internalSubset = ""
    entities = None
    notations = None

    def __init__(self, qualifiedName):
        Node.__init__(self)
        if not qualifiedName:
            return
        # Only the local part of the qualified name is kept.
        prefix, localname = _nssplit(qualifiedName)
        self.name = localname
628
629
class DOMImplementation:
    """Factory for Document and DocumentType objects."""

    def hasFeature(self, feature, version):
        """Return true if the named feature is supported.

        Only the "core" feature is recognised (case-insensitively), in
        versions "1.0" and "2.0".  A version of None or "" means that
        any version is acceptable.
        """
        # An unspecified version asks whether any version is supported.
        if version not in (None, "", "1.0", "2.0"):
            return 0
        return feature.lower() == "core"

    def createDocument(self, namespaceURI, qualifiedName, doctype):
        """Create a Document, with a document element if qualifiedName
        is given.

        A DocumentType is created when doctype is None.

        Raises xml.dom.WrongDocumentErr if doctype already has a parent,
        and xml.dom.NamespaceErr for an "xml" prefix bound to the wrong
        namespace or for a prefix given without a namespace URI.
        """
        if doctype and doctype.parentNode is not None:
            raise xml.dom.WrongDocumentErr(
                "doctype object owned by another DOM tree")
        doc = Document()
        if doctype is None:
            doctype = self.createDocumentType(qualifiedName, None, None)
        if qualifiedName:
            prefix, localname = _nssplit(qualifiedName)
            if prefix == "xml" \
               and namespaceURI != "http://www.w3.org/XML/1998/namespace":
                raise xml.dom.NamespaceErr("illegal use of 'xml' prefix")
            if prefix and not namespaceURI:
                raise xml.dom.NamespaceErr(
                    "illegal use of prefix without namespaces")
            element = doc.createElementNS(namespaceURI, qualifiedName)
            doc.appendChild(element)
        # XXX else, raise an error?  Empty qname is illegal in the DOM spec!
        doctype.parentNode = doc
        doc.doctype = doctype
        doc.implementation = self
        return doc

    def createDocumentType(self, qualifiedName, publicId, systemId):
        """Create a DocumentType carrying the given public and system ids."""
        doctype = DocumentType(qualifiedName)
        doctype.publicId = publicId
        doctype.systemId = systemId
        return doctype
665
666
class Document(Node):
    """The root node of a DOM tree."""

    nodeType = Node.DOCUMENT_NODE
    nodeName = "#document"
    nodeValue = None
    attributes = None
    doctype = None
    parentNode = None

    # Class-level default shared by every Document instance.
    implementation = DOMImplementation()
    # Node types that appendChild() accepts as direct children.
    childNodeTypes = (Node.ELEMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE,
                      Node.COMMENT_NODE, Node.DOCUMENT_TYPE_NODE)

    def appendChild(self, node):
        """Append node to this document and return the base-class result.

        Raises xml.dom.HierarchyRequestErr if the node's type is not listed
        in childNodeTypes, or if node is an element and the document already
        has a different document element.
        """
        if node.nodeType not in self.childNodeTypes:
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(node), repr(self)))

        # Validate before touching the tree, so that a rejected node is not
        # left detached from its previous parent.  Re-appending the current
        # document element itself stays legal.
        if node.nodeType == Node.ELEMENT_NODE:
            current = self._get_documentElement()
            if current is not None and current is not node:
                raise xml.dom.HierarchyRequestErr(
                    "two document elements disallowed")

        if node.parentNode is not None:
            node.parentNode.removeChild(node)
        return Node.appendChild(self, node)

    def removeChild(self, oldChild):
        """Remove oldChild from this document and return it.

        list.remove() raises ValueError if oldChild is not a child.
        """
        self.childNodes.remove(oldChild)
        oldChild.nextSibling = oldChild.previousSibling = None
        oldChild.parentNode = None
        # NOTE(review): documentElement is not defined in this class; it is
        # presumably resolved by attribute machinery on Node -- confirm.
        if self.documentElement is oldChild:
            self.documentElement = None

        return oldChild

    def _get_documentElement(self):
        """Return the first element child, or None if there is none."""
        for node in self.childNodes:
            if node.nodeType == Node.ELEMENT_NODE:
                return node
        return None

    def unlink(self):
        """Unlink the doctype (if any), then run Node.unlink() on self."""
        if self.doctype is not None:
            self.doctype.unlink()
            self.doctype = None
        Node.unlink(self)

    # The factory "methods" below are bound directly to the node classes.

    createElement = Element

    createTextNode = Text

    createComment = Comment

    createProcessingInstruction = ProcessingInstruction

    createAttribute = Attr

    def createElementNS(self, namespaceURI, qualifiedName):
        """Create an element from a namespace URI and a qualified name."""
        prefix, localName = _nssplit(qualifiedName)
        return self.createElement(qualifiedName, namespaceURI,
                                  prefix, localName)

    def createAttributeNS(self, namespaceURI, qualifiedName):
        """Create an attribute from a namespace URI and a qualified name."""
        prefix, localName = _nssplit(qualifiedName)
        # Argument order (localName before prefix) differs from
        # createElementNS() and is kept exactly as the original had it.
        return self.createAttribute(qualifiedName, namespaceURI,
                                    localName, prefix)

    def getElementsByTagNameNS(self, namespaceURI, localName):
        """Return a list of the elements found by the namespace-aware helper.

        namespaceURI and localName are passed to
        _getElementsByTagNameNSHelper() unchanged.
        """
        rc = []
        # NOTE(review): the helper is defined outside this section; it is
        # assumed to fill the list passed as its last argument, like
        # _getElementsByTagNameHelper() below -- confirm its signature.
        _getElementsByTagNameNSHelper(self, namespaceURI, localName, rc)
        return rc

    def getElementsByTagName(self, name):
        """Return the list filled in by _getElementsByTagNameHelper()."""
        rc = []
        _getElementsByTagNameHelper(self, name, rc)
        return rc

    def writexml(self, writer):
        """Write the XML declaration, then each child node, to writer."""
        writer.write('<?xml version="1.0" ?>\n')
        for node in self.childNodes:
            node.writexml(writer)
Fred Drake55c38192000-06-29 19:39:57 +0000744
def _get_StringIO():
    """Return a new, empty StringIO buffer.

    The pure-Python StringIO module is used on purpose: cStringIO does
    not support Unicode strings.
    """
    import StringIO
    return StringIO.StringIO()
749
Fred Drake1f549022000-09-24 05:21:58 +0000750def _doparse(func, args, kwargs):
751 events = apply(func, args, kwargs)
752 toktype, rootNode = events.getEvent()
753 events.expandNode(rootNode)
Fred Drake55c38192000-06-29 19:39:57 +0000754 return rootNode
755
def parse(*args, **kwargs):
    """Build a DOM from a file, given either a filename or a file object.

    Every positional and keyword argument is forwarded to pulldom.parse().
    """
    from xml.dom import pulldom
    factory = pulldom.parse
    return _doparse(factory, args, kwargs)
Fred Drake55c38192000-06-29 19:39:57 +0000760
def parseString(*args, **kwargs):
    """Build a DOM from XML text held in a string.

    Every positional and keyword argument is forwarded to
    pulldom.parseString().
    """
    from xml.dom import pulldom
    factory = pulldom.parseString
    return _doparse(factory, args, kwargs)