blob: 981e5116f3b2764867bf6d7a3275808f85df8788 [file] [log] [blame]
Fred Drake55c38192000-06-29 19:39:57 +00001import pulldom
2import string
3from StringIO import StringIO
4import types
5
6"""
7minidom.py -- a lightweight DOM implementation based on SAX.
8
9Todo:
10=====
11 * convenience methods for getting elements and text.
12 * more testing
13 * bring some of the writer and linearizer code into conformance with this
14 interface
15 * SAX 2 namespaces
16"""
17
class Node:
    """Base class for every node in the minidom tree.

    Provides the DOM node-type constants, the ``childNodes`` list and the
    generic tree-manipulation methods.  Attribute lookups that fail the
    normal way go through ``__getattr__``, which maps ``node.foo`` onto
    ``node._get_foo()`` and ``node._get_foo`` onto a getter for ``foo``.
    """
    ELEMENT_NODE = 1
    ATTRIBUTE_NODE = 2
    TEXT_NODE = 3
    CDATA_SECTION_NODE = 4
    ENTITY_REFERENCE_NODE = 5
    ENTITY_NODE = 6
    PROCESSING_INSTRUCTION_NODE = 7
    COMMENT_NODE = 8
    DOCUMENT_NODE = 9
    DOCUMENT_TYPE_NODE = 10
    DOCUMENT_FRAGMENT_NODE = 11
    NOTATION_NODE = 12

    # Debugging aids: when _debug is true every node built through __init__
    # is recorded in allnodes and logged to the (lazily created) debug stream.
    allnodes = {}
    _debug = 0
    # When true, tree mutations maintain parentNode back-references.
    _makeParentNodes = 1
    debug = None

    def __init__(self):
        """Start with an empty child list; register the node when debugging."""
        self.childNodes = []
        if Node._debug:
            index = repr(id(self)) + repr(self.__class__)
            Node.allnodes[index] = repr(self.__dict__)
            if Node.debug is None:
                Node.debug = StringIO()
                #open( "debug4.out", "w" )
            Node.debug.write("create %s\n" % index)

    def __getattr__(self, key):
        """Resolve ``foo`` via ``_get_foo()`` and ``_get_foo`` via ``foo``.

        Raises AttributeError (carrying *key*) when neither form exists.
        """
        if key[0:2] == "__":
            raise AttributeError(key)
        state = self.__dict__
        # getattr should never call getattr!  The flag marks a lookup that is
        # already in progress so the nested lookup fails instead of recursing.
        if "inGetAttr" in state:
            del state["inGetAttr"]
            raise AttributeError(key)

        prefix, attrname = key[:5], key[5:]
        state["inGetAttr"] = 1
        try:
            if prefix == "_get_":
                if hasattr(self, attrname):
                    return (lambda self=self, attrname=attrname:
                            getattr(self, attrname))
                raise AttributeError(key)
            try:
                func = getattr(self, "_get_" + key)
            except AttributeError:
                raise AttributeError(key)
        finally:
            # A failed nested lookup has already removed the flag, so only
            # delete it when it is still present.
            if "inGetAttr" in state:
                del state["inGetAttr"]
        return func()

    def __nonzero__(self):
        """A node is always true, regardless of how many children it has."""
        return 1

    # Python 3 spells the truth-value hook __bool__.
    __bool__ = __nonzero__

    def toxml(self):
        """Return the text produced by this node's writexml()."""
        writer = StringIO()
        self.writexml(writer)
        return writer.getvalue()

    def hasChildNodes(self):
        """Return 1 when the node has at least one child, else 0."""
        if self.childNodes:
            return 1
        else:
            return 0

    def _get_firstChild(self):
        """Return the first child, or None when there are no children."""
        if self.childNodes:
            return self.childNodes[0]
        return None

    def _get_lastChild(self):
        """Return the last child, or None when there are no children."""
        if self.childNodes:
            return self.childNodes[-1]
        return None

    def insertBefore(self, newChild, refChild):
        """Insert newChild in front of refChild and return newChild.

        A refChild of None appends at the end.  Raises ValueError (from
        list.index) when refChild is not one of this node's children.
        """
        if refChild is None:
            return self.appendChild(newChild)
        index = self.childNodes.index(refChild)
        self.childNodes.insert(index, newChild)
        if self._makeParentNodes:
            newChild.parentNode = self
        return newChild

    def appendChild(self, node):
        """Append node as the last child and return it."""
        self.childNodes.append(node)
        # Keep the back-reference consistent with insertBefore().
        if self._makeParentNodes:
            node.parentNode = self
        return node

    def replaceChild(self, newChild, oldChild):
        """Put newChild where oldChild was and return oldChild.

        Raises ValueError when oldChild is not one of this node's children.
        """
        index = self.childNodes.index(oldChild)
        self.childNodes[index] = newChild
        if self._makeParentNodes:
            newChild.parentNode = self
            oldChild.parentNode = None
        return oldChild

    def removeChild(self, oldChild):
        """Remove oldChild from the child list and return it.

        Raises ValueError when oldChild is not one of this node's children.
        """
        index = self.childNodes.index(oldChild)
        del self.childNodes[index]
        if self._makeParentNodes:
            oldChild.parentNode = None
        return oldChild

    def cloneNode(self, deep):
        """Return a copy of this node that shares no mutable state with it.

        Attribute nodes held in ``_attrs``/``_attrsNS`` are always cloned;
        children are cloned recursively only when deep is true.  The clone
        has no parent.
        """
        import copy
        # copy.copy gives the clone its own instance dict without running
        # __init__; every mutable value in it is replaced below.
        clone = copy.copy(self)
        clone.parentNode = None
        clone.childNodes = []
        if "_attrs" in self.__dict__:
            clone._attrs = {}
            clone._attrsNS = {}
            for attr in self._attrs.values():
                twin = attr.cloneNode(1)
                clone._attrs[twin.name] = twin
                clone._attrsNS[(twin.namespaceURI, twin.localName)] = twin
        # A per-instance documentElement must not keep pointing into the
        # original tree.
        root = self.__dict__.get("documentElement")
        if "documentElement" in clone.__dict__:
            clone.documentElement = None
        if deep:
            for child in self.childNodes or []:
                twin = child.cloneNode(deep)
                if self._makeParentNodes:
                    twin.parentNode = clone
                if child is root:
                    clone.documentElement = twin
                clone.childNodes.append(twin)
        return clone

    def unlink(self):
        """Break the references held by this subtree.

        Clears parentNode, unlinks and drops every child, removes every
        attribute node, and leaves childNodes set to None.
        """
        self.parentNode = None
        while self.childNodes:
            self.childNodes[-1].unlink()
            del self.childNodes[-1] # probably not most efficient!
        self.childNodes = None
        if getattr(self, "attributes", None):
            # Iterate over a snapshot: removeAttributeNode mutates _attrs.
            for attr in list(self._attrs.values()):
                self.removeAttributeNode(attr)
            assert not len(self._attrs)
            assert not len(self._attrsNS)
        if Node._debug:
            index = repr(id(self)) + repr(self.__class__)
            if self.debug is not None:
                self.debug.write("Deleting: %s\n" % index)
            # Clones are not created through __init__ and so were never
            # registered.
            if index in Node.allnodes:
                del Node.allnodes[index]
Fred Drake55c38192000-06-29 19:39:57 +0000133
134def _write_data( writer, data):
135 "Writes datachars to writer."
136 data=string.replace(data,"&","&")
137 data=string.replace(data,"<","&lt;")
138 data=string.replace(data,"\"","&quot;")
139 data=string.replace(data,">","&gt;")
140 writer.write(data)
141
Fred Drake55c38192000-06-29 19:39:57 +0000142def _getElementsByTagNameHelper( parent, name, rc ):
143 for node in parent.childNodes:
144 if node.nodeType==Node.ELEMENT_NODE and\
145 (name=="*" or node.tagName==name):
146 rc.append( node )
147 _getElementsByTagNameHelper( node, name, rc )
148 return rc
149
150def _getElementsByTagNameNSHelper( parent, nsURI, localName, rc ):
151 for node in parent.childNodes:
152 if (node.nodeType==Node.ELEMENT_NODE ):
153 if ((localName=="*" or node.tagName==localName) and
154 (nsURI=="*" or node.namespaceURI==nsURI)):
155 rc.append( node )
156 _getElementsByTagNameNSHelper( node, name, rc )
157
class Attr(Node):
    """An attribute node; ``value`` and ``nodeValue`` always hold the same object."""
    nodeType = Node.ATTRIBUTE_NODE

    def __init__(self, qName, namespaceURI="", localName=None,
                 prefix=None):
        """Record the attribute's names; localName falls back to qName."""
        # skip setattr for performance
        state = self.__dict__
        state["localName"] = localName or qName
        state["nodeName"] = state["name"] = qName
        state["namespaceURI"] = namespaceURI
        state["prefix"] = prefix
        self.attributes = None
        Node.__init__(self)
        # nodeValue and value are set elsewhere

    def __setattr__(self, name, value):
        """Store attributes directly, mirroring value and nodeValue."""
        state = self.__dict__
        if name == "value" or name == "nodeValue":
            state["value"] = value
            state["nodeValue"] = value
        else:
            state[name] = value
176
class AttributeList:
    """the attribute list is a transient interface to the underlying
    dictionaries.  mutations here will change the underlying element's
    dictionary

    attrs maps attribute name -> attribute node; attrsNS maps
    (namespaceURI, localName) -> attribute node.
    """
    def __init__(self, attrs, attrsNS):
        self._attrs = attrs
        self._attrsNS = attrsNS
        # Kept as a plain attribute for callers that read .length; it is
        # refreshed by every mutation made through this object.
        self.length = len(self._attrs)

    def item(self, index):
        """Return the attribute node at position index, or None if out of range."""
        try:
            return self[list(self._attrs.keys())[index]]
        except IndexError:
            return None

    def items(self):
        """Return a list of (name, value) pairs."""
        return [(node.name, node.value) for node in self._attrs.values()]

    def itemsNS(self):
        """Return a list of ((namespaceURI, localName), value) pairs."""
        return [((node.namespaceURI, node.localName), node.value)
                for node in self._attrs.values()]

    def keys(self):
        """Return the attribute names."""
        return self._attrs.keys()

    def keysNS(self):
        """Return the (namespaceURI, localName) keys."""
        return self._attrsNS.keys()

    def values(self):
        """Return the attribute nodes."""
        return self._attrs.values()

    def __len__(self):
        # Read the live dictionary so the result stays correct even when the
        # element is modified behind this object's back.
        return len(self._attrs)

    def __cmp__(self, other):
        """Equal when both wrap the same name dictionary; else order by id."""
        if self._attrs is getattr(other, "_attrs", None):
            return 0
        else:
            mine, theirs = id(self), id(other)
            return (mine > theirs) - (mine < theirs)

    # Rich-comparison equivalents of __cmp__, needed where __cmp__ is not
    # consulted.
    def __eq__(self, other):
        return self._attrs is getattr(other, "_attrs", None)

    def __ne__(self, other):
        return not self.__eq__(other)

    #FIXME: is it appropriate to return .value?
    def __getitem__(self, attname_or_tuple):
        """Look up a node by name, or by a (namespaceURI, localName) tuple.

        Raises KeyError when no such attribute exists.
        """
        if isinstance(attname_or_tuple, tuple):
            return self._attrsNS[attname_or_tuple]
        else:
            return self._attrs[attname_or_tuple]

    # same as set
    def __setitem__(self, attname, value):
        """Store an attribute.

        value may be an Attr node, which is stored under its own name, or
        any other object, which becomes the value of a new Attr named
        attname.  Nodes displaced from either index are unlinked.
        """
        if isinstance(value, Attr):
            node = value
        else:
            node = Attr(attname)
            node.value = value
        ns_key = (node.namespaceURI, node.localName)
        # Evict whatever currently occupies any slot the new node will take
        # so the two indexes cannot drift apart.
        for stale in (self._attrs.get(attname), self._attrs.get(node.name),
                      self._attrsNS.get(ns_key)):
            if stale is not None and stale is not node:
                self._forget(stale)
        self._attrs[node.name] = node
        self._attrsNS[ns_key] = node
        self.length = len(self._attrs)

    def _forget(self, node):
        """Unlink node and drop the index entries that still point at it."""
        if self._attrs.get(node.name) is node:
            del self._attrs[node.name]
        ns_key = (node.namespaceURI, node.localName)
        if self._attrsNS.get(ns_key) is node:
            del self._attrsNS[ns_key]
        node.unlink()

    def __delitem__(self, attname_or_tuple):
        """Unlink the addressed attribute and remove it from both indexes."""
        node = self[attname_or_tuple]
        node.unlink()
        del self._attrs[node.name]
        del self._attrsNS[(node.namespaceURI, node.localName)]
        self.length = len(self._attrs)
244
class Element(Node):
    """An element node with child nodes and a double-indexed attribute set."""
    nodeType = Node.ELEMENT_NODE

    def __init__(self, tagName, namespaceURI="", prefix="",
                 localName=None):
        """Create an element; localName falls back to tagName."""
        Node.__init__(self)
        self.tagName = self.nodeName = tagName
        self.localName = localName or tagName
        self.prefix = prefix
        self.namespaceURI = namespaceURI
        self.nodeValue = None

        self._attrs = {}    # attributes are double-indexed:
        self._attrsNS = {}  #    tagName -> Attribute
                            #    URI,localName -> Attribute
        # in the future: consider lazy generation of attribute objects
        #                this is too tricky for now because of headaches
        #                with namespaces.

    def getAttribute(self, attname):
        """Return the value of the named attribute; KeyError if absent."""
        return self._attrs[attname].value

    def getAttributeNS(self, namespaceURI, localName):
        """Return the value of the namespaced attribute; KeyError if absent."""
        return self._attrsNS[(namespaceURI, localName)].value

    def setAttribute(self, attname, value):
        """Create an attribute named attname holding value and attach it."""
        attr = Attr(attname)
        # for performance
        attr.__dict__["value"] = attr.__dict__["nodeValue"] = value
        self.setAttributeNode(attr)

    def setAttributeNS(self, namespaceURI, qualifiedName, value):
        """Create a namespaced attribute holding value and attach it."""
        prefix, localname = _nssplit(qualifiedName)
        # for performance
        attr = Attr(qualifiedName, namespaceURI, localname, prefix)
        attr.__dict__["value"] = attr.__dict__["nodeValue"] = value
        self.setAttributeNode(attr)

    def getAttributeNode(self, attrname):
        """Return the named attribute node, or None if absent."""
        return self._attrs.get(attrname)

    def getAttributeNodeNS(self, namespaceURI, localName):
        """Return the namespaced attribute node, or None if absent."""
        return self._attrsNS.get((namespaceURI, localName))

    def setAttributeNode(self, attr):
        """Attach attr, unlinking any node it displaces from either index."""
        ns_key = (attr.namespaceURI, attr.localName)
        # A displaced node may sit under the same name, the same
        # (namespaceURI, localName) pair, or both.  Remove it from both
        # indexes so they stay in step; unlink() asserts both end up empty.
        for stale in (self._attrs.get(attr.name), self._attrsNS.get(ns_key)):
            if stale is None or stale is attr:
                continue
            if self._attrs.get(stale.name) is stale:
                del self._attrs[stale.name]
            stale_key = (stale.namespaceURI, stale.localName)
            if self._attrsNS.get(stale_key) is stale:
                del self._attrsNS[stale_key]
            stale.unlink()
        self._attrs[attr.name] = attr
        self._attrsNS[ns_key] = attr

    def removeAttribute(self, name):
        """Remove the named attribute; KeyError if absent."""
        attr = self._attrs[name]
        self.removeAttributeNode(attr)

    def removeAttributeNS(self, namespaceURI, localName):
        """Remove the namespaced attribute; KeyError if absent."""
        attr = self._attrsNS[(namespaceURI, localName)]
        self.removeAttributeNode(attr)

    def removeAttributeNode(self, node):
        """Unlink node and delete it from both attribute indexes."""
        node.unlink()
        del self._attrs[node.name]
        del self._attrsNS[(node.namespaceURI, node.localName)]

    def getElementsByTagName(self, name):
        """Return the descendant elements whose tagName matches name ("*" = all)."""
        return _getElementsByTagNameHelper(self, name, [])

    def getElementsByTagNameNS(self, namespaceURI, localName):
        """Return the descendant elements matching namespaceURI and localName."""
        # Hand the helper a list we own instead of relying on its return
        # value.
        rc = []
        _getElementsByTagNameNSHelper(self, namespaceURI, localName, rc)
        return rc

    def __repr__(self):
        return "<DOM Element:" + self.tagName + " at " + repr(id(self)) + " >"

    def writexml(self, writer):
        """Write this element, its attributes (sorted by name) and its children."""
        writer.write("<" + self.tagName)

        a_names = list(self._attrs.keys())
        a_names.sort()

        for a_name in a_names:
            writer.write(" " + a_name + "=\"")
            # Escape the attribute's text, not the Attr node itself.
            _write_data(writer, self._attrs[a_name].value)
            writer.write("\"")
        if self.childNodes:
            writer.write(">")
            for node in self.childNodes:
                node.writexml(writer)
            writer.write("</" + self.tagName + ">")
        else:
            writer.write("/>")

    def _get_attributes(self):
        """Return a fresh AttributeList view over this element's attributes."""
        return AttributeList(self._attrs, self._attrsNS)
Fred Drake55c38192000-06-29 19:39:57 +0000337
class Comment(Node):
    """A comment node; data holds the text between the comment delimiters."""
    nodeType = Node.COMMENT_NODE

    def __init__(self, data):
        Node.__init__(self)
        self.nodeName = "#comment"
        self.attributes = None
        self.data = self.nodeValue = data

    def writexml(self, writer):
        """Write the comment, delimiters included, to writer."""
        markup = "<!--" + self.data + "-->"
        writer.write(markup)
348
class ProcessingInstruction(Node):
    """A processing-instruction node made of a target and its data."""
    nodeType = Node.PROCESSING_INSTRUCTION_NODE

    def __init__(self, target, data):
        Node.__init__(self)
        self.attributes = None
        self.target = self.nodeName = target
        self.data = self.nodeValue = data

    def writexml(self, writer):
        """Write the instruction as ``<?target data?>`` to writer."""
        markup = "<?" + self.target + " " + self.data + "?>"
        writer.write(markup)
359
class Text(Node):
    """A text node; data holds the unescaped character data."""
    nodeType = Node.TEXT_NODE
    nodeName = "#text"

    def __init__(self, data):
        Node.__init__(self)
        self.attributes = None
        self.data = self.nodeValue = data

    def __repr__(self):
        # Show at most ten characters, flagging truncation with an ellipsis.
        preview = self.data[0:10]
        suffix = ""
        if len(self.data) > 10:
            suffix = "..."
        return "<DOM Text node \"" + preview + suffix + "\">"

    def writexml(self, writer):
        """Write the text to writer through _write_data."""
        _write_data(writer, self.data)
377
Paul Prescod73678da2000-07-01 04:58:47 +0000378def _nssplit( qualifiedName ):
379 fields = string.split(qualifiedName, ':')
380 if len(fields) == 2:
381 return fields
382 elif len(fields) == 1:
383 return( '', fields[0] )
384
class Document(Node):
    """The root of a DOM tree; also the factory for the other node types."""
    nodeType = Node.DOCUMENT_NODE
    documentElement = None

    def __init__(self):
        Node.__init__(self)
        self.attributes = None
        self.nodeName = "#document"
        self.nodeValue = None

    def appendChild(self, node):
        """Append node and return it.

        The first element appended becomes documentElement.  Raises
        TypeError when an element is appended while documentElement is
        already set.
        """
        if node.nodeType == Node.ELEMENT_NODE:
            if self.documentElement is not None:
                raise TypeError("Two document elements disallowed")
            # Only an element may become the document element; other
            # children must leave it untouched.
            self.documentElement = node
        Node.appendChild(self, node)
        return node

    createElement = Element

    createTextNode = Text

    createComment = Comment

    createProcessingInstruction = ProcessingInstruction

    createAttribute = Attr

    def createElementNS(self, namespaceURI, qualifiedName):
        """Return a new Element for a namespace-qualified name."""
        prefix, localName = _nssplit(qualifiedName)
        return Element(qualifiedName, namespaceURI, prefix, localName)

    def createAttributeNS(self, namespaceURI, qualifiedName):
        """Return a new Attr for a namespace-qualified name."""
        prefix, localName = _nssplit(qualifiedName)
        # Attr takes the qualified name first, then the namespace URI.
        return Attr(qualifiedName, namespaceURI, localName, prefix)

    def getElementsByTagNameNS(self, namespaceURI, localName):
        """Return the descendant elements matching namespaceURI and localName."""
        rc = []
        _getElementsByTagNameNSHelper(self, namespaceURI, localName, rc)
        return rc

    def unlink(self):
        """Drop documentElement, then unlink the whole tree."""
        self.documentElement = None
        Node.unlink(self)

    def getElementsByTagName(self, name):
        """Return the descendant elements whose tagName matches name ("*" = all)."""
        rc = []
        _getElementsByTagNameHelper(self, name, rc)
        return rc

    def writexml(self, writer):
        """Write every child of the document to writer, in order."""
        for node in self.childNodes:
            node.writexml(writer)
435
436def _doparse( func, args, kwargs ):
437 events=apply( func, args, kwargs )
438 (toktype, rootNode)=events.getEvent()
439 events.expandNode( rootNode )
440 return rootNode
441
def parse(*args, **kwargs):
    """Return the node built by _doparse from pulldom.parse.

    All arguments are forwarded unchanged to pulldom.parse.
    """
    event_source = pulldom.parse
    return _doparse(event_source, args, kwargs)
444
def parseString(*args, **kwargs):
    """Return the node built by _doparse from pulldom.parseString.

    All arguments are forwarded unchanged to pulldom.parseString.
    """
    event_source = pulldom.parseString
    return _doparse(event_source, args, kwargs)