blob: 3f5668ed7787e093dc2dfaeaae2acdf78d7dc991 [file] [log] [blame]
Fred Drake55c38192000-06-29 19:39:57 +00001import pulldom
2import string
3from StringIO import StringIO
4import types
5
6"""
7minidom.py -- a lightweight DOM implementation based on SAX.
8
Paul Prescod623511b2000-07-21 22:05:49 +00009parse( "foo.xml" )
10
11parseString( "<foo><bar/></foo>" )
12
Fred Drake55c38192000-06-29 19:39:57 +000013Todo:
14=====
15 * convenience methods for getting elements and text.
16 * more testing
17 * bring some of the writer and linearizer code into conformance with this
18 interface
19 * SAX 2 namespaces
20"""
21
class Node:
    """Base class shared by every node type in this DOM implementation.

    Provides the DOM node-type constants, the ``childNodes`` list and the
    generic tree-manipulation methods.  Attribute access that fails the
    normal lookup is routed through ``__getattr__``: ``node.foo`` falls
    back to calling ``node._get_foo()``, and ``node._get_foo`` falls back
    to a function returning ``node.foo``.

    ``unlink()`` reads ``self.attributes`` (and, when that is true,
    ``self._attrs``, ``self._attrsNS`` and ``self.removeAttributeNode``);
    this class does not define them itself.
    """
    ELEMENT_NODE = 1
    ATTRIBUTE_NODE = 2
    TEXT_NODE = 3
    CDATA_SECTION_NODE = 4
    ENTITY_REFERENCE_NODE = 5
    ENTITY_NODE = 6
    PROCESSING_INSTRUCTION_NODE = 7
    COMMENT_NODE = 8
    DOCUMENT_NODE = 9
    DOCUMENT_TYPE_NODE = 10
    DOCUMENT_FRAGMENT_NODE = 11
    NOTATION_NODE = 12

    # Debug bookkeeping, shared by all nodes.
    allnodes = {}         # "<id><class>" key -> repr of the node's __dict__
    _debug = 0            # when true, node creation and unlinking are logged
    _makeParentNodes = 1  # when true, children get a parentNode back-reference
    debug = None          # log stream, created on first use in debug mode

    def __init__(self):
        self.childNodes = []
        self.parentNode = None
        if Node._debug:
            self._debugRegister()

    def _debugRegister(self):
        "Record this node in Node.allnodes and log its creation."
        index = repr(id(self)) + repr(self.__class__)
        Node.allnodes[index] = repr(self.__dict__)
        if Node.debug is None:
            Node.debug = StringIO()
        Node.debug.write("create %s\n" % index)

    def __getattr__(self, key):
        """Map ``foo`` <-> ``_get_foo()`` for names not found normally.

        Raises AttributeError when neither form exists.  The
        ``inGetAttr`` entry in the instance dictionary marks a lookup in
        progress, so that the nested lookup made here fails immediately
        instead of recursing.
        """
        if key[0:2] == "__":
            raise AttributeError(key)
        state = self.__dict__
        # getattr should never call getattr!
        if "inGetAttr" in state:
            del state["inGetAttr"]
            raise AttributeError(key)

        prefix, attrname = key[:5], key[5:]
        if prefix == "_get_":
            state["inGetAttr"] = 1
            found = hasattr(self, attrname)
            # A failed nested lookup already cleared the marker.
            if "inGetAttr" in state:
                del state["inGetAttr"]
            if not found:
                raise AttributeError(key)
            return (lambda self=self, attrname=attrname:
                    getattr(self, attrname))

        state["inGetAttr"] = 1
        try:
            func = getattr(self, "_get_" + key)
        except AttributeError:
            if "inGetAttr" in state:
                del state["inGetAttr"]
            raise AttributeError(key)
        if "inGetAttr" in state:
            del state["inGetAttr"]
        return func()

    def __nonzero__(self):
        "A node is always true, even when it has no children."
        return 1

    def toxml(self):
        "Return the text produced by self.writexml() as a string."
        writer = StringIO()
        self.writexml(writer)
        return writer.getvalue()

    def hasChildNodes(self):
        "Return 1 if this node has at least one child, else 0."
        if self.childNodes:
            return 1
        else:
            return 0

    def _get_firstChild(self):
        "Return the first child, or None when there are no children."
        if self.childNodes:
            return self.childNodes[0]
        return None

    def _get_lastChild(self):
        "Return the last child, or None when there are no children."
        if self.childNodes:
            return self.childNodes[-1]
        return None

    def insertBefore(self, newChild, refChild):
        """Insert newChild before refChild and return newChild.

        A refChild of None appends newChild.  Raises ValueError when
        refChild is not one of this node's children.
        """
        if refChild is None:
            return self.appendChild(newChild)
        index = self.childNodes.index(refChild)
        self.childNodes.insert(index, newChild)
        if self._makeParentNodes:
            newChild.parentNode = self
        return newChild

    def appendChild(self, node):
        "Add node as the last child and return it."
        self.childNodes.append(node)
        if self._makeParentNodes:
            node.parentNode = self
        return node

    def replaceChild(self, newChild, oldChild):
        """Put newChild in oldChild's position and return oldChild.

        Raises ValueError when oldChild is not a child of this node.
        """
        index = self.childNodes.index(oldChild)
        self.childNodes[index] = newChild
        if self._makeParentNodes:
            # Clear the old link first so replacing a node by itself
            # leaves it attached.
            oldChild.parentNode = None
            newChild.parentNode = self
        return oldChild

    def removeChild(self, oldChild):
        """Remove oldChild from the children and return it.

        Raises ValueError when oldChild is not a child of this node.
        """
        index = self.childNodes.index(oldChild)
        del self.childNodes[index]
        if self._makeParentNodes:
            oldChild.parentNode = None
        return oldChild

    def cloneNode(self, deep):
        """Return a copy of this node that has no parent.

        With a true ``deep`` the children are cloned recursively;
        otherwise the clone has an empty child list.  When the node
        carries ``_attrs`` / ``_attrsNS`` attribute indexes, the
        attribute nodes are cloned as well so the copy shares none of
        them with the original.
        """
        import copy
        # copy.copy gives the clone its own instance dictionary.
        clone = copy.copy(self)
        clone.parentNode = None

        attrs = self.__dict__.get("_attrs")
        if attrs is not None:
            clone._attrs = {}
            clone._attrsNS = {}
            for attr in attrs.values():
                attrClone = attr.cloneNode(1)
                clone._attrs[attrClone.name] = attrClone
                clone._attrsNS[(attrClone.namespaceURI,
                                attrClone.localName)] = attrClone

        clone.childNodes = []
        if deep:
            for child in (self.childNodes or []):
                childClone = child.cloneNode(deep)
                if self._makeParentNodes:
                    childClone.parentNode = clone
                clone.childNodes.append(childClone)

        if Node._debug:
            clone._debugRegister()
        return clone

    def unlink(self):
        """Break the references held by this node and its descendants.

        Clears parentNode, unlinks and drops every child, sets
        childNodes to None and removes all attribute nodes.
        """
        self.parentNode = None
        while self.childNodes:
            self.childNodes[-1].unlink()
            del self.childNodes[-1]  # probably not most efficient!
        self.childNodes = None
        if self.attributes:
            # Iterate over a snapshot: removeAttributeNode mutates _attrs.
            for attr in list(self._attrs.values()):
                self.removeAttributeNode(attr)
            assert not len(self._attrs)
            assert not len(self._attrsNS)
        if Node._debug:
            index = repr(id(self)) + repr(self.__class__)
            if self.debug is not None:
                self.debug.write("Deleting: %s\n" % index)
            # The node may predate debug mode and so never have been
            # registered.
            if index in Node.allnodes:
                del Node.allnodes[index]
Fred Drake55c38192000-06-29 19:39:57 +0000137
138def _write_data( writer, data):
139 "Writes datachars to writer."
140 data=string.replace(data,"&","&amp;")
141 data=string.replace(data,"<","&lt;")
142 data=string.replace(data,"\"","&quot;")
143 data=string.replace(data,">","&gt;")
144 writer.write(data)
145
Fred Drake55c38192000-06-29 19:39:57 +0000146def _getElementsByTagNameHelper( parent, name, rc ):
147 for node in parent.childNodes:
148 if node.nodeType==Node.ELEMENT_NODE and\
149 (name=="*" or node.tagName==name):
150 rc.append( node )
151 _getElementsByTagNameHelper( node, name, rc )
152 return rc
153
154def _getElementsByTagNameNSHelper( parent, nsURI, localName, rc ):
155 for node in parent.childNodes:
156 if (node.nodeType==Node.ELEMENT_NODE ):
157 if ((localName=="*" or node.tagName==localName) and
158 (nsURI=="*" or node.namespaceURI==nsURI)):
159 rc.append( node )
160 _getElementsByTagNameNSHelper( node, name, rc )
161
class Attr(Node):
    """An attribute node.

    ``name`` and ``nodeName`` hold the qualified name.  ``value`` and
    ``nodeValue`` are kept identical by ``__setattr__``; neither is set
    by the constructor.
    """
    nodeType = Node.ATTRIBUTE_NODE

    def __init__(self, qName, namespaceURI="", localName=None,
                 prefix=None):
        # Write straight into the instance dictionary; this is what
        # __setattr__ would do for these names anyway.
        fields = self.__dict__
        fields["localName"] = localName or qName
        fields["nodeName"] = qName
        fields["name"] = qName
        fields["namespaceURI"] = namespaceURI
        fields["prefix"] = prefix
        fields["attributes"] = None
        Node.__init__(self)
        # nodeValue and value are set elsewhere

    def __setattr__(self, name, value):
        "Store the attribute, mirroring value and nodeValue."
        fields = self.__dict__
        if name == "value" or name == "nodeValue":
            fields["value"] = value
            fields["nodeValue"] = value
            return
        fields[name] = value
180
class AttributeList:
    """The attribute list is a transient interface to the underlying
    dictionaries.  Mutations here will change the underlying element's
    dictionaries.

    ``attrs`` maps attribute names to attribute nodes and ``attrsNS``
    maps ``(namespaceURI, localName)`` tuples to the same nodes.  Both
    ``len()`` and the ``length`` attribute always reflect the current
    size of ``attrs``.
    """

    def __init__(self, attrs, attrsNS):
        self._attrs = attrs
        self._attrsNS = attrsNS

    def __getattr__(self, name):
        # "length" is computed on demand so it cannot go stale when the
        # underlying dictionary changes after this object was created.
        if name == "length":
            return len(self._attrs)
        raise AttributeError(name)

    def item(self, index):
        "Return the attribute node at position index, or None."
        try:
            return self[self.keys()[index]]
        except IndexError:
            return None

    def items(self):
        "Return a list of (name, value) pairs."
        return [(node.name, node.value) for node in self._attrs.values()]

    def itemsNS(self):
        "Return a list of ((namespaceURI, localName), value) pairs."
        return [((node.namespaceURI, node.localName), node.value)
                for node in self._attrs.values()]

    def keys(self):
        "Return the attribute names as a list."
        return list(self._attrs.keys())

    def keysNS(self):
        "Return the (namespaceURI, localName) keys as a list."
        return list(self._attrsNS.keys())

    def values(self):
        "Return the attribute nodes as a list."
        return list(self._attrs.values())

    def __len__(self):
        return len(self._attrs)

    def __cmp__(self, other):
        # Two lists are equal when they wrap the very same dictionary.
        if self._attrs is getattr(other, "_attrs", None):
            return 0
        else:
            return cmp(id(self), id(other))

    #FIXME: is it appropriate to return .value?
    def __getitem__(self, attname_or_tuple):
        """Return the attribute node for a name or a
        (namespaceURI, localName) tuple; raises KeyError if absent."""
        if isinstance(attname_or_tuple, tuple):
            return self._attrsNS[attname_or_tuple]
        else:
            return self._attrs[attname_or_tuple]

    # same as set
    def __setitem__(self, attname, value):
        """Store an attribute.

        ``value`` is either an Attr node, stored as is under its own
        name, or the attribute's value, for which a new Attr named
        ``attname`` is created.  A different node previously stored
        under the same name is unlinked and dropped from both indexes.
        """
        if isinstance(value, Attr):
            node = value
        else:
            node = Attr(attname)
            node.value = value
        old = self._attrs.get(node.name, None)
        if old is not None and old is not node:
            oldKey = (old.namespaceURI, old.localName)
            old.unlink()
            # Drop the old namespace entry; it may differ from the new one.
            if self._attrsNS.get(oldKey) is old:
                del self._attrsNS[oldKey]
        self._attrs[node.name] = node
        self._attrsNS[(node.namespaceURI, node.localName)] = node

    def __delitem__(self, attname_or_tuple):
        "Unlink the addressed attribute node and drop it from both indexes."
        node = self[attname_or_tuple]
        node.unlink()
        del self._attrs[node.name]
        del self._attrsNS[(node.namespaceURI, node.localName)]
248
class Element(Node):
    """An element node with a tag name, namespace data and attributes.

    Attribute nodes are kept in two dictionaries that index the same
    Attr objects; ``attributes`` is served on demand by
    ``_get_attributes``.
    """
    nodeType = Node.ELEMENT_NODE

    def __init__(self, tagName, namespaceURI="", prefix="",
                 localName=None):
        Node.__init__(self)
        self.tagName = self.nodeName = tagName
        self.localName = localName or tagName
        self.prefix = prefix
        self.namespaceURI = namespaceURI
        self.nodeValue = None

        # attributes are double-indexed:
        self._attrs = {}    # name -> Attr
        self._attrsNS = {}  # (namespaceURI, localName) -> Attr
        # in the future: consider lazy generation of attribute objects
        #                this is too tricky for now because of headaches
        #                with namespaces.

    def getAttribute(self, attname):
        "Return the value of the named attribute; KeyError if absent."
        return self._attrs[attname].value

    def getAttributeNS(self, namespaceURI, localName):
        "Return the value of the attribute with this namespace and local name; KeyError if absent."
        return self._attrsNS[(namespaceURI, localName)].value

    def setAttribute(self, attname, value):
        "Create an attribute called attname with the given value and store it."
        attr = Attr(attname)
        # for performance
        attr.__dict__["value"] = attr.__dict__["nodeValue"] = value
        self.setAttributeNode(attr)

    def setAttributeNS(self, namespaceURI, qualifiedName, value):
        "Create a namespaced attribute with the given value and store it."
        prefix, localname = _nssplit(qualifiedName)
        # for performance
        attr = Attr(qualifiedName, namespaceURI, localname, prefix)
        attr.__dict__["value"] = attr.__dict__["nodeValue"] = value
        self.setAttributeNode(attr)

    def getAttributeNode(self, attrname):
        "Return the named attribute node, or None if absent."
        return self._attrs.get(attrname)

    def getAttributeNodeNS(self, namespaceURI, localName):
        "Return the attribute node with this namespace and local name; KeyError if absent."
        return self._attrsNS[(namespaceURI, localName)]

    def setAttributeNode(self, attr):
        """Store attr in both indexes.

        A different node already stored under the same name is unlinked
        and removed from the namespace index, whose key may differ from
        the new node's.  Re-setting the node that is already stored
        leaves it untouched.
        """
        old = self._attrs.get(attr.name, None)
        if old is not None and old is not attr:
            oldKey = (old.namespaceURI, old.localName)
            old.unlink()
            if self._attrsNS.get(oldKey) is old:
                del self._attrsNS[oldKey]
        self._attrs[attr.name] = attr
        self._attrsNS[(attr.namespaceURI, attr.localName)] = attr

    def removeAttribute(self, name):
        "Remove the named attribute; KeyError if absent."
        attr = self._attrs[name]
        self.removeAttributeNode(attr)

    def removeAttributeNS(self, namespaceURI, localName):
        "Remove the attribute with this namespace and local name; KeyError if absent."
        attr = self._attrsNS[(namespaceURI, localName)]
        self.removeAttributeNode(attr)

    def removeAttributeNode(self, node):
        "Unlink node and delete it from both indexes."
        node.unlink()
        del self._attrs[node.name]
        del self._attrsNS[(node.namespaceURI, node.localName)]

    def getElementsByTagName(self, name):
        "Return the list of descendant elements whose tagName is name ('*' matches all)."
        rc = []
        _getElementsByTagNameHelper(self, name, rc)
        return rc

    def getElementsByTagNameNS(self, namespaceURI, localName):
        "Return the list of descendant elements matching the namespace and local name."
        # The helper fills rc in place; return the list itself instead
        # of relying on the helper's return value.
        rc = []
        _getElementsByTagNameNSHelper(self, namespaceURI, localName, rc)
        return rc

    def __repr__(self):
        return "<DOM Element:" + self.tagName + " at " + repr(id(self)) + " >"

    def writexml(self, writer):
        """Write this element, its attributes (sorted by name) and its
        children to writer; an element without children is written as
        an empty-element tag."""
        writer.write("<" + self.tagName)

        a_names = list(self._attrs.keys())
        a_names.sort()

        for a_name in a_names:
            writer.write(" " + a_name + "=\"")
            # Escape the attribute's value, not the Attr node itself.
            _write_data(writer, self._attrs[a_name].value)
            writer.write("\"")
        if self.childNodes:
            writer.write(">")
            for node in self.childNodes:
                node.writexml(writer)
            writer.write("</" + self.tagName + ">")
        else:
            writer.write("/>")

    def _get_attributes(self):
        "Return an AttributeList wrapping this element's attribute indexes."
        return AttributeList(self._attrs, self._attrsNS)
Fred Drake55c38192000-06-29 19:39:57 +0000341
class Comment(Node):
    "A comment node; data and nodeValue both hold the comment text."
    nodeType = Node.COMMENT_NODE

    def __init__(self, data):
        Node.__init__(self)
        self.data = data
        self.nodeValue = data
        self.nodeName = "#comment"
        self.attributes = None

    def writexml(self, writer):
        "Write the comment, wrapped in its delimiters, to writer."
        pieces = ("<!--", self.data, "-->")
        writer.write("".join(pieces))
352
class ProcessingInstruction(Node):
    """A processing-instruction node.

    target doubles as nodeName and data doubles as nodeValue.
    """
    nodeType = Node.PROCESSING_INSTRUCTION_NODE

    def __init__(self, target, data):
        Node.__init__(self)
        self.target = target
        self.nodeName = target
        self.data = data
        self.nodeValue = data
        self.attributes = None

    def writexml(self, writer):
        "Write the instruction as target, one space, then data, inside its delimiters."
        pieces = ("<?", self.target, " ", self.data, "?>")
        writer.write("".join(pieces))
363
class Text(Node):
    "A text node; data and nodeValue both hold the character data."
    nodeType = Node.TEXT_NODE
    nodeName = "#text"

    def __init__(self, data):
        Node.__init__(self)
        self.data = data
        self.nodeValue = data
        self.attributes = None

    def __repr__(self):
        # Show at most the first ten characters, flagging any truncation.
        preview = self.data[0:10]
        dotdotdot = ""
        if len(self.data) > 10:
            dotdotdot = "..."
        return '<DOM Text node "' + preview + dotdotdot + '">'

    def writexml(self, writer):
        "Write the escaped character data to writer."
        _write_data(writer, self.data)
381
Paul Prescod73678da2000-07-01 04:58:47 +0000382def _nssplit( qualifiedName ):
383 fields = string.split(qualifiedName, ':')
384 if len(fields) == 2:
385 return fields
386 elif len(fields) == 1:
387 return( '', fields[0] )
388
class Document(Node):
    """The document node at the root of the tree.

    ``documentElement`` refers to the single element child, or is None
    while there is none.  The ``create*`` names construct the other
    node types.
    """
    nodeType = Node.DOCUMENT_NODE
    documentElement = None

    def __init__(self):
        Node.__init__(self)
        self.attributes = None
        self.nodeName = "#document"
        self.nodeValue = None

    def appendChild(self, node):
        """Append node and return it.

        The first element appended becomes documentElement; appending a
        second element raises TypeError.  Other node types are appended
        without touching documentElement.
        """
        if node.nodeType == Node.ELEMENT_NODE:
            if self.documentElement is not None:
                raise TypeError("Two document elements disallowed")
            self.documentElement = node
        Node.appendChild(self, node)
        return node

    createElement = Element

    createTextNode = Text

    createComment = Comment

    createProcessingInstruction = ProcessingInstruction

    createAttribute = Attr

    def createElementNS(self, namespaceURI, qualifiedName):
        "Return a new Element for the namespace URI and qualified name."
        prefix, localName = _nssplit(qualifiedName)
        return Element(qualifiedName, namespaceURI, prefix, localName)

    def createAttributeNS(self, namespaceURI, qualifiedName):
        "Return a new Attr for the namespace URI and qualified name."
        prefix, localName = _nssplit(qualifiedName)
        # Attr takes the qualified name first, then the namespace URI.
        return Attr(qualifiedName, namespaceURI, localName, prefix)

    def getElementsByTagNameNS(self, namespaceURI, localName):
        "Return the list of elements matching the namespace and local name."
        # The helper fills rc in place; return the list itself instead
        # of relying on the helper's return value.
        rc = []
        _getElementsByTagNameNSHelper(self, namespaceURI, localName, rc)
        return rc

    def unlink(self):
        "Drop the documentElement reference, then unlink the whole tree."
        self.documentElement = None
        Node.unlink(self)

    def getElementsByTagName(self, name):
        "Return the list of elements whose tagName is name ('*' matches all)."
        rc = []
        _getElementsByTagNameHelper(self, name, rc)
        return rc

    def writexml(self, writer):
        "Write every child node, in order, to writer."
        for node in self.childNodes:
            node.writexml(writer)
439
440def _doparse( func, args, kwargs ):
441 events=apply( func, args, kwargs )
442 (toktype, rootNode)=events.getEvent()
443 events.expandNode( rootNode )
444 return rootNode
445
def parse(*args, **kwargs):
    "Build a DOM from a file; all arguments are handed to pulldom.parse."
    builder = pulldom.parse
    return _doparse(builder, args, kwargs)
449
def parseString(*args, **kwargs):
    "Build a DOM from a string; all arguments are handed to pulldom.parseString."
    builder = pulldom.parseString
    return _doparse(builder, args, kwargs)
Paul Prescod623511b2000-07-21 22:05:49 +0000453