blob: 32d2d2b1bfa89eb5403af83d810120f600e56f13 [file] [log] [blame]
Fred Drake55c38192000-06-29 19:39:57 +00001import pulldom
2import string
3from StringIO import StringIO
4import types
5
6"""
7minidom.py -- a lightweight DOM implementation based on SAX.
8
9Todo:
10=====
11 * convenience methods for getting elements and text.
12 * more testing
13 * bring some of the writer and linearizer code into conformance with this
14 interface
15 * SAX 2 namespaces
16"""
17
class Node:
    """Base class shared by every node type in this DOM implementation.

    Besides the child-list helpers, it provides a small attribute protocol
    through __getattr__: reading ``node.foo`` falls back to calling
    ``node._get_foo()``, and reading ``node._get_foo`` falls back to a
    callable that returns ``node.foo``.
    """
    ELEMENT_NODE = 1
    ATTRIBUTE_NODE = 2
    TEXT_NODE = 3
    CDATA_SECTION_NODE = 4
    ENTITY_REFERENCE_NODE = 5
    ENTITY_NODE = 6
    PROCESSING_INSTRUCTION_NODE = 7
    COMMENT_NODE = 8
    DOCUMENT_NODE = 9
    DOCUMENT_TYPE_NODE = 10
    DOCUMENT_FRAGMENT_NODE = 11
    NOTATION_NODE = 12

    # One "id + class" string per live node: added by __init__, removed by
    # unlink().  Anything left here belongs to a node that was never unlinked.
    allnodes = []

    def __init__(self):
        self.childNodes = []
        Node.allnodes.append(repr(id(self)) + repr(self.__class__))

    def __getattr__(self, key):
        """Resolve *key* through the _get_ accessor protocol.

        Only called when normal attribute lookup fails.  Raises
        AttributeError(key) when neither the attribute nor its accessor
        exists.  The instance flag ``inGetAttr`` guards against this method
        recursing into itself.
        """
        if key[0:2] == "__":
            raise AttributeError(key)
        # getattr should never call getattr!
        if self.__dict__.get("inGetAttr"):
            del self.inGetAttr
            raise AttributeError(key)

        prefix, attrname = key[:5], key[5:]
        if prefix == "_get_":
            self.inGetAttr = 1
            found = hasattr(self, attrname)
            # A failed lookup re-entered __getattr__, which already removed
            # the guard; only remove it here if it is still present.
            if self.__dict__.get("inGetAttr"):
                del self.inGetAttr
            if found:
                return (lambda self=self, attrname=attrname:
                        getattr(self, attrname))
            raise AttributeError(key)
        else:
            self.inGetAttr = 1
            try:
                func = getattr(self, "_get_" + key)
            except AttributeError:
                if self.__dict__.get("inGetAttr"):
                    del self.inGetAttr
                raise AttributeError(key)
            del self.inGetAttr
            return func()

    def __nonzero__(self):
        # Nodes are always true, whatever they contain.
        return 1

    def toxml(self):
        """Return the text that self.writexml() writes, as one string."""
        writer = StringIO()
        self.writexml(writer)
        return writer.getvalue()

    def hasChildNodes(self):
        """Return 1 if childNodes is non-empty, else 0."""
        if self.childNodes:
            return 1
        else:
            return 0

    def insertBefore(self, newChild, refChild):
        """Insert *newChild* into childNodes just before *refChild*.

        A *refChild* of None appends *newChild* at the end.  Raises
        ValueError if *refChild* is given but is not a child of this node.
        """
        if refChild is None:
            self.appendChild(newChild)
            return
        index = self.childNodes.index(refChild)
        self.childNodes.insert(index, newChild)

    def appendChild(self, node):
        """Add *node* at the end of childNodes."""
        self.childNodes.append(node)

    def unlink(self):
        """Recursively break the references held by this subtree.

        Clears parentNode, unlinks and drops every child, unlinks every
        attribute node, and removes this node from Node.allnodes.  The node
        is unusable afterwards (childNodes and attributes become None).
        """
        self.parentNode = None
        while self.childNodes:
            self.childNodes[-1].unlink()
            del self.childNodes[-1]  # probably not most efficient!
        self.childNodes = None
        if self.attributes:
            for attr in self.attributes.values():
                attr.unlink()
        self.attributes = None
        Node.allnodes.remove(repr(id(self)) + repr(self.__class__))
94
95def _write_data( writer, data):
96 "Writes datachars to writer."
97 data=string.replace(data,"&","&")
98 data=string.replace(data,"<","&lt;")
99 data=string.replace(data,"\"","&quot;")
100 data=string.replace(data,">","&gt;")
101 writer.write(data)
102
103def _closeElement( element ):
104 del element.parentNode
105 for node in element.elements:
106 _closeElement( node )
107
108def _getElementsByTagNameHelper( parent, name, rc ):
109 for node in parent.childNodes:
110 if node.nodeType==Node.ELEMENT_NODE and\
111 (name=="*" or node.tagName==name):
112 rc.append( node )
113 _getElementsByTagNameHelper( node, name, rc )
114 return rc
115
116def _getElementsByTagNameNSHelper( parent, nsURI, localName, rc ):
117 for node in parent.childNodes:
118 if (node.nodeType==Node.ELEMENT_NODE ):
119 if ((localName=="*" or node.tagName==localName) and
120 (nsURI=="*" or node.namespaceURI==nsURI)):
121 rc.append( node )
122 _getElementsByTagNameNSHelper( node, name, rc )
123
class Attr(Node):
    """An attribute node.

    ``name``/``nodeName`` hold the qualified name; ``value`` and
    ``nodeValue`` are kept identical by __setattr__.
    """
    nodeType = Node.ATTRIBUTE_NODE

    def __init__(self, qName, namespaceURI="", prefix="",
                 localName=None):
        Node.__init__(self)
        assert qName
        # skip setattr for performance
        fields = self.__dict__
        fields["nodeName"] = fields["name"] = qName
        fields["localName"] = localName or qName
        fields["prefix"] = prefix
        fields["namespaceURI"] = namespaceURI
        # nodeValue and value are set elsewhere
        self.attributes = None

    def __setattr__(self, name, value):
        # Writing either spelling of the value updates both; everything
        # else is stored directly in the instance dictionary.
        fields = self.__dict__
        if name == "value" or name == "nodeValue":
            fields["value"] = fields["nodeValue"] = value
        else:
            fields[name] = value
143
class AttributeList:
    """Read-only mapping view over an element's attributes.

    The attribute list is a transient interface to the underlying
    dictionaries: *attrs* maps qualified name -> attribute node and
    *attrsNS* maps (namespaceURI, localName) -> attribute node.  The
    dictionaries are referenced, not copied.
    """

    def __init__(self, attrs, attrsNS):
        self.__attrs = attrs
        self.__attrsNS = attrsNS
        # Snapshot taken at construction time; also what len() reports.
        self.length = len(self.__attrs)

    def item(self, index):
        """Return the value of the attribute at *index*, or None if the
        index is out of range."""
        try:
            return self[self.keys()[index]]
        except IndexError:
            return None

    def items(self):
        """Return a list of (qualified name, value) pairs."""
        return [(node.nodeName, node.value)
                for node in self.__attrs.values()]

    def itemsNS(self):
        """Return a list of ((namespaceURI, localName), value) pairs."""
        return [((node.namespaceURI, node.localName), node.value)
                for node in self.__attrs.values()]

    def keys(self):
        """Return the qualified attribute names as a list."""
        return list(self.__attrs.keys())

    def keysNS(self):
        """Return the (namespaceURI, localName) keys as a list."""
        return list(self.__attrsNS.keys())

    def values(self):
        """Return the attribute nodes as a list."""
        return list(self.__attrs.values())

    def __len__(self):
        return self.length

    def __cmp__(self, other):
        # Two lists wrapping the same dictionary are equal; otherwise fall
        # back to an arbitrary but stable ordering by identity.
        if self.__attrs is other.__attrs:
            return 0
        else:
            return cmp(id(self), id(other))

    #FIXME: is it appropriate to return .value?
    def __getitem__(self, attname_or_tuple):
        """Return an attribute's value.

        A tuple key is looked up as (namespaceURI, localName); any other
        key is looked up as a qualified name.  Raises KeyError if absent.
        """
        if type(attname_or_tuple) == type((1, 2)):
            return self.__attrsNS[attname_or_tuple].value
        else:
            return self.__attrs[attname_or_tuple].value

    def __setitem__(self, attname, value=None):
        # Takes the value argument so that ``attrs[name] = x`` reaches this
        # message instead of failing on the argument count.
        raise TypeError("object does not support item assignment")
193
class Element(Node):
    """An element node: a tag name, namespace information, attributes and
    (through Node) a list of children.

    The ``attributes`` attribute is not stored on the instance; it is
    produced on demand by _get_attributes() via Node.__getattr__.
    """
    nodeType = Node.ELEMENT_NODE

    def __init__(self, tagName, namespaceURI="", prefix="",
                 localName=None):
        Node.__init__(self)
        self.tagName = self.nodeName = tagName
        self.localName = localName or tagName
        self.prefix = prefix
        self.namespaceURI = namespaceURI
        self.nodeValue = None

        self.__attrs = {}    # attributes are double-indexed:
        self.__attrsNS = {}  #    tagName -> Attribute
                             #    URI,localName -> Attribute
        # in the future: consider lazy generation of attribute objects
        # this is too tricky for now because of headaches
        # with namespaces.

    def getAttribute(self, attname):
        """Return the value of attribute *attname*; KeyError if absent."""
        return self.__attrs[attname].value

    def getAttributeNS(self, namespaceURI, localName):
        """Return the value of the attribute identified by namespace URI
        and local name; KeyError if absent."""
        return self.__attrsNS[(namespaceURI, localName)].value

    def setAttribute(self, attname, value):
        """Create an attribute named *attname* with *value* and store it,
        replacing any attribute of the same name."""
        attr = Attr(attname)
        # for performance
        attr.__dict__["value"] = attr.__dict__["nodeValue"] = value
        self.setAttributeNode(attr)

    def setAttributeNS(self, namespaceURI, qualifiedName, value):
        """Create a namespaced attribute with *value* and store it.

        *qualifiedName* is split at its first colon into prefix and local
        name; without a colon the prefix is None.
        """
        if ":" in qualifiedName:
            prefix, localName = qualifiedName.split(":", 1)
        else:
            prefix, localName = None, qualifiedName
        attr = Attr(qualifiedName, namespaceURI, prefix, localName)
        # for performance
        attr.__dict__["value"] = attr.__dict__["nodeValue"] = value
        self.setAttributeNode(attr)

    def setAttributeNode(self, attr):
        """Store *attr* under both its qualified name and its
        (namespaceURI, localName) key."""
        self.__attrs[attr.name] = attr
        self.__attrsNS[(attr.namespaceURI, attr.localName)] = attr

    def removeAttribute(self, name):
        """Remove the attribute called *name*; KeyError if absent."""
        attr = self.__attrs[name]
        self.removeAttributeNode(attr)

    def removeAttributeNS(self, namespaceURI, localName):
        """Remove the attribute identified by namespace URI and local
        name; KeyError if absent."""
        attr = self.__attrsNS[(namespaceURI, localName)]
        self.removeAttributeNode(attr)

    def removeAttributeNode(self, node):
        """Remove attribute *node* from both indexes."""
        del self.__attrs[node.name]
        del self.__attrsNS[(node.namespaceURI, node.localName)]

    def getElementsByTagName(self, name):
        """Return a list of descendant elements whose tagName is *name*
        ("*" matches all)."""
        return _getElementsByTagNameHelper(self, name, [])

    def getElementsByTagNameNS(self, namespaceURI, localName):
        """Return a list of descendant elements matching the namespace URI
        and local name ("*" is a wildcard for either)."""
        rc = []
        # The helper fills rc in place, so its return value is not needed.
        _getElementsByTagNameNSHelper(self, namespaceURI, localName, rc)
        return rc

    def __repr__(self):
        return "<DOM Element:" + self.tagName + " at " + repr(id(self)) + " >"

    def writexml(self, writer):
        """Write this element, its attributes (sorted by name) and its
        children to *writer*; childless elements use the ``<tag/>`` form."""
        writer.write("<" + self.tagName)

        attrs = self._get_attributes()
        a_names = list(attrs.keys())
        a_names.sort()

        for a_name in a_names:
            writer.write(" " + a_name + "=\"")
            _write_data(writer, attrs[a_name])
            writer.write("\"")
        if self.childNodes:
            writer.write(">")
            for node in self.childNodes:
                node.writexml(writer)
            writer.write("</" + self.tagName + ">")
        else:
            writer.write("/>")

    def _get_attributes(self):
        """Return a fresh AttributeList view over this element's
        attribute dictionaries."""
        return AttributeList(self.__attrs, self.__attrsNS)
275
class Comment(Node):
    """A comment node; ``data`` and ``nodeValue`` hold the comment text."""
    nodeType = Node.COMMENT_NODE

    def __init__(self, data):
        Node.__init__(self)
        self.data = data
        self.nodeValue = data
        self.nodeName = "#comment"
        self.attributes = None

    def writexml(self, writer):
        """Write the comment to *writer* wrapped in ``<!--`` and ``-->``;
        the text itself is written unescaped."""
        writer.write("".join(("<!--", self.data, "-->")))
286
class ProcessingInstruction(Node):
    """A processing-instruction node.

    ``target`` doubles as nodeName and ``data`` doubles as nodeValue.
    """
    nodeType = Node.PROCESSING_INSTRUCTION_NODE

    def __init__(self, target, data):
        Node.__init__(self)
        self.target = target
        self.nodeName = target
        self.data = data
        self.nodeValue = data
        self.attributes = None

    def writexml(self, writer):
        """Write ``<?target data?>`` to *writer*, unescaped."""
        writer.write("".join(("<?", self.target, " ", self.data, "?>")))
297
class Text(Node):
    """A text node; ``data`` and ``nodeValue`` hold the character data."""
    nodeType = Node.TEXT_NODE
    nodeName = "#text"

    def __init__(self, data):
        Node.__init__(self)
        self.data = data
        self.nodeValue = data
        self.attributes = None

    def __repr__(self):
        # Show at most the first ten characters, marking any truncation.
        preview = self.data[0:10]
        if len(self.data) > 10:
            preview = preview + "..."
        return "<DOM Text node \"" + preview + "\">"

    def writexml(self, writer):
        """Write the text to *writer* through _write_data, which escapes
        markup characters."""
        _write_data(writer, self.data)
315
class Document(Node):
    """The document node: root of the tree and factory for other nodes."""
    nodeType = Node.DOCUMENT_NODE

    def __init__(self):
        Node.__init__(self)
        self.documentElement = None
        self.attributes = None
        self.nodeName = "#document"
        self.nodeValue = None

    # The plain factory methods are the node classes themselves.
    createElement = Element

    createTextNode = Text

    createComment = Comment

    createProcessingInstruction = ProcessingInstruction

    createAttribute = Attr

    def createElementNS(self, namespaceURI, qualifiedName):
        """Return a new Element for *qualifiedName* in *namespaceURI*.

        The name is split at its first colon into prefix and local name;
        without a colon the prefix is the empty string.
        """
        if ":" in qualifiedName:
            prefix, localName = qualifiedName.split(":", 1)
        else:
            prefix, localName = "", qualifiedName
        return Element(qualifiedName, namespaceURI, prefix, localName)

    def createAttributeNS(self, namespaceURI, qualifiedName):
        """Return a new Attr for *qualifiedName* in *namespaceURI*.

        The name is split at its first colon into prefix and local name;
        without a colon the prefix is None.
        """
        if ":" in qualifiedName:
            prefix, localName = qualifiedName.split(":", 1)
        else:
            prefix, localName = None, qualifiedName
        return Attr(qualifiedName, namespaceURI, prefix, localName)

    def getElementsByTagNameNS(self, namespaceURI, localName):
        """Return a list of elements in the document matching the
        namespace URI and local name ("*" is a wildcard for either)."""
        rc = []
        # The helper fills rc in place, so its return value is not needed.
        _getElementsByTagNameNSHelper(self, namespaceURI, localName, rc)
        return rc

    def close(self):
        # NOTE(review): nothing visible in this file defines an ``elements``
        # attribute on any node class -- confirm whether childNodes was
        # intended before relying on this method.
        for node in self.elements:
            _closeElement(node)

    def unlink(self):
        """Drop the documentElement reference, then unlink the tree."""
        self.documentElement = None
        Node.unlink(self)

    def getElementsByTagName(self, name):
        """Return a list of elements in the document whose tagName is
        *name* ("*" matches all)."""
        rc = []
        _getElementsByTagNameHelper(self, name, rc)
        return rc

    def writexml(self, writer):
        """Write every child of the document to *writer*, in order."""
        for node in self.childNodes:
            node.writexml(writer)
374
375def _doparse( func, args, kwargs ):
376 events=apply( func, args, kwargs )
377 (toktype, rootNode)=events.getEvent()
378 events.expandNode( rootNode )
379 return rootNode
380
def parse(*args, **kwargs):
    """Return the root node built by _doparse from pulldom.parse, which
    receives all positional and keyword arguments unchanged."""
    event_source = pulldom.parse
    return _doparse(event_source, args, kwargs)
383
def parseString(*args, **kwargs):
    """Return the root node built by _doparse from pulldom.parseString,
    which receives all positional and keyword arguments unchanged."""
    event_source = pulldom.parseString
    return _doparse(event_source, args, kwargs)