blob: 1f7c6997514e205f01fd0d1ece01548f0a8d20f6 [file] [log] [blame]
Fred Drake55c38192000-06-29 19:39:57 +00001import minidom
2import types
3import string
4import sys
Fred Drake55c38192000-06-29 19:39:57 +00005from xml.sax import ExpatParser
6
7#todo: SAX2/namespace handling
8
# Event tokens: the first item of every (token, node) event tuple.
START_ELEMENT = "START_ELEMENT"
END_ELEMENT = "END_ELEMENT"
COMMENT = "COMMENT"
START_DOCUMENT = "START_DOCUMENT"
END_DOCUMENT = "END_DOCUMENT"
PROCESSING_INSTRUCTION = "PROCESSING_INSTRUCTION"
IGNORABLE_WHITESPACE = "IGNORABLE_WHITESPACE"
CHARACTERS = "CHARACTERS"


class PullDOM:
    """Content handler that turns parser callbacks into queued DOM events.

    Every callback creates a DOM node and appends a ``(token, node)`` tuple
    to a singly linked list built from two-item cells ``[event, next_cell]``.
    ``firstEvent`` is a sentinel cell whose second slot points at the oldest
    queued event; ``lastEvent`` is the cell most recently appended.

    Nodes get their ``parentNode`` (and, where the parent already has
    children, sibling links) set, but they are NOT added to the parent's
    ``childNodes`` by this class.
    """

    def __init__(self):
        self.firstEvent = [None, None]
        self.lastEvent = self.firstEvent

    def _linkToParent(self, node):
        """Attach node to the current node via parent and sibling pointers."""
        parent = self.curNode
        node.parentNode = parent
        if parent.childNodes:
            node.previousSibling = parent.childNodes[-1]
            node.previousSibling.nextSibling = node

    def _appendEvent(self, token, node):
        """Append a (token, node) event cell to the tail of the event list."""
        cell = [(token, node), None]
        self.lastEvent[1] = cell
        self.lastEvent = cell

    def setDocumentLocator(self, locator):
        """Accept a locator; it is ignored."""
        pass

    def startElement(self, name, tagName, attrs):
        """Create an element for tagName, copy attrs onto it and queue it.

        ``attrs`` must support ``keys()`` and item lookup.  The new element
        becomes the current node.
        """
        if not hasattr(self, "curNode"):
            # FIXME: hack!  No startDocument callback was seen yet, so
            # create the document on demand.
            self.startDocument()

        node = self.document.createElement(tagName)  # FIXME namespaces!
        for attr in attrs.keys():
            node.setAttribute(attr, attrs[attr])

        self._linkToParent(node)
        self.curNode = node
        # FIXME: do I have to screen namespace attributes
        self._appendEvent(START_ELEMENT, node)

    def endElement(self, name, tagName):
        """Queue END_ELEMENT for the current node and step up to its parent."""
        node = self.curNode
        self._appendEvent(END_ELEMENT, node)
        self.curNode = node.parentNode

    def comment(self, s):
        """Create a comment node holding s and queue a COMMENT event."""
        node = self.document.createComment(s)
        self._linkToParent(node)
        self._appendEvent(COMMENT, node)

    def processingInstruction(self, target, data):
        """Create a processing-instruction node and queue its event."""
        node = self.document.createProcessingInstruction(target, data)
        self._linkToParent(node)
        self._appendEvent(PROCESSING_INSTRUCTION, node)

    def ignorableWhitespace(self, chars):
        """Create a text node holding chars and queue IGNORABLE_WHITESPACE.

        The whole ``chars`` string is used.  The previous code sliced it with
        ``start`` and ``length``, names that are not parameters of this
        method, so every call raised NameError.
        """
        node = self.document.createTextNode(chars)
        self._linkToParent(node)
        self._appendEvent(IGNORABLE_WHITESPACE, node)

    def characters(self, chars):
        """Create a text node holding chars and queue a CHARACTERS event.

        Only ``parentNode`` is set here; unlike the other node-creating
        callbacks, no sibling links are written.
        """
        node = self.document.createTextNode(chars)
        node.parentNode = self.curNode
        self._appendEvent(CHARACTERS, node)

    def startDocument(self):
        """Create the document, make it the current node and queue it."""
        node = self.curNode = self.document = minidom.Document()
        node.parentNode = None
        self._appendEvent(START_DOCUMENT, node)

    def endDocument(self):
        """Record the document element (if any) and queue END_DOCUMENT.

        The event carries the document itself.  Previously it carried the
        loop variable left over from scanning the children, which was
        undefined for a document without children and otherwise was simply
        the last child.
        """
        assert not self.curNode.parentNode
        for child in self.curNode.childNodes:
            if child.nodeType == child.ELEMENT_NODE:
                self.document.documentElement = child
        # NOTE: a document without an element child is not rejected here.

        self._appendEvent(END_DOCUMENT, self.document)
110
class ErrorHandler:
    """Error handler that prints warnings and re-raises errors."""

    def warning(self, exception):
        """Print the warning to standard output and carry on."""
        # Parenthesised form prints the same text on Python 2 and is also
        # valid syntax on Python 3.
        print(exception)

    def error(self, exception):
        """Raise the reported error."""
        raise exception

    def fatalError(self, exception):
        """Raise the reported fatal error."""
        raise exception
118
class DOMEventStream:
    """Pull events out of a parser that is fed from a stream on demand.

    The stream is read ``bufsize`` units at a time and fed to ``parser``,
    whose content handler (a PullDOM) queues the resulting events.
    """

    def __init__(self, stream, parser, bufsize):
        self.stream = stream
        self.parser = parser
        self.bufsize = bufsize
        self.reset()

    def reset(self):
        """Install a fresh PullDOM as the parser's content handler."""
        self.pulldom = PullDOM()
        self.parser.setContentHandler(self.pulldom)

    def __getitem__(self, pos):
        """Return the next event; ``pos`` is ignored.

        Raises IndexError once no further event is available, which is what
        ends a ``for`` loop over this object.
        """
        event = self.getEvent()
        if not event:
            raise IndexError
        return event

    def expandNode(self, node):
        """Consume events until one for ``node`` itself is reached.

        Every node seen on the way, except those delivered with an
        END_ELEMENT token, is appended to its parent.  Returns None, also
        when the events run out first.
        """
        while True:
            event = self.getEvent()
            if not event:
                return
            token, current = event
            if current is node:
                return
            if token != END_ELEMENT:
                current.parentNode.appendChild(current)

    def getEvent(self):
        """Return the oldest queued event, or None when input is exhausted."""
        handler = self.pulldom
        head = handler.firstEvent
        if not head[1]:
            # Queue is empty: new events must be chained onto the sentinel.
            handler.lastEvent = head
        while not head[1]:
            chunk = self.stream.read(self.bufsize)
            if not chunk:
                # FIXME: why doesn't Expat close work?
                # (the parser is therefore not closed here)
                return None
            self.parser.feed(chunk)
        cell = head[1]
        head[1] = cell[1]
        return cell[0]
158
159# FIXME: sax2
160#def _getParser( ):
161 # from xml.sax.saxexts import make_parser
162 # expat doesn't report errors properly! Figure it out
163 # return make_parser()
164 # return make_parser("xml.sax.drivers.drv_xmllib")
165
166
167
def _getParser():
    """Return a newly created ExpatParser instance."""
    parser = ExpatParser()
    return parser
170
default_bufsize = (2**14) - 20


# FIXME: move into sax package for common usage
def parse(stream_or_string, parser=None, bufsize=default_bufsize):
    """Return a DOMEventStream over a file name or an open stream.

    stream_or_string -- a ``str`` (or ``str`` subclass) is treated as a file
        name and opened for reading; anything else is used as the stream
        as-is.
    parser -- parser to feed; a default one from ``_getParser()`` is
        created when this is omitted or false.
    bufsize -- size passed to each ``read`` call on the stream.
    """
    if isinstance(stream_or_string, str):
        # The file is left open on purpose: DOMEventStream reads from it
        # lazily as events are requested.
        stream = open(stream_or_string)
    else:
        stream = stream_or_string
    if not parser:
        parser = _getParser()
    return DOMEventStream(stream, parser, bufsize)
181
def parseString(string, parser=None):
    """Return a DOMEventStream over the contents of ``string``.

    string -- the document text; it is wrapped in a StringIO object and its
        length is used as the read buffer size.
    parser -- parser to feed; a default one from ``_getParser()`` is
        created when this is omitted or false.  (A supplied parser used to
        be discarded and replaced unconditionally.)
    """
    try:
        import cStringIO
        stringio = cStringIO.StringIO
    except ImportError:
        import StringIO
        stringio = StringIO.StringIO

    bufsize = len(string)
    buf = stringio(string)
    if not parser:
        parser = _getParser()
    return DOMEventStream(buf, parser, bufsize)
194