blob: 9c856469b1eeef38b0f1e8a5b5aef2455dd3ec5b [file] [log] [blame]
Fred Drake55c38192000-06-29 19:39:57 +00001import minidom
2import types
3import string
4import sys
5import pyexpat
6from xml.sax import ExpatParser
7
8#todo: SAX2/namespace handling
9
# Event tokens.  Each queued event is a (token, node) tuple whose first
# item is one of these strings.

# Document boundaries.
START_DOCUMENT = "START_DOCUMENT"
END_DOCUMENT = "END_DOCUMENT"

# Element boundaries.
START_ELEMENT = "START_ELEMENT"
END_ELEMENT = "END_ELEMENT"

# Leaf content.
COMMENT = "COMMENT"
PROCESSING_INSTRUCTION = "PROCESSING_INSTRUCTION"
IGNORABLE_WHITESPACE = "IGNORABLE_WHITESPACE"
CHARACTERS = "CHARACTERS"
18
class PullDOM:
    """SAX-style content handler that turns parser callbacks into DOM events.

    Every callback creates a minidom node and queues a ``(token, node)``
    event.  The queue is a singly linked list of two-item lists
    ``[event, next_cell]``: ``firstEvent`` is a sentinel head cell and
    ``lastEvent`` is the cell new events are linked after.

    Nodes get their ``parentNode`` (and, for most node kinds, sibling
    links) set here but are NOT appended to ``parent.childNodes`` by this
    class.
    """

    def __init__(self):
        self.firstEvent = [None, None]
        self.lastEvent = self.firstEvent

    # --- Internal helpers

    def _pushEvent(self, token, node):
        """Link a new ``(token, node)`` event after the current tail cell."""
        cell = [(token, node), None]
        self.lastEvent[1] = cell
        self.lastEvent = cell

    def _attach(self, node):
        """Point *node* at the current node as parent and link it to the
        parent's last already-attached child, if there is one."""
        parent = self.curNode
        node.parentNode = parent
        if parent.childNodes:
            node.previousSibling = parent.childNodes[-1]
            node.previousSibling.nextSibling = node

    # --- Content handler callbacks

    def setDocumentLocator(self, locator):
        """Accept and ignore the locator."""
        pass

    def startElement(self, tagName, attrs):
        """Create an element with the given attributes, make it the
        current node and queue a START_ELEMENT event."""
        if not hasattr(self, "curNode"):
            # FIXME: hack!  The parser never announced the document, so
            # create it on demand.
            self.startDocument()

        node = self.document.createElement(tagName)  # FIXME namespaces!
        # FIXME: do I have to screen namespace attributes
        for attr in attrs.keys():
            node.setAttribute(attr, attrs[attr])

        self._attach(node)
        self.curNode = node
        self._pushEvent(START_ELEMENT, node)

    def endElement(self, name):
        """Queue END_ELEMENT for the current node and step back up to its
        parent.  *name* is not checked against the current node."""
        node = self.curNode
        self._pushEvent(END_ELEMENT, node)
        self.curNode = node.parentNode

    def comment(self, s):
        """Create a comment node under the current node and queue COMMENT."""
        node = self.document.createComment(s)
        self._attach(node)
        self._pushEvent(COMMENT, node)

    def processingInstruction(self, target, data):
        """Create a processing-instruction node under the current node and
        queue PROCESSING_INSTRUCTION."""
        node = self.document.createProcessingInstruction(target, data)
        self._attach(node)
        self._pushEvent(PROCESSING_INSTRUCTION, node)

    def ignorableWhitespace(self, chars):
        """Create a text node holding *chars* under the current node and
        queue IGNORABLE_WHITESPACE."""
        # The callback receives only the text itself; the previous code
        # sliced it with undefined ``start``/``length`` names and always
        # raised NameError.
        node = self.document.createTextNode(chars)
        self._attach(node)
        self._pushEvent(IGNORABLE_WHITESPACE, node)

    def characters(self, chars):
        """Create a text node holding *chars* and queue CHARACTERS.

        Only ``parentNode`` is set; sibling links are left untouched.
        """
        node = self.document.createTextNode(chars)
        node.parentNode = self.curNode
        self._pushEvent(CHARACTERS, node)

    def startDocument(self):
        """Create the document, make it the current node and queue
        START_DOCUMENT."""
        node = self.curNode = self.document = minidom.Document()
        node.parentNode = None
        self._pushEvent(START_DOCUMENT, node)

    def endDocument(self):
        """Record the document element (if any child element is attached)
        and queue END_DOCUMENT carrying the document node."""
        # Every element must have been closed by now.
        assert not self.curNode.parentNode
        for child in self.curNode.childNodes:
            if child.nodeType == child.ELEMENT_NODE:
                self.document.documentElement = child
        #if not self.document.documentElement:
        #    raise Error, "No document element"

        # Report the document itself.  The loop variable used previously is
        # unbound when no children are attached (NameError) and is a child
        # node, not the document, otherwise.
        self._pushEvent(END_DOCUMENT, self.document)
111
class ErrorHandler:
    """Minimal error handler: warnings are printed, errors are raised."""

    def warning(self, exception):
        """Print the warning and carry on."""
        # Parenthesised single value: prints exactly what the bare print
        # statement printed.
        print(exception)

    def error(self, exception):
        """Raise the reported error."""
        raise exception

    def fatalError(self, exception):
        """Raise the reported fatal error."""
        raise exception
119
class DOMEventStream:
    """Pulls ``(token, node)`` events out of a stream on demand.

    Data is read from ``stream`` in ``bufsize`` pieces and fed to
    ``parser``; the parser's content handler is a PullDOM whose event
    queue is drained by getEvent().
    """

    def __init__(self, stream, parser, bufsize):
        self.stream = stream
        self.parser = parser
        self.bufsize = bufsize
        self.reset()

    def reset(self):
        """Install a fresh PullDOM as the parser's content handler."""
        self.pulldom = PullDOM()
        self.parser.setContentHandler(self.pulldom)

    def __getitem__(self, pos):
        """Return the next event; *pos* is ignored.

        IndexError is raised once getEvent() has nothing more to return.
        """
        event = self.getEvent()
        if not event:
            raise IndexError
        return event

    def expandNode(self, node):
        """Consume events, appending each reported node to its parent's
        childNodes, until an event for *node* itself shows up."""
        while 1:
            event = self.getEvent()
            if not event:
                break
            token, cur_node = event
            if cur_node is node:
                return
            # A node reported by END_ELEMENT was already appended when its
            # start event was seen.
            if token != END_ELEMENT:
                cur_node.parentNode.childNodes.append(cur_node)

        # Events ran out without meeting *node* again.  For a document,
        # record its (last) element child as the document element.
        if node.nodeType == minidom.Node.DOCUMENT_NODE:
            for child in node.childNodes:
                if child.nodeType == minidom.Node.ELEMENT_NODE:
                    node.documentElement = child

    def getEvent(self):
        """Return the next queued event, feeding the parser as needed.

        Returns None when the queue is empty and the stream has no more
        data.
        """
        if not self.pulldom.firstEvent[1]:
            # Queue drained: new events must be linked after the head cell.
            self.pulldom.lastEvent = self.pulldom.firstEvent
        while 1:
            head = self.pulldom.firstEvent
            if head[1]:
                break
            chunk = self.stream.read(self.bufsize)
            if not chunk:
                #FIXME: why doesn't Expat close work?
                #self.parser.close()
                return None
            self.parser.feed(chunk)
        # Unlink the first queued cell and hand back its event.
        cell = head[1]
        head[1] = cell[1]
        return cell[0]
163
164# FIXME: sax2
165#def _getParser( ):
166 # from xml.sax.saxexts import make_parser
167 # expat doesn't report errors properly! Figure it out
168 # return make_parser()
169 # return make_parser("xml.sax.drivers.drv_xmllib")
170
171
172
def _getParser():
    """Return a newly created ExpatParser."""
    parser = ExpatParser()
    return parser
175
# Default number of characters read from the stream per parser feed.
default_bufsize = (2 ** 14) - 20


# FIXME: move into sax package for common usage
def parse(stream_or_string, parser=None, bufsize=default_bufsize):
    """Return a DOMEventStream over a file name or an open stream.

    stream_or_string -- a path name (byte or unicode string, or a subclass
                        of either), which is opened for reading, or any
                        other object, which is used as the stream as-is.
    parser           -- parser to drive; one is created with _getParser()
                        when this is None.
    bufsize          -- size passed to each ``stream.read()`` call.

    A file opened here is handed to the returned DOMEventStream; this
    function does not close it.
    """
    # isinstance() rather than an exact type comparison, so unicode path
    # names and string subclasses are opened instead of being mistaken
    # for streams.
    if isinstance(stream_or_string, (type(""), type(u""))):
        stream = open(stream_or_string)
    else:
        stream = stream_or_string
    if parser is None:
        parser = _getParser()
    return DOMEventStream(stream, parser, bufsize)
186
def parseString(string, parser=None):
    """Return a DOMEventStream over XML text held in memory.

    string -- the document text; it is wrapped in an in-memory stream and
              its length is used as the read size, so a single read
              fetches the whole document.
    parser -- parser to drive; one is created with _getParser() when this
              is None.
    """
    try:
        from cStringIO import StringIO as stringio
    except ImportError:
        try:
            from StringIO import StringIO as stringio
        except ImportError:
            # Neither legacy module is available: fall back to io and pick
            # the class that matches the kind of string supplied.
            import io
            if isinstance(string, bytes):
                stringio = io.BytesIO
            else:
                stringio = io.StringIO

    bufsize = len(string)
    # Keep the stream object: it was previously created and thrown away,
    # and an undefined name was passed on instead (NameError).
    buf = stringio(string)
    # Honour a caller-supplied parser; it used to be silently replaced.
    if parser is None:
        parser = _getParser()
    return DOMEventStream(buf, parser, bufsize)
199
200#FIXME: Use Lars' instead!!!
class SAX_expat:
    """SAX driver for the Pyexpat C module.

    Wraps a pyexpat parser: a document handler registered with
    setDocumentHandler() receives the parser callbacks, and an error
    handler registered with setErrorHandler() is told about parse
    failures through ``fatalError``.
    """

    def __init__(self):
        self.parser = pyexpat.ParserCreate()
        self.started = 0
        # Nothing is registered until the set*Handler() methods are called.
        self.doc_handler = None
        self.err_handler = None

    def _install_handlers(self, handler):
        """Point the current pyexpat parser's callbacks at *handler*."""
        self.parser.StartElementHandler = handler.startElement
        self.parser.EndElementHandler = handler.endElement
        # Handlers written for the old ``datachars`` name keep working;
        # handlers that only provide ``characters`` are accepted too.
        if hasattr(handler, "datachars"):
            self.parser.CharacterDataHandler = handler.datachars
        else:
            self.parser.CharacterDataHandler = handler.characters
        self.parser.ProcessingInstructionHandler = \
            handler.processingInstruction

    def setDocumentHandler(self, handler):
        """Register *handler* to receive element, character-data and
        processing-instruction callbacks."""
        self._install_handlers(handler)
        self.doc_handler = handler

    def setErrorHandler(self, handler):
        """Register the object whose ``fatalError`` receives parse errors."""
        self.err_handler = handler

    # --- Locator methods. Only usable after errors.

    def getLineNumber(self):
        return self.parser.ErrorLineNumber

    def getColumnNumber(self):
        return self.parser.ErrorColumnNumber

    # --- Internal

    def __report_error(self):
        # Pass pyexpat's message for the current error code to the
        # registered error handler.
        msg = pyexpat.ErrorString(self.parser.ErrorCode)
        self.err_handler.fatalError(msg)

    # --- EXPERIMENTAL PYTHON SAX EXTENSIONS

    def get_parser_name(self):
        return "pyexpat"

    def get_parser_version(self):
        return "Unknown"

    def get_driver_version(self):
        # Use a module-level ``version`` when one is defined; otherwise
        # report "Unknown" instead of failing with NameError.
        try:
            return version
        except NameError:
            return "Unknown"

    def is_validating(self):
        return 0

    def is_dtd_reading(self):
        return 0

    def reset(self):
        """Replace the pyexpat parser with a new one so that another
        document can be fed."""
        self.parser = pyexpat.ParserCreate()
        # The next feed() must announce a new document.
        self.started = 0
        # Re-wire the registered handler.  The callbacks were previously
        # bound to methods of this class that do not exist.
        if self.doc_handler is not None:
            self._install_handlers(self.doc_handler)

    def feed(self, data):
        """Parse one piece of the document, announcing the document start
        to the handler before the first piece."""
        if not self.started:
            self.doc_handler.startDocument()
            self.started = 1
        if not self.parser.Parse(data):
            self.__report_error()

    def close(self):
        """Tell pyexpat the input is complete, announce the document end
        to the handler and drop the parser."""
        if not self.parser.Parse("", 1):
            self.__report_error()
        self.doc_handler.endDocument()
        self.parser = None