# blob: 04e7bc1aee952fa7ec8d179c8f061ee77860ad6b
"""An XML Reader is the SAX 2 name for an XML parser. XML parsers
should be based on this code."""
Fred Drake07cbc4e2000-09-21 17:43:48 +00003
import handler
from _exceptions import SAXNotSupportedException, SAXNotRecognizedException
5
# ===== XMLREADER =====
7
class XMLReader:
    """Base interface for SAX 2 XML parsers (drivers).

    The class stores the four handler objects an application can
    register and provides accessors for them.  Parsing itself, locale
    selection, features and properties are not implemented here; the
    corresponding methods raise so that concrete drivers override
    whatever they actually support."""

    def __init__(self):
        # Begin with the default handler instances supplied by the
        # handler module; applications replace them via the setters.
        self._cont_handler = handler.ContentHandler()
        self._dtd_handler = handler.DTDHandler()
        self._ent_handler = handler.EntityResolver()
        self._err_handler = handler.ErrorHandler()

    def parse(self, source):
        """Parse an XML document from a system identifier or an InputSource.

        Always raises NotImplementedError in this base class; drivers
        must override it."""
        raise NotImplementedError("This method must be implemented!")

    def getContentHandler(self):
        "Returns the current ContentHandler."
        return self._cont_handler

    def setContentHandler(self, handler):
        "Registers a new object to receive document content events."
        self._cont_handler = handler

    def getDTDHandler(self):
        "Returns the current DTD handler."
        return self._dtd_handler

    def setDTDHandler(self, handler):
        "Register an object to receive basic DTD-related events."
        self._dtd_handler = handler

    def getEntityResolver(self):
        "Returns the current EntityResolver."
        return self._ent_handler

    def setEntityResolver(self, resolver):
        "Register an object to resolve external entities."
        self._ent_handler = resolver

    def getErrorHandler(self):
        "Returns the current ErrorHandler."
        return self._err_handler

    def setErrorHandler(self, handler):
        "Register an object to receive error-message events."
        self._err_handler = handler

    def setLocale(self, locale):
        """Allow an application to set the locale for errors and warnings.

        SAX parsers are not required to provide localization for errors
        and warnings; if they cannot support the requested locale,
        however, they must throw a SAX exception.  Applications may
        request a locale change in the middle of a parse.

        This base implementation supports no locale at all and always
        raises SAXNotSupportedException."""
        raise SAXNotSupportedException("Locale support not implemented")

    def getFeature(self, name):
        """Looks up and returns the state of a SAX2 feature.

        The base class recognizes no features and always raises
        SAXNotRecognizedException naming the requested feature."""
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def setFeature(self, name, state):
        """Sets the state of a SAX2 feature.

        The base class recognizes no features and always raises
        SAXNotRecognizedException naming the requested feature."""
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def getProperty(self, name):
        """Looks up and returns the value of a SAX2 property.

        The base class recognizes no properties and always raises
        SAXNotRecognizedException naming the requested property."""
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    def setProperty(self, name, value):
        """Sets the value of a SAX2 property.

        The base class recognizes no properties and always raises
        SAXNotRecognizedException naming the requested property."""
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)
76
Lars Gustäbel523b0a62000-09-24 18:54:49 +000077import saxutils
78
class IncrementalParser(XMLReader):
    """This interface adds three extra methods to the XMLReader
    interface that allow XML parsers to support incremental
    parsing. Support for this interface is optional, since not all
    underlying XML parsers support this functionality.

    When the parser is instantiated it is ready to begin accepting
    data from the feed method immediately. After parsing has been
    finished with a call to close the reset method must be called to
    make the parser ready to accept new data, either from feed or
    using the parse method.

    Note that these methods must _not_ be called during parsing, that
    is, after parse has been called and before it returns.

    By default, the class also implements the parse method of the
    XMLReader interface on top of feed and close as a convenience to
    SAX 2.0 driver writers."""

    def __init__(self, bufsize=2**16):
        # bufsize is the size argument handed to every read() call made
        # by parse().
        self._bufsize = bufsize
        XMLReader.__init__(self)

    def parse(self, source):
        """Parse a whole document by feeding it to the parser in chunks.

        source is normalized with saxutils.prepare_input_source, passed
        to prepareParser, and its byte stream is then read in pieces of
        at most bufsize and handed to feed.  When the stream is
        exhausted close is called so the driver can finish the document.

        reset is not called here; it must be invoked before the parser
        is reused for another document."""
        source = saxutils.prepare_input_source(source)

        self.prepareParser(source)
        stream = source.getByteStream()
        chunk = stream.read(self._bufsize)
        # Test truthiness rather than comparing with "": an exhausted
        # binary stream returns b"", which never equals "" and would
        # otherwise keep the loop running forever.
        while chunk:
            self.feed(chunk)
            chunk = stream.read(self._bufsize)
        # Signal end of input so the final checks on the document run.
        self.close()

    def feed(self, data):
        """This method gives the raw XML data in the data parameter to
        the parser and makes it parse the data, emitting the
        corresponding events. It is allowed for XML constructs to be
        split across several calls to feed.

        feed may raise SAXException."""
        raise NotImplementedError("This method must be implemented!")

    def prepareParser(self, source):
        """This method is called by the parse implementation to allow
        the SAX 2.0 driver to prepare itself for parsing."""
        raise NotImplementedError("prepareParser must be overridden!")

    def close(self):
        """This method is called when the entire XML document has been
        passed to the parser through the feed method, to notify the
        parser that there are no more data. This allows the parser to
        do the final checks on the document and empty the internal
        data buffer.

        The parser will not be ready to parse another document until
        the reset method has been called.

        close may raise SAXException."""
        raise NotImplementedError("This method must be implemented!")

    def reset(self):
        """This method is called after close has been called to reset
        the parser so that it is ready to parse new documents. The
        results of calling parse or feed after close without calling
        reset are undefined."""
        raise NotImplementedError("This method must be implemented!")
146
# ===== LOCATOR =====
Lars Gustäbel32bf12e2000-09-24 18:39:23 +0000148
class Locator:
    """Interface for associating a SAX event with a document
    location.

    Results are only meaningful while a DocumentHandler method is
    being called; at any other time they are unpredictable.  This
    base implementation knows no position and returns placeholder
    values: -1 for the numeric queries and None for the identifiers."""

    def getColumnNumber(self):
        """Return the column number where the current event ends."""
        return -1

    def getLineNumber(self):
        """Return the line number where the current event ends."""
        return -1

    def getPublicId(self):
        """Return the public identifier for the current event."""
        return None

    def getSystemId(self):
        """Return the system identifier for the current event."""
        return None
Fred Drake45cd9de2000-06-29 19:34:54 +0000170
# ===== INPUTSOURCE =====
172
class InputSource:
    """Bundle of everything an XMLReader needs in order to read an
    entity.

    An instance may carry a public identifier, a system identifier, a
    byte stream (optionally together with its character encoding)
    and/or a character stream.

    Applications build these objects to pass to XMLReader.parse and
    to return from EntityResolver.resolveEntity.

    The object is owned by the application: an XMLReader must not
    modify an InputSource handed to it, though it may copy it and
    modify the copy."""

    def __init__(self, system_id=None):
        self.__system_id = system_id
        # Everything besides the system id stays unset until the
        # matching setter is called.
        self.__public_id = self.__encoding = None
        self.__bytefile = self.__charfile = None

    def setPublicId(self, public_id):
        """Store the public identifier of this InputSource."""
        self.__public_id = public_id

    def getPublicId(self):
        """Return the public identifier of this InputSource."""
        return self.__public_id

    def setSystemId(self, system_id):
        """Store the system identifier of this InputSource."""
        self.__system_id = system_id

    def getSystemId(self):
        """Return the system identifier of this InputSource."""
        return self.__system_id

    def setEncoding(self, encoding):
        """Store the character encoding of this InputSource.

        The value has to be a string that would be valid in an XML
        encoding declaration (section 4.3.3 of the XML
        recommendation).

        When a character stream is also present the encoding
        attribute is ignored."""
        self.__encoding = encoding

    def getEncoding(self):
        """Return the character encoding of this InputSource."""
        return self.__encoding

    def setByteStream(self, bytefile):
        """Store the byte stream for this input source: a Python
        file-like object that does no byte-to-character conversion.

        A SAX parser ignores it when a character stream is also
        given, but prefers it over opening a URI connection on its
        own.

        Applications that know the encoding of the stream should
        announce it through setEncoding."""
        self.__bytefile = bytefile

    def getByteStream(self):
        """Return the byte stream for this input source.

        getEncoding reports the character encoding that goes with
        it, or None when that is unknown."""
        return self.__bytefile

    def setCharacterStream(self, charfile):
        """Store the character stream for this input source.  (It
        must be a Python 1.6 Unicode-wrapped file-like that performs
        conversion to Unicode strings.)

        With a character stream present the SAX parser disregards
        any byte stream and makes no attempt to open a URI connection
        to the system identifier."""
        self.__charfile = charfile

    def getCharacterStream(self):
        """Return the character stream for this input source."""
        return self.__charfile
259
# ===== ATTRIBUTESIMPL =====
261
class AttributesImpl:
    """Attribute collection for non-namespace-aware parsing.

    The object wraps the mapping it is given and exposes both the SAX
    attribute accessors and a read-only, dictionary-like interface.
    Without namespaces a name and its qualified name are the same
    string, so the name/qname conversions return their argument after
    checking that the attribute exists."""

    def __init__(self, attrs):
        """Non-NS-aware implementation.

        attrs should be of the form {name : value}."""
        self._attrs = attrs

    def getLength(self):
        "Return the number of attributes."
        return len(self._attrs)

    def getType(self, name):
        "Return the attribute type, which is always 'CDATA' here."
        return "CDATA"

    def getValue(self, name):
        "Return the value of attribute name; raises KeyError if absent."
        return self._attrs[name]

    def getValueByQName(self, name):
        "Return the value for qualified name; raises KeyError if absent."
        return self._attrs[name]

    def getNameByQName(self, name):
        "Return the name for a qualified name; raises KeyError if absent."
        if name not in self._attrs:
            # Include the missing key so the error is self-explanatory.
            raise KeyError(name)
        return name

    def getQNameByName(self, name):
        "Return the qualified name for a name; raises KeyError if absent."
        if name not in self._attrs:
            raise KeyError(name)
        return name

    def getNames(self):
        "Return a list of the attribute names."
        return list(self._attrs.keys())

    def getQNames(self):
        "Return a list of the qualified attribute names."
        return list(self._attrs.keys())

    def __len__(self):
        return len(self._attrs)

    def __getitem__(self, name):
        return self._attrs[name]

    def keys(self):
        "Return a list of the attribute names."
        return list(self._attrs.keys())

    def __contains__(self, name):
        # Without this, `name in attrs` would fall back to probing
        # __getitem__ with integer indexes and fail with KeyError.
        return name in self._attrs

    def has_key(self, name):
        "Return true if an attribute called name exists."
        # Kept for existing callers; uses `in` because mappings no
        # longer provide has_key on current Python versions.
        return name in self._attrs

    def get(self, name, alternative=None):
        "Return the value of attribute name, or alternative if absent."
        return self._attrs.get(name, alternative)

    def copy(self):
        """Return a new object of the same class.

        The new object wraps the very same underlying mapping; the
        mapping itself is not duplicated."""
        return self.__class__(self._attrs)

    def items(self):
        "Return a list of (name, value) pairs."
        return list(self._attrs.items())

    def values(self):
        "Return a list of the attribute values."
        return list(self._attrs.values())
321
# ===== ATTRIBUTESNSIMPL =====
323
class AttributesNSImpl(AttributesImpl):
    """Attribute collection for namespace-aware parsing.

    Attribute values are keyed by (ns_uri, lname) tuples; a second
    mapping with the same keys records the qualified name of every
    attribute.  Accessors not overridden here are inherited from
    AttributesImpl and operate on the (ns_uri, lname) keys."""

    def __init__(self, attrs, qnames):
        """NS-aware implementation.

        attrs should be of the form {(ns_uri, lname): value, ...}.
        qnames of the form {(ns_uri, lname): qname, ...}."""
        self._attrs = attrs
        self._qnames = qnames

    def getValueByQName(self, name):
        """Return the value of the attribute whose qualified name is name.

        Raises KeyError(name) when no attribute has that qname."""
        # The qname mapping is keyed by (ns_uri, lname), so a reverse
        # lookup means scanning its items.
        for (nsname, qname) in self._qnames.items():
            if qname == name:
                return self._attrs[nsname]

        raise KeyError(name)

    def getNameByQName(self, name):
        """Return the (ns_uri, lname) pair for the qualified name.

        Raises KeyError(name) when no attribute has that qname."""
        for (nsname, qname) in self._qnames.items():
            if qname == name:
                return nsname

        raise KeyError(name)

    def getQNameByName(self, name):
        """Return the qualified name for an (ns_uri, lname) pair.

        Raises KeyError when the pair is unknown."""
        return self._qnames[name]

    def getQNames(self):
        "Return a list of the qualified attribute names."
        return list(self._qnames.values())

    def copy(self):
        """Return a new object of the same class.

        The new object wraps the very same underlying mappings; they
        are not duplicated."""
        return self.__class__(self._attrs, self._qnames)
356
Fred Drake07cbc4e2000-09-21 17:43:48 +0000357
def _test():
    """Smoke test: instantiate each class that needs no arguments.

    The instances are discarded; the only point is that construction
    does not raise."""
    for factory in (XMLReader, IncrementalParser, Locator):
        factory()


if __name__ == "__main__":
    _test()