blob: fea674bbaf9a56cc2c68283978c0d2eced91f22f [file] [log] [blame]
"""An XML Reader is the SAX 2 name for an XML parser. XML parsers
should be based on this code."""
Fred Drake07cbc4e2000-09-21 17:43:48 +00003
import handler
from _exceptions import SAXNotRecognizedException, SAXNotSupportedException
5
Fred Drake45cd9de2000-06-29 19:34:54 +00006# ===== XMLREADER =====
7
class XMLReader:
    """Interface for reading an XML document using callbacks.

    XMLReader is the interface that an XML parser's SAX2 driver must
    implement. This interface allows an application to set and query
    features and properties in the parser, to register event handlers
    for document processing, and to initiate a document parse.

    All SAX interfaces are assumed to be synchronous: the parse
    methods must not return until parsing is complete, and readers
    must wait for an event-handler callback to return before reporting
    the next event.

    This base class only stores the four handler objects; parse() and
    every feature, property and locale operation raise, so a concrete
    driver is expected to override them.
    """

    def __init__(self):
        """Install a default instance of each handler type."""
        self._cont_handler = handler.ContentHandler()
        self._dtd_handler = handler.DTDHandler()
        self._ent_handler = handler.EntityResolver()
        self._err_handler = handler.ErrorHandler()

    def parse(self, source):
        """Parse an XML document from a system identifier or an InputSource.

        Raises NotImplementedError in this base class.
        """
        raise NotImplementedError("This method must be implemented!")

    # NOTE: in the setters below the parameter named ``handler`` shadows the
    # imported ``handler`` module inside the method body.  The name is part
    # of the public signature, so it is kept as is.

    def getContentHandler(self):
        """Return the current ContentHandler."""
        return self._cont_handler

    def setContentHandler(self, handler):
        """Register a new object to receive document content events."""
        self._cont_handler = handler

    def getDTDHandler(self):
        """Return the current DTD handler."""
        return self._dtd_handler

    def setDTDHandler(self, handler):
        """Register an object to receive basic DTD-related events."""
        self._dtd_handler = handler

    def getEntityResolver(self):
        """Return the current EntityResolver."""
        return self._ent_handler

    def setEntityResolver(self, resolver):
        """Register an object to resolve external entities."""
        self._ent_handler = resolver

    def getErrorHandler(self):
        """Return the current ErrorHandler."""
        return self._err_handler

    def setErrorHandler(self, handler):
        """Register an object to receive error-message events."""
        self._err_handler = handler

    def setLocale(self, locale):
        """Allow an application to set the locale for errors and warnings.

        SAX parsers are not required to provide localization for errors
        and warnings; if they cannot support the requested locale,
        however, they must throw a SAX exception. Applications may
        request a locale change in the middle of a parse.

        This base class supports no locale and always raises
        SAXNotSupportedException.
        """
        raise SAXNotSupportedException("Locale support not implemented")

    def getFeature(self, name):
        """Look up and return the state of a SAX2 feature.

        This base class knows no features and always raises
        SAXNotRecognizedException.
        """
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def setFeature(self, name, state):
        """Set the state of a SAX2 feature.

        This base class knows no features and always raises
        SAXNotRecognizedException.
        """
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def getProperty(self, name):
        """Look up and return the value of a SAX2 property.

        This base class knows no properties and always raises
        SAXNotRecognizedException.
        """
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    def setProperty(self, name, value):
        """Set the value of a SAX2 property.

        This base class knows no properties and always raises
        SAXNotRecognizedException.
        """
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)
87
class IncrementalParser(XMLReader):
    """This interface adds three extra methods to the XMLReader
    interface that allow XML parsers to support incremental
    parsing. Support for this interface is optional, since not all
    underlying XML parsers support this functionality.

    When the parser is instantiated it is ready to begin accepting
    data from the feed method immediately. After parsing has been
    finished with a call to close the reset method must be called to
    make the parser ready to accept new data, either from feed or
    using the parse method.

    Note that these methods must _not_ be called during parsing, that
    is, after parse has been called and before it returns.

    By default, the class also implements the parse method of the
    XMLReader interface using the prepareParser, feed and close methods
    as a convenience to SAX 2.0 driver writers.
    """

    def __init__(self, bufsize=2**16):
        """Remember the chunk size used by parse() and set up the reader.

        bufsize is the number passed to each read() call on the input
        stream while parsing.
        """
        self._bufsize = bufsize
        XMLReader.__init__(self)

    def parse(self, source):
        """Parse a complete document by feeding it to the parser in chunks.

        source is normalized with saxutils.prepare_input_source(), handed
        to prepareParser(), and its byte stream is then read bufsize units
        at a time. Every non-empty chunk goes to feed(); once the stream
        is exhausted close() is called so the driver can run its final
        checks.
        """
        # Imported inside the method, as the original code did (presumably
        # to avoid a circular import with saxutils -- confirm).  The
        # package-qualified fallback covers interpreters without implicit
        # relative imports.
        try:
            import saxutils
        except ImportError:
            from xml.sax import saxutils
        source = saxutils.prepare_input_source(source)

        self.prepareParser(source)
        stream = source.getByteStream()
        chunk = stream.read(self._bufsize)
        # A truthiness test ends the loop on "" as well as on b"", so both
        # text-returning and bytes-returning streams terminate at EOF.
        while chunk:
            self.feed(chunk)
            chunk = stream.read(self._bufsize)
        # Signal end of input; without this the document is never finished.
        self.close()

    def feed(self, data):
        """This method gives the raw XML data in the data parameter to
        the parser and makes it parse the data, emitting the
        corresponding events. It is allowed for XML constructs to be
        split across several calls to feed.

        feed may raise SAXException. This base implementation raises
        NotImplementedError."""
        raise NotImplementedError("This method must be implemented!")

    def prepareParser(self, source):
        """This method is called by the parse implementation to allow
        the SAX 2.0 driver to prepare itself for parsing.

        This base implementation raises NotImplementedError."""
        raise NotImplementedError("prepareParser must be overridden!")

    def close(self):
        """This method is called when the entire XML document has been
        passed to the parser through the feed method, to notify the
        parser that there are no more data. This allows the parser to
        do the final checks on the document and empty the internal
        data buffer.

        The parser will not be ready to parse another document until
        the reset method has been called.

        close may raise SAXException. This base implementation raises
        NotImplementedError."""
        raise NotImplementedError("This method must be implemented!")

    def reset(self):
        """This method is called after close has been called to reset
        the parser so that it is ready to parse new documents. The
        results of calling parse or feed after close without calling
        reset are undefined.

        This base implementation raises NotImplementedError."""
        raise NotImplementedError("This method must be implemented!")
156
157# ===== LOCATOR =====
Lars Gustäbel32bf12e2000-09-24 18:39:23 +0000158
class Locator:
    """Interface for associating a SAX event with a document location.

    A locator object will return valid results only during calls to
    DocumentHandler methods; at any other time, the results are
    unpredictable.

    The methods of this base class return fixed values (-1 for the
    numeric positions, None for the identifiers).
    """

    def getColumnNumber(self):
        """Return the column number where the current event ends.

        This base implementation always returns -1.
        """
        return -1

    def getLineNumber(self):
        """Return the line number where the current event ends.

        This base implementation always returns -1.
        """
        return -1

    def getPublicId(self):
        """Return the public identifier for the current event.

        This base implementation always returns None.
        """
        return None

    def getSystemId(self):
        """Return the system identifier for the current event.

        This base implementation always returns None.
        """
        return None
Fred Drake45cd9de2000-06-29 19:34:54 +0000180
Lars Gustäbel523b0a62000-09-24 18:54:49 +0000181# ===== INPUTSOURCE =====
182
class InputSource:
    """Encapsulation of the information needed by the XMLReader to
    read entities.

    An instance may carry a public identifier, a system identifier, a
    byte stream (possibly with character encoding information) and/or
    the character stream of an entity.

    Applications create objects of this class to pass to
    XMLReader.parse and to return from EntityResolver.resolveEntity.

    An InputSource belongs to the application: the XMLReader is not
    allowed to modify InputSource objects passed to it, although it may
    make copies and modify those.
    """

    def __init__(self, system_id = None):
        """Create an input source, optionally with a system identifier.

        Every other piece of information starts out as None.
        """
        self.__charfile = None
        self.__bytefile = None
        self.__encoding = None
        self.__public_id = None
        self.__system_id = system_id

    # --- identifiers ---

    def setPublicId(self, public_id):
        """Set the public identifier of this InputSource."""
        self.__public_id = public_id

    def getPublicId(self):
        """Return the public identifier of this InputSource."""
        return self.__public_id

    def setSystemId(self, system_id):
        """Set the system identifier of this InputSource."""
        self.__system_id = system_id

    def getSystemId(self):
        """Return the system identifier of this InputSource."""
        return self.__system_id

    # --- encoding ---

    def setEncoding(self, encoding):
        """Set the character encoding of this InputSource.

        The encoding must be a string acceptable for an XML encoding
        declaration (see section 4.3.3 of the XML recommendation).

        The encoding is ignored if the InputSource also contains a
        character stream.
        """
        self.__encoding = encoding

    def getEncoding(self):
        """Return the character encoding of this InputSource."""
        return self.__encoding

    # --- streams ---

    def setByteStream(self, bytefile):
        """Set the byte stream for this input source.

        The stream is a Python file-like object which does not perform
        byte-to-character conversion.

        The SAX parser will ignore this if there is also a character
        stream specified, but it will use a byte stream in preference
        to opening a URI connection itself.

        If the application knows the character encoding of the byte
        stream, it should set it with the setEncoding method.
        """
        self.__bytefile = bytefile

    def getByteStream(self):
        """Return the byte stream for this input source.

        The getEncoding method will return the character encoding for
        this byte stream, or None if unknown.
        """
        return self.__bytefile

    def setCharacterStream(self, charfile):
        """Set the character stream for this input source.

        The stream must be a Unicode-wrapped file-like object that
        performs conversion to Unicode strings.

        If there is a character stream specified, the SAX parser will
        ignore any byte stream and will not attempt to open a URI
        connection to the system identifier.
        """
        self.__charfile = charfile

    def getCharacterStream(self):
        """Return the character stream for this input source."""
        return self.__charfile
269
Lars Gustäbel32bf12e2000-09-24 18:39:23 +0000270# ===== ATTRIBUTESIMPL =====
271
class AttributesImpl:
    """Attribute collection that is not namespace-aware.

    The object wraps a mapping of attribute name to attribute value and
    exposes it both through the SAX-style accessors (getValue, getNames,
    ...) and through a read-only, dictionary-like interface (len(),
    indexing, ``in``, keys(), items(), ...). Plain names and qualified
    names are the same thing in this class.
    """

    def __init__(self, attrs):
        """Non-NS-aware implementation.

        attrs should be of the form {name : value}."""
        self._attrs = attrs

    def getLength(self):
        """Return the number of attributes."""
        return len(self._attrs)

    def getType(self, name):
        """Return the attribute type, which is always "CDATA" here.

        The name is not looked up, so no KeyError is raised for an
        unknown name.
        """
        return "CDATA"

    def getValue(self, name):
        """Return the value of attribute name; raise KeyError if absent."""
        return self._attrs[name]

    def getValueByQName(self, name):
        """Return the value for qualified name name; KeyError if absent."""
        return self._attrs[name]

    def getNameByQName(self, name):
        """Return the name for a qualified name (the same string here).

        Raises KeyError if there is no such attribute.
        """
        if name not in self._attrs:
            raise KeyError(name)
        return name

    def getQNameByName(self, name):
        """Return the qualified name for a name (the same string here).

        Raises KeyError if there is no such attribute.
        """
        if name not in self._attrs:
            raise KeyError(name)
        return name

    def getNames(self):
        """Return the attribute names."""
        return self._attrs.keys()

    def getQNames(self):
        """Return the qualified attribute names (same as getNames here)."""
        return self._attrs.keys()

    def __len__(self):
        return len(self._attrs)

    def __getitem__(self, name):
        return self._attrs[name]

    def __contains__(self, name):
        # Without this, ``name in attrs`` falls back to probing
        # __getitem__ with 0, 1, 2, ... and blows up with KeyError.
        return name in self._attrs

    def keys(self):
        """Return the attribute names."""
        return self._attrs.keys()

    def has_key(self, name):
        """Return true if an attribute called name exists."""
        return name in self._attrs

    def get(self, name, alternative=None):
        """Return the value of name, or alternative if it is absent."""
        return self._attrs.get(name, alternative)

    def copy(self):
        """Return a new object of the same class with the same content.

        The underlying mapping is duplicated, so later changes to the
        mapping this object was built from do not show up in the copy.
        """
        return self.__class__(dict(self._attrs))

    def items(self):
        """Return the (name, value) pairs."""
        return self._attrs.items()

    def values(self):
        """Return the attribute values."""
        return self._attrs.values()
331
Lars Gustäbel32bf12e2000-09-24 18:39:23 +0000332# ===== ATTRIBUTESNSIMPL =====
333
class AttributesNSImpl(AttributesImpl):
    """Namespace-aware attribute collection.

    Values are keyed by (ns_uri, lname) tuples; a second mapping relates
    each such tuple to the qualified name of the attribute. Everything
    not overridden here is inherited from AttributesImpl and operates on
    the (ns_uri, lname) keys.
    """

    def __init__(self, attrs, qnames):
        """NS-aware implementation.

        attrs should be of the form {(ns_uri, lname): value, ...}.
        qnames of the form {(ns_uri, lname): qname, ...}."""
        self._attrs = attrs
        self._qnames = qnames

    def getValueByQName(self, name):
        """Return the value of the attribute whose qualified name is name.

        Raises KeyError if no attribute has that qualified name.
        """
        for (nsname, qname) in self._qnames.items():
            if qname == name:
                return self._attrs[nsname]

        raise KeyError(name)

    def getNameByQName(self, name):
        """Return the (ns_uri, lname) key for qualified name name.

        Raises KeyError if no attribute has that qualified name.
        """
        for (nsname, qname) in self._qnames.items():
            if qname == name:
                return nsname

        raise KeyError(name)

    def getQNameByName(self, name):
        """Return the qualified name for the (ns_uri, lname) key name.

        Raises KeyError if the key is unknown.
        """
        return self._qnames[name]

    def getQNames(self):
        """Return the qualified names of all attributes."""
        return self._qnames.values()

    def copy(self):
        """Return a new object of the same class with the same content.

        Both underlying mappings are duplicated, so later changes to the
        mappings this object was built from do not show up in the copy.
        """
        return self.__class__(dict(self._attrs), dict(self._qnames))
366
Fred Drake07cbc4e2000-09-21 17:43:48 +0000367
def _test():
    """Smoke test: check that the no-argument constructors run.

    Instantiates XMLReader, IncrementalParser and Locator in that order
    and discards the instances.
    """
    for factory in (XMLReader, IncrementalParser, Locator):
        factory()


if __name__ == "__main__":
    _test()