blob: e5133f6a9105fb4ae4e7ad77dc5af143dfb3564e [file] [log] [blame]
"""An XML Reader is the SAX 2 name for an XML parser. XML parsers
should be based on this code."""
Fred Drake07cbc4e2000-09-21 17:43:48 +00003
4import handler
5
Fred Drake45cd9de2000-06-29 19:34:54 +00006# ===== XMLREADER =====
7
class XMLReader:
    """Base class for SAX 2 XML readers (parsers).

    Stores the content, DTD, entity-resolver and error handler
    registrations and declares the parse/feature/property entry points.
    In this base class parse() raises NotImplementedError, setLocale()
    reports that locales are unsupported, and every feature or property
    name is reported as unrecognized; concrete drivers override what
    they support."""

    def __init__(self):
        self._cont_handler = handler.ContentHandler()
        # No default DTD handler or entity resolver is installed.  The
        # attributes are still initialised so that getDTDHandler() and
        # getEntityResolver() return None instead of raising
        # AttributeError when called before the matching setter.
        self._dtd_handler = None
        self._ent_handler = None
        self._err_handler = handler.ErrorHandler()

    def parse(self, source):
        "Parse an XML document from a system identifier or an InputSource."
        raise NotImplementedError("This method must be implemented!")

    def getContentHandler(self):
        "Returns the current ContentHandler."
        return self._cont_handler

    def setContentHandler(self, handler):
        "Registers a new object to receive document content events."
        self._cont_handler = handler

    def getDTDHandler(self):
        "Returns the current DTD handler (None if none was registered)."
        return self._dtd_handler

    def setDTDHandler(self, handler):
        "Register an object to receive basic DTD-related events."
        self._dtd_handler = handler

    def getEntityResolver(self):
        "Returns the current EntityResolver (None if none was registered)."
        return self._ent_handler

    def setEntityResolver(self, resolver):
        "Register an object to resolve external entities."
        self._ent_handler = resolver

    def getErrorHandler(self):
        "Returns the current ErrorHandler."
        return self._err_handler

    def setErrorHandler(self, handler):
        "Register an object to receive error-message events."
        self._err_handler = handler

    def setLocale(self, locale):
        """Allow an application to set the locale for errors and warnings.

        SAX parsers are not required to provide localization for errors
        and warnings; if they cannot support the requested locale,
        however, they must throw a SAX exception. Applications may
        request a locale change in the middle of a parse.

        This base implementation always raises
        SAXNotSupportedException."""
        # Imported here rather than at module level: the exception
        # classes were never brought into scope by this module, and a
        # function-level import cannot create an import cycle.
        from xml.sax import SAXNotSupportedException
        raise SAXNotSupportedException("Locale support not implemented")

    def getFeature(self, name):
        """Looks up and returns the state of a SAX2 feature.

        This base implementation knows no features and always raises
        SAXNotRecognizedException."""
        from xml.sax import SAXNotRecognizedException
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def setFeature(self, name, state):
        """Sets the state of a SAX2 feature.

        This base implementation knows no features and always raises
        SAXNotRecognizedException."""
        from xml.sax import SAXNotRecognizedException
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def getProperty(self, name):
        """Looks up and returns the value of a SAX2 property.

        This base implementation knows no properties and always raises
        SAXNotRecognizedException."""
        from xml.sax import SAXNotRecognizedException
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    def setProperty(self, name, value):
        """Sets the value of a SAX2 property.

        This base implementation knows no properties and always raises
        SAXNotRecognizedException."""
        from xml.sax import SAXNotRecognizedException
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)
76
Lars Gustäbel523b0a62000-09-24 18:54:49 +000077import saxutils
78
class IncrementalParser(XMLReader):
    """This interface adds three extra methods to the XMLReader
    interface that allow XML parsers to support incremental
    parsing. Support for this interface is optional, since not all
    underlying XML parsers support this functionality.

    When the parser is instantiated it is ready to begin accepting
    data from the feed method immediately. After parsing has been
    finished with a call to close the reset method must be called to
    make the parser ready to accept new data, either from feed or
    using the parse method.

    Note that these methods must _not_ be called during parsing, that
    is, after parse has been called and before it returns.

    By default, the class also implements the parse method of the XMLReader
    interface using the feed, close and reset methods of the
    IncrementalParser interface as a convenience to SAX 2.0 driver
    writers."""

    def __init__(self, bufsize=2**16):
        # bufsize is the size passed to each read() call made by parse().
        self._bufsize = bufsize
        XMLReader.__init__(self)

    def parse(self, source):
        """Parse a whole document by feeding it to the parser in chunks.

        source is passed through saxutils.prepare_input_source and the
        result's byte stream is read in blocks of at most the bufsize
        given to the constructor.  Each block is handed to feed(); once
        the stream is exhausted close() is called so the parser can run
        its final checks, followed by reset() so the parser is ready
        for another document."""
        source = saxutils.prepare_input_source(source)

        self.prepareParser(source)
        stream = source.getByteStream()
        chunk = stream.read(self._bufsize)
        # Truthiness test: end of input is an empty result, whether the
        # stream yields "" or b"".
        while chunk:
            self.feed(chunk)
            chunk = stream.read(self._bufsize)

        # close() signals end of document; reset() must follow it
        # before the parser may be reused.
        self.close()
        self.reset()

    def feed(self, data):
        """This method gives the raw XML data in the data parameter to
        the parser and makes it parse the data, emitting the
        corresponding events. It is allowed for XML constructs to be
        split across several calls to feed.

        feed may raise SAXException."""
        raise NotImplementedError("This method must be implemented!")

    def prepareParser(self, source):
        """This method is called by the parse implementation to allow
        the SAX 2.0 driver to prepare itself for parsing."""
        raise NotImplementedError("prepareParser must be overridden!")

    def close(self):
        """This method is called when the entire XML document has been
        passed to the parser through the feed method, to notify the
        parser that there are no more data. This allows the parser to
        do the final checks on the document and empty the internal
        data buffer.

        The parser will not be ready to parse another document until
        the reset method has been called.

        close may raise SAXException."""
        raise NotImplementedError("This method must be implemented!")

    def reset(self):
        """This method is called after close has been called to reset
        the parser so that it is ready to parse new documents. The
        results of calling parse or feed after close without calling
        reset are undefined."""
        raise NotImplementedError("This method must be implemented!")
148
149# ===== LOCATOR =====
Lars Gustäbel32bf12e2000-09-24 18:39:23 +0000150
class Locator:
    """Interface for associating a SAX event with a document
    location.

    Results are only meaningful while a DocumentHandler method is
    being called; at any other time they are unpredictable.  The
    defaults implemented here report an unknown position (-1) and
    unknown identifiers (None)."""

    def getColumnNumber(self):
        """Return the column number where the current event ends."""
        return -1

    def getLineNumber(self):
        """Return the line number where the current event ends."""
        return -1

    def getPublicId(self):
        """Return the public identifier for the current event."""
        return None

    def getSystemId(self):
        """Return the system identifier for the current event."""
        return None
Fred Drake45cd9de2000-06-29 19:34:54 +0000172
Lars Gustäbel523b0a62000-09-24 18:54:49 +0000173# ===== INPUTSOURCE =====
174
class InputSource:
    """Encapsulation of the information needed by the XMLReader to
    read entities.

    An instance can carry a public identifier, a system identifier, a
    byte stream (optionally with a character encoding) and/or a
    character stream for an entity.

    Applications create these objects to pass to XMLReader.parse and
    to return from EntityResolver.resolveEntity.

    An InputSource belongs to the application: the XMLReader must not
    modify one it has been handed, although it may copy it and modify
    the copy."""

    def __init__(self, system_id=None):
        # Everything except the system identifier starts out unset.
        self.__system_id = system_id
        self.__public_id = None
        self.__encoding = None
        self.__bytefile = None
        self.__charfile = None

    def setPublicId(self, public_id):
        """Store the public identifier of this InputSource."""
        self.__public_id = public_id

    def getPublicId(self):
        """Return the public identifier of this InputSource."""
        return self.__public_id

    def setSystemId(self, system_id):
        """Store the system identifier of this InputSource."""
        self.__system_id = system_id

    def getSystemId(self):
        """Return the system identifier of this InputSource."""
        return self.__system_id

    def setEncoding(self, encoding):
        """Store the character encoding of this InputSource.

        The value must be a string acceptable for an XML encoding
        declaration (see section 4.3.3 of the XML recommendation).

        The encoding is ignored when the InputSource also contains a
        character stream."""
        self.__encoding = encoding

    def getEncoding(self):
        """Return the character encoding of this InputSource."""
        return self.__encoding

    def setByteStream(self, bytefile):
        """Store the byte stream for this input source: a Python
        file-like object that performs no byte-to-character
        conversion.

        The SAX parser ignores the byte stream when a character stream
        is also specified, but prefers it over opening a URI
        connection itself.

        If the application knows the character encoding of the byte
        stream, it should record it with setEncoding."""
        self.__bytefile = bytefile

    def getByteStream(self):
        """Return the byte stream for this input source.

        getEncoding returns the character encoding for this byte
        stream, or None if it is unknown."""
        return self.__bytefile

    def setCharacterStream(self, charfile):
        """Store the character stream for this input source. (The
        stream must be a Python 1.6 Unicode-wrapped file-like that
        performs conversion to Unicode strings.)

        When a character stream is specified the SAX parser ignores
        any byte stream and does not attempt to open a URI connection
        to the system identifier."""
        self.__charfile = charfile

    def getCharacterStream(self):
        """Return the character stream for this input source."""
        return self.__charfile
261
Lars Gustäbel32bf12e2000-09-24 18:39:23 +0000262# ===== ATTRIBUTESIMPL =====
263
class AttributesImpl:
    """Non-namespace-aware attribute collection.

    Wraps a {name: value} mapping and exposes it both through the SAX
    attribute accessors (getValue, getNames, ...) and through a
    read-only, dictionary-like interface (len(), indexing, "in",
    keys(), items(), ...).  Because there are no namespaces, an
    attribute's name and its qualified name are the same string."""

    def __init__(self, attrs):
        """Non-NS-aware implementation.

        attrs should be of the form {name : value}."""
        self._attrs = attrs

    def getLength(self):
        "Return the number of attributes."
        return len(self._attrs)

    def getType(self, name):
        "Return the attribute type, which is always 'CDATA' here."
        return "CDATA"

    def getValue(self, name):
        "Return the value of attribute name; raises KeyError if absent."
        return self._attrs[name]

    def getValueByQName(self, name):
        "Return the value for qualified name name; raises KeyError if absent."
        return self._attrs[name]

    def getNameByQName(self, name):
        "Return the name for a qualified name; raises KeyError if absent."
        # Membership test instead of dict.has_key(), which no longer
        # exists on Python 3 dictionaries.
        if name not in self._attrs:
            raise KeyError(name)
        return name

    def getQNameByName(self, name):
        "Return the qualified name for a name; raises KeyError if absent."
        if name not in self._attrs:
            raise KeyError(name)
        return name

    def getNames(self):
        "Return a list of the attribute names."
        return list(self._attrs.keys())

    def getQNames(self):
        "Return a list of the qualified attribute names."
        return list(self._attrs.keys())

    def __len__(self):
        return len(self._attrs)

    def __getitem__(self, name):
        return self._attrs[name]

    def __contains__(self, name):
        # Without this, "name in attrs" falls back to probing
        # __getitem__ with integers and fails with KeyError.
        return name in self._attrs

    def keys(self):
        "Return a list of the attribute names."
        return list(self._attrs.keys())

    def has_key(self, name):
        "Return true if an attribute called name is present."
        return name in self._attrs

    def get(self, name, alternative=None):
        "Return the value of name, or alternative if it is absent."
        return self._attrs.get(name, alternative)

    def copy(self):
        """Return a new object of the same class wrapping the same
        underlying mapping (the mapping itself is not duplicated)."""
        return self.__class__(self._attrs)

    def items(self):
        "Return a list of (name, value) pairs."
        return list(self._attrs.items())

    def values(self):
        "Return a list of the attribute values."
        return list(self._attrs.values())
323
Lars Gustäbel32bf12e2000-09-24 18:39:23 +0000324# ===== ATTRIBUTESNSIMPL =====
325
class AttributesNSImpl(AttributesImpl):
    """Namespace-aware attribute collection.

    Attributes are keyed by (ns_uri, lname) tuples; a second mapping
    records the qualified name belonging to each of those keys.  The
    accessors not overridden here are inherited from AttributesImpl
    and operate on the (ns_uri, lname) keys."""

    def __init__(self, attrs, qnames):
        """NS-aware implementation.

        attrs should be of the form {(ns_uri, lname): value, ...}.
        qnames of the form {(ns_uri, lname): qname, ...}."""
        self._attrs = attrs
        self._qnames = qnames

    def getValueByQName(self, name):
        """Return the value of the attribute whose qualified name is
        name; raises KeyError(name) if no attribute has that qname."""
        # qnames maps key -> qname, so the reverse lookup is a scan.
        for (nsname, qname) in self._qnames.items():
            if qname == name:
                return self._attrs[nsname]

        raise KeyError(name)

    def getNameByQName(self, name):
        """Return the (ns_uri, lname) key whose qualified name is
        name; raises KeyError(name) if no attribute has that qname."""
        for (nsname, qname) in self._qnames.items():
            if qname == name:
                return nsname

        raise KeyError(name)

    def getQNameByName(self, name):
        """Return the qualified name for the (ns_uri, lname) key name;
        raises KeyError if the key is unknown."""
        return self._qnames[name]

    def getQNames(self):
        "Return a list of the qualified attribute names."
        return list(self._qnames.values())

    def copy(self):
        """Return a new object of the same class wrapping the same
        underlying mappings (the mappings are not duplicated)."""
        return self.__class__(self._attrs, self._qnames)
358
Fred Drake07cbc4e2000-09-21 17:43:48 +0000359
def _test():
    """Smoke test: check that the basic classes can be instantiated
    without arguments."""
    for factory in (XMLReader, IncrementalParser, Locator):
        factory()


# Run the smoke test when the module is executed as a script.
if __name__ == "__main__":
    _test()