blob: 6dae0b539475317d6e089f99e9e8ee881a290466 [file] [log] [blame]
Fred Drake45cd9de2000-06-29 19:34:54 +00001"""An XML Reader is the SAX 2 name for an XML parser. XML Parsers
2should be based on this code. """
Fred Drake07cbc4e2000-09-21 17:43:48 +00003
import handler
from _exceptions import SAXNotSupportedException, SAXNotRecognizedException
5
Fred Drake45cd9de2000-06-29 19:34:54 +00006# ===== XMLREADER =====
7
class XMLReader:
    """Base class for SAX 2 XML readers (parsers).

    The class stores the handler objects registered by the application
    and provides default implementations of the feature, property and
    locale methods that simply reject every request.  Concrete parsers
    must override parse."""

    def __init__(self):
        self._cont_handler = handler.ContentHandler()
        # No default DTD handler or entity resolver is installed.  The
        # attributes are still created (as None) so that the getters can
        # be called before the matching setter without AttributeError.
        self._dtd_handler = None
        self._ent_handler = None
        self._err_handler = handler.ErrorHandler()

    def parse(self, source):
        "Parse an XML document from a system identifier or an InputSource."
        raise NotImplementedError("This method must be implemented!")

    def getContentHandler(self):
        "Returns the current ContentHandler."
        return self._cont_handler

    def setContentHandler(self, handler):
        "Registers a new object to receive document content events."
        self._cont_handler = handler

    def getDTDHandler(self):
        "Returns the current DTD handler, or None if none has been set."
        return self._dtd_handler

    def setDTDHandler(self, handler):
        "Register an object to receive basic DTD-related events."
        self._dtd_handler = handler

    def getEntityResolver(self):
        "Returns the current EntityResolver, or None if none has been set."
        return self._ent_handler

    def setEntityResolver(self, resolver):
        "Register an object to resolve external entities."
        self._ent_handler = resolver

    def getErrorHandler(self):
        "Returns the current ErrorHandler."
        return self._err_handler

    def setErrorHandler(self, handler):
        "Register an object to receive error-message events."
        self._err_handler = handler

    def setLocale(self, locale):
        """Allow an application to set the locale for errors and warnings.

        SAX parsers are not required to provide localization for errors
        and warnings; if they cannot support the requested locale,
        however, they must throw a SAX exception. Applications may
        request a locale change in the middle of a parse.

        This default implementation always raises
        SAXNotSupportedException."""
        raise SAXNotSupportedException("Locale support not implemented")

    def getFeature(self, name):
        """Looks up and returns the state of a SAX2 feature.

        This default implementation recognizes no features and always
        raises SAXNotRecognizedException."""
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def setFeature(self, name, state):
        """Sets the state of a SAX2 feature.

        This default implementation recognizes no features and always
        raises SAXNotRecognizedException."""
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def getProperty(self, name):
        """Looks up and returns the value of a SAX2 property.

        This default implementation recognizes no properties and always
        raises SAXNotRecognizedException."""
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    def setProperty(self, name, value):
        """Sets the value of a SAX2 property.

        This default implementation recognizes no properties and always
        raises SAXNotRecognizedException."""
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)
75
76
class IncrementalParser(XMLReader):
    """This interface adds three extra methods to the XMLReader
    interface that allow XML parsers to support incremental
    parsing. Support for this interface is optional, since not all
    underlying XML parsers support this functionality.

    When the parser is instantiated it is ready to begin accepting
    data from the feed method immediately. After parsing has been
    finished with a call to close the reset method must be called to
    make the parser ready to accept new data, either from feed or
    using the parse method.

    Note that these methods must _not_ be called during parsing, that
    is, after parse has been called and before it returns.

    By default, the class also implements the parse method of the XMLReader
    interface using the feed, close and reset methods of the
    IncrementalParser interface as a convenience to SAX 2.0 driver
    writers."""

    def __init__(self, bufsize=2**16):
        """Create the parser; bufsize is the size passed to each read
        call made while parsing an open file."""
        self._bufsize = bufsize
        XMLReader.__init__(self)

    def _parseOpenFile(self, source):
        """Read source in chunks of at most bufsize, pass each chunk to
        feed, then call close and reset."""
        buffer = source.read(self._bufsize)
        # Test for emptiness instead of comparing with "" so that the
        # loop also terminates for streams whose end-of-file value is
        # an empty byte string.
        while buffer:
            self.feed(buffer)
            buffer = source.read(self._bufsize)
        self.close()
        self.reset()

    def parse(self, source):
        """Parse source, which is either an object with a read method
        or a file name.

        For a file name, prepareParser is called with the name before
        the file is opened and parsed."""
        if hasattr(source, "read"):
            self._parseOpenFile(source)
        else:
            #FIXME: how to recognize if it is a URL instead of filename?
            self.prepareParser(source)
            stream = open(source)
            # Close the file even when feed, close or reset raises.
            try:
                self._parseOpenFile(stream)
            finally:
                stream.close()

    def feed(self, data):
        """This method gives the raw XML data in the data parameter to
        the parser and makes it parse the data, emitting the
        corresponding events. It is allowed for XML constructs to be
        split across several calls to feed.

        feed may raise SAXException."""
        raise NotImplementedError("This method must be implemented!")

    def prepareParser(self, source):
        """This method is called by the parse implementation to allow
        the SAX 2.0 driver to prepare itself for parsing."""
        raise NotImplementedError("prepareParser must be overridden!")

    def close(self):
        """This method is called when the entire XML document has been
        passed to the parser through the feed method, to notify the
        parser that there are no more data. This allows the parser to
        do the final checks on the document and empty the internal
        data buffer.

        The parser will not be ready to parse another document until
        the reset method has been called.

        close may raise SAXException."""
        raise NotImplementedError("This method must be implemented!")

    def reset(self):
        """This method is called after close has been called to reset
        the parser so that it is ready to parse new documents. The
        results of calling parse or feed after close without calling
        reset are undefined."""
        raise NotImplementedError("This method must be implemented!")
152
153# ===== LOCATOR =====
Lars Gustäbel32bf12e2000-09-24 18:39:23 +0000154
class Locator:
    """Interface that ties a SAX event to a position in the document.

    The values reported by a locator are only meaningful while a
    DocumentHandler method is being called; outside such a call the
    results are unpredictable.  This base implementation reports
    -1 for both positions and None for both identifiers."""

    def getColumnNumber(self):
        """Return the column number where the current event ends."""
        return -1

    def getLineNumber(self):
        """Return the line number where the current event ends."""
        return -1

    def getPublicId(self):
        """Return the public identifier for the current event."""
        return None

    def getSystemId(self):
        """Return the system identifier for the current event."""
        return None
Fred Drake45cd9de2000-06-29 19:34:54 +0000176
Lars Gustäbel32bf12e2000-09-24 18:39:23 +0000177# ===== ATTRIBUTESIMPL =====
178
class AttributesImpl:
    """Non-namespace-aware attribute collection backed by a mapping.

    Attribute names and qualified names are the same thing in this
    implementation, so the *ByQName and *ByName lookups only check
    that the name is present.  Besides the SAX accessor methods the
    class offers a read-only, dictionary-like interface."""

    def __init__(self, attrs):
        """Non-NS-aware implementation.

        attrs should be of the form {name : value}."""
        self._attrs = attrs

    def getLength(self):
        "Return the number of attributes."
        return len(self._attrs)

    def getType(self, name):
        "Return the attribute type, which is always reported as CDATA."
        return "CDATA"

    def getValue(self, name):
        "Return the value of attribute name; raises KeyError if absent."
        return self._attrs[name]

    def getValueByQName(self, name):
        "Return the value for qualified name name; raises KeyError if absent."
        return self._attrs[name]

    def getNameByQName(self, name):
        "Return the name for a qualified name; raises KeyError if absent."
        if name not in self._attrs:
            raise KeyError(name)
        return name

    def getQNameByName(self, name):
        "Return the qualified name for a name; raises KeyError if absent."
        if name not in self._attrs:
            raise KeyError(name)
        return name

    def getNames(self):
        "Return the attribute names."
        return self._attrs.keys()

    def getQNames(self):
        "Return the qualified attribute names."
        return self._attrs.keys()

    def __len__(self):
        return len(self._attrs)

    def __getitem__(self, name):
        return self._attrs[name]

    def __contains__(self, name):
        return name in self._attrs

    def keys(self):
        return self._attrs.keys()

    def has_key(self, name):
        "Return true if an attribute called name is present."
        # Use the membership operator: mappings no longer provide
        # has_key in Python 3.
        return name in self._attrs

    def get(self, name, alternative=None):
        "Return the value of name, or alternative if it is absent."
        return self._attrs.get(name, alternative)

    def copy(self):
        "Return a new object of the same class holding a snapshot of the attributes."
        # Copy the mapping so that later changes to the original
        # mapping do not show through in the copy.
        return self.__class__(self._attrs.copy())

    def items(self):
        return self._attrs.items()

    def values(self):
        return self._attrs.values()
238
Lars Gustäbel32bf12e2000-09-24 18:39:23 +0000239# ===== ATTRIBUTESNSIMPL =====
240
class AttributesNSImpl(AttributesImpl):
    """Namespace-aware attribute collection.

    Attributes are keyed by (ns_uri, lname) pairs; a second mapping
    records the qualified name that belongs to each pair."""

    def __init__(self, attrs, qnames):
        """NS-aware implementation.

        attrs should be of the form {(ns_uri, lname): value, ...}.
        qnames of the form {(ns_uri, lname): qname, ...}."""
        self._attrs = attrs
        self._qnames = qnames

    def getValueByQName(self, name):
        "Return the value for qualified name name; raises KeyError if absent."
        for (nsname, qname) in self._qnames.items():
            if qname == name:
                return self._attrs[nsname]

        raise KeyError(name)

    def getNameByQName(self, name):
        "Return the (ns_uri, lname) pair for a qualified name; raises KeyError if absent."
        for (nsname, qname) in self._qnames.items():
            if qname == name:
                return nsname

        raise KeyError(name)

    def getQNameByName(self, name):
        "Return the qualified name for a (ns_uri, lname) pair; raises KeyError if absent."
        return self._qnames[name]

    def getQNames(self):
        "Return the qualified attribute names."
        return self._qnames.values()

    def copy(self):
        "Return a new object of the same class holding a snapshot of the attributes."
        # Copy both mappings so that later changes to the originals do
        # not show through in the copy.
        return self.__class__(self._attrs.copy(), self._qnames.copy())
273
Fred Drake07cbc4e2000-09-21 17:43:48 +0000274
Fred Drake45cd9de2000-06-29 19:34:54 +0000275def _test():
276 XMLReader()
277 IncrementalParser()
278 Locator()
Fred Drake45cd9de2000-06-29 19:34:54 +0000279
Fred Drake07cbc4e2000-09-21 17:43:48 +0000280if __name__ == "__main__":
Fred Drake45cd9de2000-06-29 19:34:54 +0000281 _test()