blob: 3bcf2b94b652ba68bd6d461e757340605b86a128 [file] [log] [blame]
"""An XML Reader is the SAX 2 name for an XML parser. XML Parsers
should be based on this code. """
Fred Drake07cbc4e2000-09-21 17:43:48 +00003
import handler

from xml.sax._exceptions import SAXNotSupportedException, SAXNotRecognizedException
5
# ===== XMLREADER =====
7
class XMLReader:
    """Base class for SAX 2 parsers (an "XML reader" in SAX 2 terms).

    Stores the handler objects registered by the application and defines
    the feature/property interface.  parse() must be overridden by a
    concrete driver; the feature and property accessors defined here
    reject every name they are given.
    """

    def __init__(self):
        self._cont_handler = handler.ContentHandler()
        # No default DTD handler or entity resolver is installed.  The
        # attributes are still initialised so that getDTDHandler() and
        # getEntityResolver() return None on a fresh reader instead of
        # failing with AttributeError.
        self._dtd_handler = None
        self._ent_handler = None
        self._err_handler = handler.ErrorHandler()

    def parse(self, source):
        "Parse an XML document from a system identifier or an InputSource."
        raise NotImplementedError("This method must be implemented!")

    def getContentHandler(self):
        "Returns the current ContentHandler."
        return self._cont_handler

    def setContentHandler(self, handler):
        "Registers a new object to receive document content events."
        self._cont_handler = handler

    def getDTDHandler(self):
        "Returns the current DTD handler, or None if none has been set."
        return self._dtd_handler

    def setDTDHandler(self, handler):
        "Register an object to receive basic DTD-related events."
        self._dtd_handler = handler

    def getEntityResolver(self):
        "Returns the current EntityResolver, or None if none has been set."
        return self._ent_handler

    def setEntityResolver(self, resolver):
        "Register an object to resolve external entities."
        self._ent_handler = resolver

    def getErrorHandler(self):
        "Returns the current ErrorHandler."
        return self._err_handler

    def setErrorHandler(self, handler):
        "Register an object to receive error-message events."
        self._err_handler = handler

    def setLocale(self, locale):
        """Allow an application to set the locale for errors and warnings.

        SAX parsers are not required to provide localization for errors
        and warnings; if they cannot support the requested locale,
        however, they must throw a SAX exception. Applications may
        request a locale change in the middle of a parse.

        This base implementation supports no locale and always raises
        SAXNotSupportedException."""
        raise SAXNotSupportedException("Locale support not implemented")

    def getFeature(self, name):
        """Looks up and returns the state of a SAX2 feature.

        The base class knows no features: always raises
        SAXNotRecognizedException."""
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def setFeature(self, name, state):
        """Sets the state of a SAX2 feature.

        The base class knows no features: always raises
        SAXNotRecognizedException."""
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def getProperty(self, name):
        """Looks up and returns the value of a SAX2 property.

        The base class knows no properties: always raises
        SAXNotRecognizedException."""
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    def setProperty(self, name, value):
        """Sets the value of a SAX2 property.

        The base class knows no properties: always raises
        SAXNotRecognizedException."""
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)
75
76
class IncrementalParser(XMLReader):
    """This interface adds three extra methods to the XMLReader
    interface that allow XML parsers to support incremental
    parsing. Support for this interface is optional, since not all
    underlying XML parsers support this functionality.

    When the parser is instantiated it is ready to begin accepting
    data from the feed method immediately. After parsing has been
    finished with a call to close the reset method must be called to
    make the parser ready to accept new data, either from feed or
    using the parse method.

    Note that these methods must _not_ be called during parsing, that
    is, after parse has been called and before it returns.

    By default, the class also implements the parse method of the XMLReader
    interface using the feed, close and reset methods of the
    IncrementalParser interface as a convenience to SAX 2.0 driver
    writers."""

    def __init__(self, bufsize=2**16):
        """bufsize is the maximum amount requested from the source per
        read() call while parsing."""
        self._bufsize = bufsize
        XMLReader.__init__(self)

    def _parseOpenFile(self, source):
        """Feed everything readable from source, then close and reset.

        source must provide a read(size) method.  Data is pulled in
        chunks of at most self._bufsize and passed to feed() until
        read() returns an empty result."""
        chunk = source.read(self._bufsize)
        # Test truthiness rather than comparing with "": an empty bytes
        # result must end the loop as well.
        while chunk:
            self.feed(chunk)
            chunk = source.read(self._bufsize)
        self.close()
        self.reset()

    def parse(self, source):
        """Parse source, which is either an object with a read() method
        or a file name that is opened here.

        prepareParser() is called only in the file-name case."""
        if hasattr(source, "read"):
            self._parseOpenFile(source)
        else:
            #FIXME: how to recognize if it is a URL instead of filename?
            self.prepareParser(source)
            stream = open(source)
            try:
                self._parseOpenFile(stream)
            finally:
                # Release the file even when feed/close/reset raises.
                stream.close()

    def feed(self, data):
        """This method gives the raw XML data in the data parameter to
        the parser and makes it parse the data, emitting the
        corresponding events. It is allowed for XML constructs to be
        split across several calls to feed.

        feed may raise SAXException."""
        raise NotImplementedError("This method must be implemented!")

    def prepareParser(self, source):
        """This method is called by the parse implementation to allow
        the SAX 2.0 driver to prepare itself for parsing."""
        raise NotImplementedError("prepareParser must be overridden!")

    def close(self):
        """This method is called when the entire XML document has been
        passed to the parser through the feed method, to notify the
        parser that there are no more data. This allows the parser to
        do the final checks on the document and empty the internal
        data buffer.

        The parser will not be ready to parse another document until
        the reset method has been called.

        close may raise SAXException."""
        raise NotImplementedError("This method must be implemented!")

    def reset(self):
        """This method is called after close has been called to reset
        the parser so that it is ready to parse new documents. The
        results of calling parse or feed after close without calling
        reset are undefined."""
        raise NotImplementedError("This method must be implemented!")
152
# ===== LOCATOR =====
class Locator:
    """Interface for associating a SAX event with a document
    location. A locator object will return valid results only during
    calls to ContentHandler methods; at any other time, the
    results are unpredictable.

    This base implementation knows no location: the numeric accessors
    return -1 and the identifier accessors return None."""

    def getColumnNumber(self):
        "Return the column number where the current event ends."
        return -1

    def getLineNumber(self):
        "Return the line number where the current event ends."
        return -1

    def getPublicId(self):
        "Return the public identifier for the current event."
        return None

    def getSystemId(self):
        "Return the system identifier for the current event."
        return None
Fred Drake45cd9de2000-06-29 19:34:54 +0000175
# --- AttributesImpl
class AttributesImpl:
    """Read-only, mapping-like view of the attributes of one element.

    attrs maps attribute names to attribute values.  rawnames maps
    qualified (raw) names to the corresponding keys of attrs.  Both
    mappings are stored by reference, not copied.
    """

    def __init__(self, attrs, rawnames):
        self._attrs = attrs
        self._rawnames = rawnames

    def getLength(self):
        "Return the number of attributes."
        return len(self._attrs)

    def getType(self, name):
        "Return the attribute type; always 'CDATA', whatever the name."
        return "CDATA"

    def getValue(self, name):
        "Return the value stored for name (KeyError if absent)."
        return self._attrs[name]

    def getValueByQName(self, name):
        "Return the value of the attribute with qualified name `name`."
        return self._attrs[self._rawnames[name]]

    def getNameByQName(self, name):
        "Return the attribute name registered for qualified name `name`."
        return self._rawnames[name]

    def getNames(self):
        "Return the attribute names."
        return self._attrs.keys()

    def getQNames(self):
        "Return the qualified names."
        return self._rawnames.keys()

    def __len__(self):
        return len(self._attrs)

    def __getitem__(self, name):
        return self._attrs[name]

    def __contains__(self, name):
        # Without this, `name in attrs` falls back to calling
        # __getitem__ with 0, 1, 2, ... and raises KeyError.
        return name in self._attrs

    def keys(self):
        return self._attrs.keys()

    def has_key(self, name):
        "Return true if an attribute called name exists."
        # Use the membership test rather than the mapping's own has_key(),
        # which dict objects no longer provide on Python 3.
        return name in self._attrs

    def get(self, name, alternative=None):
        "Return the value for name, or alternative if it is absent."
        return self._attrs.get(name, alternative)

    def copy(self):
        "Return a new instance of the same class sharing both mappings."
        return self.__class__(self._attrs, self._rawnames)

    def items(self):
        return self._attrs.items()

    def values(self):
        return self._attrs.values()
226
Fred Drake07cbc4e2000-09-21 17:43:48 +0000227
def _test():
    """Smoke test: instantiate each class defined in this module once."""
    XMLReader()
    IncrementalParser()
    Locator()
    # AttributesImpl requires both of its mappings; empty ones suffice here.
    AttributesImpl({}, {})
233
# Run the smoke test when this module is executed as a script.
if __name__ == "__main__":
    _test()