blob: 8d9c5196aaea1981f425d3ccf0422b08e9d369d1 [file] [log] [blame]
Fred Drake014f0e32000-10-12 20:05:09 +00001\section{\module{xml.sax.handler} ---
2 Base classes for SAX handlers}
3
4\declaremodule{standard}{xml.sax.handler}
5\modulesynopsis{Base classes for SAX event handlers.}
6\sectionauthor{Martin v. L\"owis}{loewis@informatik.hu-berlin.de}
7\moduleauthor{Lars Marius Garshol}{larsga@garshol.priv.no}
8
9\versionadded{2.0}
10
11
12The SAX API defines four kinds of handlers: content handlers, DTD
13handlers, error handlers, and entity resolvers. Applications normally
14only need to implement those interfaces whose events they are
15interested in; they can implement the interfaces in a single object or
16in multiple objects. Handler implementations should inherit from the
17base classes provided in the module \module{xml.sax.handler}, so that all
18methods get default implementations.
19
20\begin{classdesc}{ContentHandler}{}
21 This is the main callback interface in SAX, and the one most
22 important to applications. The order of events in this interface
23 mirrors the order of the information in the document.
24\end{classdesc}
25
26\begin{classdesc}{DTDHandler}{}
27 Handle DTD events.
28
29 This interface specifies only those DTD events required for basic
30 parsing (notations and unparsed entities).
31\end{classdesc}
32
33\begin{classdesc}{EntityResolver}{}
34 Basic interface for resolving entities. If you create an object
35 implementing this interface and register the object with your
36 parser, the parser will call the method in your object to resolve all
37 external entities.
38\end{classdesc}
39
Fred Drake5b567652000-12-19 04:07:54 +000040\begin{classdesc}{ErrorHandler}{}
41 Interface used by the parser to present error and warning messages
42 to the application. The methods of this object control whether errors
43 are immediately converted to exceptions or are handled in some other
44 way.
45\end{classdesc}
46
Fred Drake014f0e32000-10-12 20:05:09 +000047In addition to these classes, \module{xml.sax.handler} provides
48symbolic constants for the feature and property names.
49
50\begin{datadesc}{feature_namespaces}
51 Value: \code{"http://xml.org/sax/features/namespaces"}\\
52 true: Perform Namespace processing (default).\\
53 false: Optionally do not perform Namespace processing
54 (implies namespace-prefixes).\\
55 access: (parsing) read-only; (not parsing) read/write\\
56\end{datadesc}
57
58\begin{datadesc}{feature_namespace_prefixes}
59 Value: \code{"http://xml.org/sax/features/namespace-prefixes"}\\
60 true: Report the original prefixed names and attributes used for Namespace
61 declarations.\\
62 false: Do not report attributes used for Namespace declarations, and
63 optionally do not report original prefixed names (default).\\
64 access: (parsing) read-only; (not parsing) read/write
65\end{datadesc}
66
67\begin{datadesc}{feature_string_interning}
68 Value: \code{"http://xml.org/sax/features/string-interning"}\\
69 true: All element names, prefixes, attribute names, Namespace URIs, and
70 local names are interned using the built-in intern function.\\
71 false: Names are not necessarily interned, although they may be (default).\\
72 access: (parsing) read-only; (not parsing) read/write
73\end{datadesc}
74
75\begin{datadesc}{feature_validation}
76 Value: \code{"http://xml.org/sax/features/validation"}\\
77 true: Report all validation errors (implies external-general-entities and
78 external-parameter-entities).\\
79 false: Do not report validation errors.\\
80 access: (parsing) read-only; (not parsing) read/write
81\end{datadesc}
82
83\begin{datadesc}{feature_external_ges}
84 Value: \code{"http://xml.org/sax/features/external-general-entities"}\\
85 true: Include all external general (text) entities.\\
86 false: Do not include external general entities.\\
87 access: (parsing) read-only; (not parsing) read/write
88\end{datadesc}
89
90\begin{datadesc}{feature_external_pes}
91 Value: \code{"http://xml.org/sax/features/external-parameter-entities"}\\
92 true: Include all external parameter entities, including the external
93 DTD subset.\\
94 false: Do not include any external parameter entities, even the external
95 DTD subset.\\
96 access: (parsing) read-only; (not parsing) read/write
97\end{datadesc}
98
99\begin{datadesc}{all_features}
100 List of all features.
101\end{datadesc}
102
103\begin{datadesc}{property_lexical_handler}
104 Value: \code{"http://xml.org/sax/properties/lexical-handler"}\\
105 data type: xml.sax.sax2lib.LexicalHandler (not supported in Python 2)\\
106 description: An optional extension handler for lexical events like comments.\\
107 access: read/write
108\end{datadesc}
109
110\begin{datadesc}{property_declaration_handler}
111 Value: \code{"http://xml.org/sax/properties/declaration-handler"}\\
112 data type: xml.sax.sax2lib.DeclHandler (not supported in Python 2)\\
113 description: An optional extension handler for DTD-related events other
114 than notations and unparsed entities.\\
115 access: read/write
116\end{datadesc}
117
118\begin{datadesc}{property_dom_node}
119 Value: \code{"http://xml.org/sax/properties/dom-node"}\\
120 data type: org.w3c.dom.Node (not supported in Python 2) \\
121 description: When parsing, the current DOM node being visited if this is
122 a DOM iterator; when not parsing, the root DOM node for
123 iteration.\\
124 access: (parsing) read-only; (not parsing) read/write
125\end{datadesc}
126
127\begin{datadesc}{property_xml_string}
128 Value: \code{"http://xml.org/sax/properties/xml-string"}\\
129 data type: String\\
130 description: The literal string of characters that was the source for
131 the current event.\\
132 access: read-only
133\end{datadesc}
134
135\begin{datadesc}{all_properties}
136 List of all known property names.
137\end{datadesc}
138
139
140\subsection{ContentHandler Objects \label{content-handler-objects}}
141
142Users are expected to subclass \class{ContentHandler} to support their
143application. The following methods are called by the parser on the
144appropriate events in the input document:
145
146\begin{methoddesc}[ContentHandler]{setDocumentLocator}{locator}
147 Called by the parser to give the application a locator for locating
148 the origin of document events.
149
150 SAX parsers are strongly encouraged (though not absolutely required)
151 to supply a locator: if a parser does so, it must supply the locator to
152 the application by invoking this method before invoking any of the
153 other methods in the \class{ContentHandler} interface.
154
155 The locator allows the application to determine the end position of
156 any document-related event, even if the parser is not reporting an
157 error. Typically, the application will use this information for
158 reporting its own errors (such as character content that does not
159 match an application's business rules). The information returned by
160 the locator is probably not sufficient for use with a search engine.
161
162 Note that the locator will return correct information only during
163 the invocation of the events in this interface. The application
164 should not attempt to use it at any other time.
165\end{methoddesc}
166
167\begin{methoddesc}[ContentHandler]{startDocument}{}
168 Receive notification of the beginning of a document.
169
170 The SAX parser will invoke this method only once, before any other
171 methods in this interface or in \class{DTDHandler} (except for
172 \method{setDocumentLocator()}).
173\end{methoddesc}
174
175\begin{methoddesc}[ContentHandler]{endDocument}{}
176 Receive notification of the end of a document.
177
178 The SAX parser will invoke this method only once, and it will be the
179 last method invoked during the parse. The parser shall not invoke
180 this method until it has either abandoned parsing (because of an
181 unrecoverable error) or reached the end of input.
182\end{methoddesc}
183
184\begin{methoddesc}[ContentHandler]{startPrefixMapping}{prefix, uri}
185 Begin the scope of a prefix-URI Namespace mapping.
186
187 The information from this event is not necessary for normal
188 Namespace processing: the SAX XML reader will automatically replace
189 prefixes for element and attribute names when the
190 \code{http://xml.org/sax/features/namespaces} feature is true (the
191 default).
192
193%% XXX This is not really the default, is it? MvL
194
195 There are cases, however, when applications need to use prefixes in
196 character data or in attribute values, where they cannot safely be
197 expanded automatically; the start/endPrefixMapping event supplies
198 the information to the application to expand prefixes in those
199 contexts itself, if necessary.
200
201 Note that start/endPrefixMapping events are not guaranteed to be
202 properly nested relative to each other: all
203 \method{startPrefixMapping()} events will occur before the
204 corresponding \method{startElement()} event, and all \method{endPrefixMapping()}
205 events will occur after the corresponding \method{endElement()} event,
206 but their order is not guaranteed.
207\end{methoddesc}
208
209\begin{methoddesc}[ContentHandler]{endPrefixMapping}{prefix}
210 End the scope of a prefix-URI mapping.
211
212 See \method{startPrefixMapping()} for details. This event will always
213 occur after the corresponding endElement event, but the order of
214 endPrefixMapping events is not otherwise guaranteed.
215\end{methoddesc}
216
217\begin{methoddesc}[ContentHandler]{startElement}{name, attrs}
218 Signals the start of an element in non-namespace mode.
219
220 The \var{name} parameter contains the raw XML 1.0 name of the
221 element type as a string and the \var{attrs} parameter holds an
222 instance of the \class{Attributes} class containing the attributes
223 of the element.
224\end{methoddesc}
225
226\begin{methoddesc}[ContentHandler]{endElement}{name}
227 Signals the end of an element in non-namespace mode.
228
229 The \var{name} parameter contains the name of the element type, just
230 as with the startElement event.
231\end{methoddesc}
232
233\begin{methoddesc}[ContentHandler]{startElementNS}{name, qname, attrs}
234 Signals the start of an element in namespace mode.
235
236 The \var{name} parameter contains the name of the element type as a
237 (uri, localname) tuple, the \var{qname} parameter the raw XML 1.0
238 name used in the source document, and the \var{attrs} parameter
239 holds an instance of the \class{AttributesNS} class containing the
240 attributes of the element.
241
242 Parsers may set the \var{qname} parameter to \code{None}, unless the
243 \code{http://xml.org/sax/features/namespace-prefixes} feature is
244 activated.
245\end{methoddesc}
246
247\begin{methoddesc}[ContentHandler]{endElementNS}{name, qname}
248 Signals the end of an element in namespace mode.
249
250 The \var{name} parameter contains the name of the element type, just
251 as with the startElementNS event, likewise the \var{qname} parameter.
252\end{methoddesc}
253
254\begin{methoddesc}[ContentHandler]{characters}{content}
255 Receive notification of character data.
256
257 The Parser will call this method to report each chunk of character
258 data. SAX parsers may return all contiguous character data in a
259 single chunk, or they may split it into several chunks; however, all
260 of the characters in any single event must come from the same
261 external entity so that the Locator provides useful information.
262
263 \var{content} may be a Unicode string or a byte string; the
264 \code{expat} reader module always produces Unicode strings.
Fred Drakee119c8f2000-12-04 22:04:15 +0000265
266 \strong{Note:} The earlier SAX 1 interface provided by the Python
267 XML Special Interest Group used a more Java-like interface for this
Fred Drake21e4dd02000-12-04 22:29:17 +0000268 method. Since most parsers used from Python did not take advantage
Fred Drakee119c8f2000-12-04 22:04:15 +0000269 of the older interface, the simpler signature was chosen to replace
270 it. To convert old code to the new interface, use \var{content}
271 instead of slicing content with the old \var{offset} and
Fred Drake21e4dd02000-12-04 22:29:17 +0000272 \var{length} parameters.
Fred Drake014f0e32000-10-12 20:05:09 +0000273\end{methoddesc}
274
275\begin{methoddesc}[ContentHandler]{ignorableWhitespace}{whitespace}
276 Receive notification of ignorable whitespace in element content.
277
278 Validating Parsers must use this method to report each chunk
279 of ignorable whitespace (see the W3C XML 1.0 recommendation,
280 section 2.10): non-validating parsers may also use this method
281 if they are capable of parsing and using content models.
282
283 SAX parsers may return all contiguous whitespace in a single
284 chunk, or they may split it into several chunks; however, all
285 of the characters in any single event must come from the same
286 external entity, so that the Locator provides useful
287 information.
288\end{methoddesc}
289
290\begin{methoddesc}[ContentHandler]{processingInstruction}{target, data}
291 Receive notification of a processing instruction.
292
293 The Parser will invoke this method once for each processing
294 instruction found: note that processing instructions may occur
295 before or after the main document element.
296
297 A SAX parser should never report an XML declaration (XML 1.0,
298 section 2.8) or a text declaration (XML 1.0, section 4.3.1) using
299 this method.
300\end{methoddesc}
301
302\begin{methoddesc}[ContentHandler]{skippedEntity}{name}
303 Receive notification of a skipped entity.
304
305 The Parser will invoke this method once for each entity
306 skipped. Non-validating processors may skip entities if they have
307 not seen the declarations (because, for example, the entity was
308 declared in an external DTD subset). All processors may skip
309 external entities, depending on the values of the
310 \code{http://xml.org/sax/features/external-general-entities} and the
311 \code{http://xml.org/sax/features/external-parameter-entities}
312 features.
313\end{methoddesc}
314
315
316\subsection{DTDHandler Objects \label{dtd-handler-objects}}
317
318\class{DTDHandler} instances provide the following methods:
319
320\begin{methoddesc}[DTDHandler]{notationDecl}{name, publicId, systemId}
321 Handle a notation declaration event.
322\end{methoddesc}
323
324\begin{methoddesc}[DTDHandler]{unparsedEntityDecl}{name, publicId,
325 systemId, ndata}
326 Handle an unparsed entity declaration event.
327\end{methoddesc}
328
329
330\subsection{EntityResolver Objects \label{entity-resolver-objects}}
331
332\begin{methoddesc}[EntityResolver]{resolveEntity}{publicId, systemId}
333 Resolve the system identifier of an entity and return either the
334 system identifier to read from as a string, or an InputSource to
335 read from. The default implementation returns \var{systemId}.
336\end{methoddesc}
Fred Drake5b567652000-12-19 04:07:54 +0000337
338
339\subsection{ErrorHandler Objects \label{sax-error-handler}}
340
341Objects with this interface are used to receive error and warning
342information from the \class{XMLReader}. If you create an object that
343implements this interface and register the object with your
344\class{XMLReader}, the parser will call the methods in your object to
345report all warnings and errors. There are three levels of errors
346available: warnings, (possibly) recoverable errors, and unrecoverable
347errors. All methods take a \exception{SAXParseException} as the only
348parameter. Errors and warnings may be converted to an exception by
349raising the passed-in exception object.
350
351\begin{methoddesc}[ErrorHandler]{error}{exception}
352 Called when the parser encounters a recoverable error. If this method
353 does not raise an exception, parsing may continue, but further document
354 information should not be expected by the application. Allowing the
355 parser to continue may allow additional errors to be discovered in the
356 input document.
357\end{methoddesc}
358
359\begin{methoddesc}[ErrorHandler]{fatalError}{exception}
360 Called when the parser encounters an error it cannot recover from;
361 parsing is expected to terminate when this method returns.
362\end{methoddesc}
363
364\begin{methoddesc}[ErrorHandler]{warning}{exception}
365 Called when the parser presents minor warning information to the
366 application. Parsing is expected to continue when this method returns,
367 and document information will continue to be passed to the application.
368 Raising an exception in this method will cause parsing to end.
369\end{methoddesc}