\section{\module{xml.sax.handler} ---
         Base classes for SAX handlers}

\declaremodule{standard}{xml.sax.handler}
\modulesynopsis{Base classes for SAX event handlers.}
\sectionauthor{Martin v. L\"owis}{martin@v.loewis.de}
\moduleauthor{Lars Marius Garshol}{larsga@garshol.priv.no}

\versionadded{2.0}
10
11
The SAX API defines four kinds of handlers: content handlers, DTD
handlers, error handlers, and entity resolvers. Applications normally
only need to implement those interfaces whose events they are
interested in; they can implement the interfaces in a single object or
in multiple objects. Handler implementations should inherit from the
base classes provided in the module \module{xml.sax.handler}, so that
all methods get default implementations.
19
Fred Drake10b81ce2001-11-06 22:13:19 +000020\begin{classdesc*}{ContentHandler}
Fred Drake014f0e32000-10-12 20:05:09 +000021 This is the main callback interface in SAX, and the one most
22 important to applications. The order of events in this interface
23 mirrors the order of the information in the document.
Fred Drake10b81ce2001-11-06 22:13:19 +000024\end{classdesc*}
Fred Drake014f0e32000-10-12 20:05:09 +000025
Fred Drake10b81ce2001-11-06 22:13:19 +000026\begin{classdesc*}{DTDHandler}
Fred Drake014f0e32000-10-12 20:05:09 +000027 Handle DTD events.
28
29 This interface specifies only those DTD events required for basic
30 parsing (unparsed entities and attributes).
Fred Drake10b81ce2001-11-06 22:13:19 +000031\end{classdesc*}
Fred Drake014f0e32000-10-12 20:05:09 +000032
\begin{classdesc*}{EntityResolver}
  Basic interface for resolving entities. If you create an object
  implementing this interface and register it with your parser, the
  parser will call the method in your object to resolve all external
  entities.
\end{classdesc*}
Fred Drake014f0e32000-10-12 20:05:09 +000039
Fred Drake10b81ce2001-11-06 22:13:19 +000040\begin{classdesc*}{ErrorHandler}
Fred Drake5b567652000-12-19 04:07:54 +000041 Interface used by the parser to present error and warning messages
42 to the application. The methods of this object control whether errors
43 are immediately converted to exceptions or are handled in some other
44 way.
Fred Drake10b81ce2001-11-06 22:13:19 +000045\end{classdesc*}
Fred Drake5b567652000-12-19 04:07:54 +000046
Fred Drake014f0e32000-10-12 20:05:09 +000047In addition to these classes, \module{xml.sax.handler} provides
48symbolic constants for the feature and property names.
49
50\begin{datadesc}{feature_namespaces}
51 Value: \code{"http://xml.org/sax/features/namespaces"}\\
Fred Drake7064d3b2001-11-19 04:34:50 +000052 true: Perform Namespace processing.\\
Fred Drake014f0e32000-10-12 20:05:09 +000053 false: Optionally do not perform Namespace processing
Fred Drake7064d3b2001-11-19 04:34:50 +000054 (implies namespace-prefixes; default).\\
Fred Drake10b81ce2001-11-06 22:13:19 +000055 access: (parsing) read-only; (not parsing) read/write
Fred Drake014f0e32000-10-12 20:05:09 +000056\end{datadesc}
57
58\begin{datadesc}{feature_namespace_prefixes}
59 Value: \code{"http://xml.org/sax/features/namespace-prefixes"}\\
60 true: Report the original prefixed names and attributes used for Namespace
61 declarations.\\
62 false: Do not report attributes used for Namespace declarations, and
63 optionally do not report original prefixed names (default).\\
64 access: (parsing) read-only; (not parsing) read/write
65\end{datadesc}
66
67\begin{datadesc}{feature_string_interning}
Fred Drake3ed23852004-10-25 21:35:17 +000068 Value: \code{"http://xml.org/sax/features/string-interning"}\\
Fred Drake014f0e32000-10-12 20:05:09 +000069 true: All element names, prefixes, attribute names, Namespace URIs, and
70 local names are interned using the built-in intern function.\\
71 false: Names are not necessarily interned, although they may be (default).\\
72 access: (parsing) read-only; (not parsing) read/write
73\end{datadesc}
74
75\begin{datadesc}{feature_validation}
76 Value: \code{"http://xml.org/sax/features/validation"}\\
77 true: Report all validation errors (implies external-general-entities and
78 external-parameter-entities).\\
79 false: Do not report validation errors.\\
80 access: (parsing) read-only; (not parsing) read/write
81\end{datadesc}
82
83\begin{datadesc}{feature_external_ges}
84 Value: \code{"http://xml.org/sax/features/external-general-entities"}\\
85 true: Include all external general (text) entities.\\
86 false: Do not include external general entities.\\
87 access: (parsing) read-only; (not parsing) read/write
88\end{datadesc}
89
90\begin{datadesc}{feature_external_pes}
91 Value: \code{"http://xml.org/sax/features/external-parameter-entities"}\\
92 true: Include all external parameter entities, including the external
93 DTD subset.\\
94 false: Do not include any external parameter entities, even the external
95 DTD subset.\\
96 access: (parsing) read-only; (not parsing) read/write
97\end{datadesc}
98
99\begin{datadesc}{all_features}
100 List of all features.
101\end{datadesc}
102
103\begin{datadesc}{property_lexical_handler}
104 Value: \code{"http://xml.org/sax/properties/lexical-handler"}\\
105 data type: xml.sax.sax2lib.LexicalHandler (not supported in Python 2)\\
106 description: An optional extension handler for lexical events like comments.\\
107 access: read/write
108\end{datadesc}
109
110\begin{datadesc}{property_declaration_handler}
111 Value: \code{"http://xml.org/sax/properties/declaration-handler"}\\
112 data type: xml.sax.sax2lib.DeclHandler (not supported in Python 2)\\
113 description: An optional extension handler for DTD-related events other
114 than notations and unparsed entities.\\
115 access: read/write
116\end{datadesc}
117
118\begin{datadesc}{property_dom_node}
119 Value: \code{"http://xml.org/sax/properties/dom-node"}\\
120 data type: org.w3c.dom.Node (not supported in Python 2) \\
121 description: When parsing, the current DOM node being visited if this is
122 a DOM iterator; when not parsing, the root DOM node for
123 iteration.\\
124 access: (parsing) read-only; (not parsing) read/write
125\end{datadesc}
126
127\begin{datadesc}{property_xml_string}
128 Value: \code{"http://xml.org/sax/properties/xml-string"}\\
129 data type: String\\
130 description: The literal string of characters that was the source for
131 the current event.\\
132 access: read-only
133\end{datadesc}
134
135\begin{datadesc}{all_properties}
136 List of all known property names.
137\end{datadesc}
138
139
140\subsection{ContentHandler Objects \label{content-handler-objects}}
141
142Users are expected to subclass \class{ContentHandler} to support their
143application. The following methods are called by the parser on the
144appropriate events in the input document:
145
146\begin{methoddesc}[ContentHandler]{setDocumentLocator}{locator}
147 Called by the parser to give the application a locator for locating
148 the origin of document events.
149
  SAX parsers are strongly encouraged (though not absolutely required)
  to supply a locator: if a parser does so, it must supply the locator
  to the application by invoking this method before invoking any of
  the other methods in the \class{ContentHandler} interface.
154
155 The locator allows the application to determine the end position of
156 any document-related event, even if the parser is not reporting an
157 error. Typically, the application will use this information for
158 reporting its own errors (such as character content that does not
159 match an application's business rules). The information returned by
160 the locator is probably not sufficient for use with a search engine.
161
162 Note that the locator will return correct information only during
163 the invocation of the events in this interface. The application
164 should not attempt to use it at any other time.
165\end{methoddesc}
166
167\begin{methoddesc}[ContentHandler]{startDocument}{}
168 Receive notification of the beginning of a document.
169
170 The SAX parser will invoke this method only once, before any other
171 methods in this interface or in DTDHandler (except for
172 \method{setDocumentLocator()}).
173\end{methoddesc}
174
175\begin{methoddesc}[ContentHandler]{endDocument}{}
176 Receive notification of the end of a document.
177
178 The SAX parser will invoke this method only once, and it will be the
179 last method invoked during the parse. The parser shall not invoke
180 this method until it has either abandoned parsing (because of an
181 unrecoverable error) or reached the end of input.
182\end{methoddesc}
183
184\begin{methoddesc}[ContentHandler]{startPrefixMapping}{prefix, uri}
185 Begin the scope of a prefix-URI Namespace mapping.
186
  The information from this event is not necessary for normal
  Namespace processing: the SAX XML reader will automatically replace
  prefixes for element and attribute names when the
  \code{feature_namespaces} feature is enabled (it is disabled by
  default).
193
194 There are cases, however, when applications need to use prefixes in
195 character data or in attribute values, where they cannot safely be
Fred Drakec5e27922002-06-25 17:10:50 +0000196 expanded automatically; the \method{startPrefixMapping()} and
197 \method{endPrefixMapping()} events supply the information to the
198 application to expand prefixes in those contexts itself, if
199 necessary.
Fred Drake014f0e32000-10-12 20:05:09 +0000200
  Note that \method{startPrefixMapping()} and
  \method{endPrefixMapping()} events are not guaranteed to be properly
  nested relative to each other: all \method{startPrefixMapping()}
  events will occur before the corresponding \method{startElement()}
  event, and all \method{endPrefixMapping()} events will occur after
  the corresponding \method{endElement()} event, but their order is
  not otherwise guaranteed.
Fred Drake014f0e32000-10-12 20:05:09 +0000208\end{methoddesc}
209
210\begin{methoddesc}[ContentHandler]{endPrefixMapping}{prefix}
211 End the scope of a prefix-URI mapping.
Fred Drakec5e27922002-06-25 17:10:50 +0000212
213 See \method{startPrefixMapping()} for details. This event will
214 always occur after the corresponding \method{endElement()} event,
215 but the order of \method{endPrefixMapping()} events is not otherwise
216 guaranteed.
Fred Drake014f0e32000-10-12 20:05:09 +0000217\end{methoddesc}
218
219\begin{methoddesc}[ContentHandler]{startElement}{name, attrs}
220 Signals the start of an element in non-namespace mode.
221
222 The \var{name} parameter contains the raw XML 1.0 name of the
223 element type as a string and the \var{attrs} parameter holds an
Fred Drakec5e27922002-06-25 17:10:50 +0000224 object of the \ulink{\class{Attributes}
225 interface}{attributes-objects.html} containing the attributes of the
226 element. The object passed as \var{attrs} may be re-used by the
227 parser; holding on to a reference to it is not a reliable way to
Fred Drakeebbd14d2001-11-18 04:58:28 +0000228 keep a copy of the attributes. To keep a copy of the attributes,
229 use the \method{copy()} method of the \var{attrs} object.
Fred Drake014f0e32000-10-12 20:05:09 +0000230\end{methoddesc}
231
232\begin{methoddesc}[ContentHandler]{endElement}{name}
233 Signals the end of an element in non-namespace mode.
234
235 The \var{name} parameter contains the name of the element type, just
Fred Drakec5e27922002-06-25 17:10:50 +0000236 as with the \method{startElement()} event.
Fred Drake014f0e32000-10-12 20:05:09 +0000237\end{methoddesc}
238
239\begin{methoddesc}[ContentHandler]{startElementNS}{name, qname, attrs}
240 Signals the start of an element in namespace mode.
241
242 The \var{name} parameter contains the name of the element type as a
Fred Drakeebbd14d2001-11-18 04:58:28 +0000243 \code{(\var{uri}, \var{localname})} tuple, the \var{qname} parameter
244 contains the raw XML 1.0 name used in the source document, and the
Fred Drakec5e27922002-06-25 17:10:50 +0000245 \var{attrs} parameter holds an instance of the
246 \ulink{\class{AttributesNS} interface}{attributes-ns-objects.html}
247 containing the attributes of the element. If no namespace is
Fred Drakeebbd14d2001-11-18 04:58:28 +0000248 associated with the element, the \var{uri} component of \var{name}
249 will be \code{None}. The object passed as \var{attrs} may be
250 re-used by the parser; holding on to a reference to it is not a
251 reliable way to keep a copy of the attributes. To keep a copy of
252 the attributes, use the \method{copy()} method of the \var{attrs}
253 object.
Fred Drake014f0e32000-10-12 20:05:09 +0000254
255 Parsers may set the \var{qname} parameter to \code{None}, unless the
Fred Drake10b81ce2001-11-06 22:13:19 +0000256 \code{feature_namespace_prefixes} feature is activated.
Fred Drake014f0e32000-10-12 20:05:09 +0000257\end{methoddesc}
258
259\begin{methoddesc}[ContentHandler]{endElementNS}{name, qname}
260 Signals the end of an element in namespace mode.
261
262 The \var{name} parameter contains the name of the element type, just
Fred Drake10b81ce2001-11-06 22:13:19 +0000263 as with the \method{startElementNS()} method, likewise the
264 \var{qname} parameter.
Fred Drake014f0e32000-10-12 20:05:09 +0000265\end{methoddesc}
266
267\begin{methoddesc}[ContentHandler]{characters}{content}
268 Receive notification of character data.
269
270 The Parser will call this method to report each chunk of character
271 data. SAX parsers may return all contiguous character data in a
272 single chunk, or they may split it into several chunks; however, all
273 of the characters in any single event must come from the same
274 external entity so that the Locator provides useful information.
275
  \var{content} may be a Unicode string or a byte string; the
  \code{expat} reader module always produces Unicode strings.
Fred Drakee119c8f2000-12-04 22:04:15 +0000278
Fred Drake0aa811c2001-10-20 04:24:09 +0000279 \note{The earlier SAX 1 interface provided by the Python
Fred Drakee119c8f2000-12-04 22:04:15 +0000280 XML Special Interest Group used a more Java-like interface for this
Fred Drake21e4dd02000-12-04 22:29:17 +0000281 method. Since most parsers used from Python did not take advantage
Fred Drakee119c8f2000-12-04 22:04:15 +0000282 of the older interface, the simpler signature was chosen to replace
283 it. To convert old code to the new interface, use \var{content}
284 instead of slicing content with the old \var{offset} and
Fred Drake0aa811c2001-10-20 04:24:09 +0000285 \var{length} parameters.}
Fred Drake014f0e32000-10-12 20:05:09 +0000286\end{methoddesc}
287
Fred Drakee143bbb2004-05-06 03:47:48 +0000288\begin{methoddesc}[ContentHandler]{ignorableWhitespace}{whitespace}
Fred Drake014f0e32000-10-12 20:05:09 +0000289 Receive notification of ignorable whitespace in element content.
290
  Validating parsers must use this method to report each chunk
  of ignorable whitespace (see the W3C XML 1.0 recommendation,
  section 2.10); non-validating parsers may also use this method
  if they are capable of parsing and using content models.
295
296 SAX parsers may return all contiguous whitespace in a single
297 chunk, or they may split it into several chunks; however, all
298 of the characters in any single event must come from the same
299 external entity, so that the Locator provides useful
300 information.
301\end{methoddesc}
302
303\begin{methoddesc}[ContentHandler]{processingInstruction}{target, data}
304 Receive notification of a processing instruction.
305
306 The Parser will invoke this method once for each processing
307 instruction found: note that processing instructions may occur
308 before or after the main document element.
309
310 A SAX parser should never report an XML declaration (XML 1.0,
311 section 2.8) or a text declaration (XML 1.0, section 4.3.1) using
312 this method.
313\end{methoddesc}
314
315\begin{methoddesc}[ContentHandler]{skippedEntity}{name}
316 Receive notification of a skipped entity.
317
  The Parser will invoke this method once for each entity
  skipped. Non-validating processors may skip entities if they have
  not seen the declarations (because, for example, the entity was
  declared in an external DTD subset). All processors may skip
  external entities, depending on the values of the
  \code{feature_external_ges} and
  \code{feature_external_pes} features.
Fred Drake014f0e32000-10-12 20:05:09 +0000325\end{methoddesc}
326
327
328\subsection{DTDHandler Objects \label{dtd-handler-objects}}
329
330\class{DTDHandler} instances provide the following methods:
331
332\begin{methoddesc}[DTDHandler]{notationDecl}{name, publicId, systemId}
333 Handle a notation declaration event.
334\end{methoddesc}
335
336\begin{methoddesc}[DTDHandler]{unparsedEntityDecl}{name, publicId,
337 systemId, ndata}
338 Handle an unparsed entity declaration event.
339\end{methoddesc}
340
341
342\subsection{EntityResolver Objects \label{entity-resolver-objects}}
343
344\begin{methoddesc}[EntityResolver]{resolveEntity}{publicId, systemId}
345 Resolve the system identifier of an entity and return either the
346 system identifier to read from as a string, or an InputSource to
347 read from. The default implementation returns \var{systemId}.
348\end{methoddesc}
Fred Drake5b567652000-12-19 04:07:54 +0000349
350
351\subsection{ErrorHandler Objects \label{sax-error-handler}}
352
Objects with this interface are used to receive error and warning
information from the \class{XMLReader}. If you create an object that
implements this interface and register it with your
\class{XMLReader}, the parser will call the methods in your object to
report all warnings and errors. There are three levels of errors
available: warnings, (possibly) recoverable errors, and unrecoverable
errors. All methods take a \exception{SAXParseException} as the only
parameter. Errors and warnings may be converted to an exception by
raising the passed-in exception object.
362
363\begin{methoddesc}[ErrorHandler]{error}{exception}
364 Called when the parser encounters a recoverable error. If this method
365 does not raise an exception, parsing may continue, but further document
366 information should not be expected by the application. Allowing the
367 parser to continue may allow additional errors to be discovered in the
368 input document.
369\end{methoddesc}
370
371\begin{methoddesc}[ErrorHandler]{fatalError}{exception}
372 Called when the parser encounters an error it cannot recover from;
373 parsing is expected to terminate when this method returns.
374\end{methoddesc}
375
376\begin{methoddesc}[ErrorHandler]{warning}{exception}
377 Called when the parser presents minor warning information to the
378 application. Parsing is expected to continue when this method returns,
379 and document information will continue to be passed to the application.
380 Raising an exception in this method will cause parsing to end.
381\end{methoddesc}