blob: 952a5631db196f1766f48e1bf5e23b055f601224 [file] [log] [blame]
Fred Drake014f0e32000-10-12 20:05:09 +00001\section{\module{xml.sax.handler} ---
2 Base classes for SAX handlers}
3
4\declaremodule{standard}{xml.sax.handler}
5\modulesynopsis{Base classes for SAX event handlers.}
6\sectionauthor{Martin v. L\"owis}{loewis@informatik.hu-berlin.de}
7\moduleauthor{Lars Marius Garshol}{larsga@garshol.priv.no}
8
9\versionadded{2.0}
10
11
12The SAX API defines four kinds of handlers: content handlers, DTD
13handlers, error handlers, and entity resolvers. Applications normally
14only need to implement those interfaces whose events they are
15interested in; they can implement the interfaces in a single object or
16in multiple objects. Handler implementations should inherit from the
17base classes provided in the module \module{xml.sax.handler}, so that all
18methods get default implementations.
19
Fred Drake10b81ce2001-11-06 22:13:19 +000020\begin{classdesc*}{ContentHandler}
Fred Drake014f0e32000-10-12 20:05:09 +000021 This is the main callback interface in SAX, and the one most
22 important to applications. The order of events in this interface
23 mirrors the order of the information in the document.
Fred Drake10b81ce2001-11-06 22:13:19 +000024\end{classdesc*}
Fred Drake014f0e32000-10-12 20:05:09 +000025
Fred Drake10b81ce2001-11-06 22:13:19 +000026\begin{classdesc*}{DTDHandler}
Fred Drake014f0e32000-10-12 20:05:09 +000027 Handle DTD events.
28
29 This interface specifies only those DTD events required for basic
30 parsing (unparsed entities and attributes).
Fred Drake10b81ce2001-11-06 22:13:19 +000031\end{classdesc*}
Fred Drake014f0e32000-10-12 20:05:09 +000032
Fred Drake10b81ce2001-11-06 22:13:19 +000033\begin{classdesc*}{EntityResolver}
Fred Drake014f0e32000-10-12 20:05:09 +000034 Basic interface for resolving entities. If you create an object
35 implementing this interface and then register the object with your
36 Parser, the parser will call the method in your object to resolve all
37 external entities.
Fred Drake10b81ce2001-11-06 22:13:19 +000038\end{classdesc*}
Fred Drake014f0e32000-10-12 20:05:09 +000039
Fred Drake10b81ce2001-11-06 22:13:19 +000040\begin{classdesc*}{ErrorHandler}
Fred Drake5b567652000-12-19 04:07:54 +000041 Interface used by the parser to present error and warning messages
42 to the application. The methods of this object control whether errors
43 are immediately converted to exceptions or are handled in some other
44 way.
Fred Drake10b81ce2001-11-06 22:13:19 +000045\end{classdesc*}
Fred Drake5b567652000-12-19 04:07:54 +000046
Fred Drake014f0e32000-10-12 20:05:09 +000047In addition to these classes, \module{xml.sax.handler} provides
48symbolic constants for the feature and property names.
49
50\begin{datadesc}{feature_namespaces}
51 Value: \code{"http://xml.org/sax/features/namespaces"}\\
Fred Drake7064d3b2001-11-19 04:34:50 +000052 true: Perform Namespace processing.\\
Fred Drake014f0e32000-10-12 20:05:09 +000053 false: Optionally do not perform Namespace processing
Fred Drake7064d3b2001-11-19 04:34:50 +000054 (implies namespace-prefixes; default).\\
Fred Drake10b81ce2001-11-06 22:13:19 +000055 access: (parsing) read-only; (not parsing) read/write
Fred Drake014f0e32000-10-12 20:05:09 +000056\end{datadesc}
57
58\begin{datadesc}{feature_namespace_prefixes}
59 Value: \code{"http://xml.org/sax/features/namespace-prefixes"}\\
60 true: Report the original prefixed names and attributes used for Namespace
61 declarations.\\
62 false: Do not report attributes used for Namespace declarations, and
63 optionally do not report original prefixed names (default).\\
64 access: (parsing) read-only; (not parsing) read/write
65\end{datadesc}
66
67\begin{datadesc}{feature_string_interning}
68 Value: \code{"http://xml.org/sax/features/string-interning"}\\
69 true: All element names, prefixes, attribute names, Namespace URIs, and
70 local names are interned using the built-in intern function.\\
71 false: Names are not necessarily interned, although they may be (default).\\
72 access: (parsing) read-only; (not parsing) read/write
73\end{datadesc}
74
75\begin{datadesc}{feature_validation}
76 Value: \code{"http://xml.org/sax/features/validation"}\\
77 true: Report all validation errors (implies external-general-entities and
78 external-parameter-entities).\\
79 false: Do not report validation errors.\\
80 access: (parsing) read-only; (not parsing) read/write
81\end{datadesc}
82
83\begin{datadesc}{feature_external_ges}
84 Value: \code{"http://xml.org/sax/features/external-general-entities"}\\
85 true: Include all external general (text) entities.\\
86 false: Do not include external general entities.\\
87 access: (parsing) read-only; (not parsing) read/write
88\end{datadesc}
89
90\begin{datadesc}{feature_external_pes}
91 Value: \code{"http://xml.org/sax/features/external-parameter-entities"}\\
92 true: Include all external parameter entities, including the external
93 DTD subset.\\
94 false: Do not include any external parameter entities, even the external
95 DTD subset.\\
96 access: (parsing) read-only; (not parsing) read/write
97\end{datadesc}
98
99\begin{datadesc}{all_features}
100 List of all features.
101\end{datadesc}
102
103\begin{datadesc}{property_lexical_handler}
104 Value: \code{"http://xml.org/sax/properties/lexical-handler"}\\
105 data type: xml.sax.sax2lib.LexicalHandler (not supported in Python 2)\\
106 description: An optional extension handler for lexical events like comments.\\
107 access: read/write
108\end{datadesc}
109
110\begin{datadesc}{property_declaration_handler}
111 Value: \code{"http://xml.org/sax/properties/declaration-handler"}\\
112 data type: xml.sax.sax2lib.DeclHandler (not supported in Python 2)\\
113 description: An optional extension handler for DTD-related events other
114 than notations and unparsed entities.\\
115 access: read/write
116\end{datadesc}
117
118\begin{datadesc}{property_dom_node}
119 Value: \code{"http://xml.org/sax/properties/dom-node"}\\
120 data type: org.w3c.dom.Node (not supported in Python 2) \\
121 description: When parsing, the current DOM node being visited if this is
122 a DOM iterator; when not parsing, the root DOM node for
123 iteration.\\
124 access: (parsing) read-only; (not parsing) read/write
125\end{datadesc}
126
127\begin{datadesc}{property_xml_string}
128 Value: \code{"http://xml.org/sax/properties/xml-string"}\\
129 data type: String\\
130 description: The literal string of characters that was the source for
131 the current event.\\
132 access: read-only
133\end{datadesc}
134
135\begin{datadesc}{all_properties}
136 List of all known property names.
137\end{datadesc}
138
139
140\subsection{ContentHandler Objects \label{content-handler-objects}}
141
142Users are expected to subclass \class{ContentHandler} to support their
143application. The following methods are called by the parser on the
144appropriate events in the input document:
145
146\begin{methoddesc}[ContentHandler]{setDocumentLocator}{locator}
147 Called by the parser to give the application a locator for locating
148 the origin of document events.
149
150 A SAX parser is strongly encouraged (though not absolutely required)
151 to supply a locator; if it does so, it must supply the locator to
152 the application by invoking this method before invoking any of the
153 other methods in the \class{ContentHandler} interface.
154
155 The locator allows the application to determine the end position of
156 any document-related event, even if the parser is not reporting an
157 error. Typically, the application will use this information for
158 reporting its own errors (such as character content that does not
159 match an application's business rules). The information returned by
160 the locator is probably not sufficient for use with a search engine.
161
162 Note that the locator will return correct information only during
163 the invocation of the events in this interface. The application
164 should not attempt to use it at any other time.
165\end{methoddesc}
166
167\begin{methoddesc}[ContentHandler]{startDocument}{}
168 Receive notification of the beginning of a document.
169
170 The SAX parser will invoke this method only once, before any other
171 methods in this interface or in DTDHandler (except for
172 \method{setDocumentLocator()}).
173\end{methoddesc}
174
175\begin{methoddesc}[ContentHandler]{endDocument}{}
176 Receive notification of the end of a document.
177
178 The SAX parser will invoke this method only once, and it will be the
179 last method invoked during the parse. The parser shall not invoke
180 this method until it has either abandoned parsing (because of an
181 unrecoverable error) or reached the end of input.
182\end{methoddesc}
183
184\begin{methoddesc}[ContentHandler]{startPrefixMapping}{prefix, uri}
185 Begin the scope of a prefix-URI Namespace mapping.
186
187 The information from this event is not necessary for normal
188 Namespace processing: the SAX XML reader will automatically replace
189 prefixes for element and attribute names when the
Fred Drake10b81ce2001-11-06 22:13:19 +0000190 \code{feature_namespaces} feature is enabled (the default).
Fred Drake014f0e32000-10-12 20:05:09 +0000191
192%% XXX This is not really the default, is it? MvL
193
194 There are cases, however, when applications need to use prefixes in
195 character data or in attribute values, where they cannot safely be
196 expanded automatically; the start/endPrefixMapping event supplies
197 the information to the application to expand prefixes in those
198 contexts itself, if necessary.
199
200 Note that start/endPrefixMapping events are not guaranteed to be
201 properly nested relative to each other: all
202 \method{startPrefixMapping()} events will occur before the
Fred Drakeebbd14d2001-11-18 04:58:28 +0000203 corresponding \method{startElement()} event, and all
204 \method{endPrefixMapping()} events will occur after the
205 corresponding \method{endElement()} event, but their order is not
206 guaranteed.
Fred Drake014f0e32000-10-12 20:05:09 +0000207\end{methoddesc}
208
209\begin{methoddesc}[ContentHandler]{endPrefixMapping}{prefix}
210 End the scope of a prefix-URI mapping.
211
212 See \method{startPrefixMapping()} for details. This event will always
213 occur after the corresponding endElement event, but the order of
214 endPrefixMapping events is not otherwise guaranteed.
215\end{methoddesc}
216
217\begin{methoddesc}[ContentHandler]{startElement}{name, attrs}
218 Signals the start of an element in non-namespace mode.
219
220 The \var{name} parameter contains the raw XML 1.0 name of the
221 element type as a string and the \var{attrs} parameter holds an
222 instance of the \class{Attributes} class containing the attributes
Fred Drakeebbd14d2001-11-18 04:58:28 +0000223 of the element. The object passed as \var{attrs} may be re-used by
224 the parser; holding on to a reference to it is not a reliable way to
225 keep a copy of the attributes. To keep a copy of the attributes,
226 use the \method{copy()} method of the \var{attrs} object.
Fred Drake014f0e32000-10-12 20:05:09 +0000227\end{methoddesc}
228
229\begin{methoddesc}[ContentHandler]{endElement}{name}
230 Signals the end of an element in non-namespace mode.
231
232 The \var{name} parameter contains the name of the element type, just
233 as with the startElement event.
234\end{methoddesc}
235
236\begin{methoddesc}[ContentHandler]{startElementNS}{name, qname, attrs}
237 Signals the start of an element in namespace mode.
238
239 The \var{name} parameter contains the name of the element type as a
Fred Drakeebbd14d2001-11-18 04:58:28 +0000240 \code{(\var{uri}, \var{localname})} tuple, the \var{qname} parameter
241 contains the raw XML 1.0 name used in the source document, and the
242 \var{attrs} parameter holds an instance of the \class{AttributesNS}
243 class containing the attributes of the element. If no namespace is
244 associated with the element, the \var{uri} component of \var{name}
245 will be \code{None}. The object passed as \var{attrs} may be
246 re-used by the parser; holding on to a reference to it is not a
247 reliable way to keep a copy of the attributes. To keep a copy of
248 the attributes, use the \method{copy()} method of the \var{attrs}
249 object.
Fred Drake014f0e32000-10-12 20:05:09 +0000250
251 Parsers may set the \var{qname} parameter to \code{None}, unless the
Fred Drake10b81ce2001-11-06 22:13:19 +0000252 \code{feature_namespace_prefixes} feature is activated.
Fred Drake014f0e32000-10-12 20:05:09 +0000253\end{methoddesc}
254
255\begin{methoddesc}[ContentHandler]{endElementNS}{name, qname}
256 Signals the end of an element in namespace mode.
257
258 The \var{name} parameter contains the name of the element type, just
Fred Drake10b81ce2001-11-06 22:13:19 +0000259 as with the \method{startElementNS()} method, likewise the
260 \var{qname} parameter.
Fred Drake014f0e32000-10-12 20:05:09 +0000261\end{methoddesc}
262
263\begin{methoddesc}[ContentHandler]{characters}{content}
264 Receive notification of character data.
265
266 The Parser will call this method to report each chunk of character
267 data. SAX parsers may return all contiguous character data in a
268 single chunk, or they may split it into several chunks; however, all
269 of the characters in any single event must come from the same
270 external entity so that the Locator provides useful information.
271
272 \var{content} may be a Unicode string or a byte string; the
273 \code{expat} reader module always produces Unicode strings.
Fred Drakee119c8f2000-12-04 22:04:15 +0000274
Fred Drake0aa811c2001-10-20 04:24:09 +0000275 \note{The earlier SAX 1 interface provided by the Python
Fred Drakee119c8f2000-12-04 22:04:15 +0000276 XML Special Interest Group used a more Java-like interface for this
Fred Drake21e4dd02000-12-04 22:29:17 +0000277 method. Since most parsers used from Python did not take advantage
Fred Drakee119c8f2000-12-04 22:04:15 +0000278 of the older interface, the simpler signature was chosen to replace
279 it. To convert old code to the new interface, use \var{content}
280 instead of slicing content with the old \var{offset} and
Fred Drake0aa811c2001-10-20 04:24:09 +0000281 \var{length} parameters.}
Fred Drake014f0e32000-10-12 20:05:09 +0000282\end{methoddesc}
283
284\begin{methoddesc}[ContentHandler]{ignorableWhitespace}{whitespace}
285 Receive notification of ignorable whitespace in element content.
286
287 Validating Parsers must use this method to report each chunk
288 of ignorable whitespace (see the W3C XML 1.0 recommendation,
289 section 2.10): non-validating parsers may also use this method
290 if they are capable of parsing and using content models.
291
292 SAX parsers may return all contiguous whitespace in a single
293 chunk, or they may split it into several chunks; however, all
294 of the characters in any single event must come from the same
295 external entity, so that the Locator provides useful
296 information.
297\end{methoddesc}
298
299\begin{methoddesc}[ContentHandler]{processingInstruction}{target, data}
300 Receive notification of a processing instruction.
301
302 The Parser will invoke this method once for each processing
303 instruction found: note that processing instructions may occur
304 before or after the main document element.
305
306 A SAX parser should never report an XML declaration (XML 1.0,
307 section 2.8) or a text declaration (XML 1.0, section 4.3.1) using
308 this method.
309\end{methoddesc}
310
311\begin{methoddesc}[ContentHandler]{skippedEntity}{name}
312 Receive notification of a skipped entity.
313
314 The Parser will invoke this method once for each entity
315 skipped. Non-validating processors may skip entities if they have
316 not seen the declarations (because, for example, the entity was
317 declared in an external DTD subset). All processors may skip
318 external entities, depending on the values of the
Fred Drake10b81ce2001-11-06 22:13:19 +0000319 \code{feature_external_ges} and the
320 \code{feature_external_pes} features.
Fred Drake014f0e32000-10-12 20:05:09 +0000321\end{methoddesc}
322
323
324\subsection{DTDHandler Objects \label{dtd-handler-objects}}
325
326\class{DTDHandler} instances provide the following methods:
327
328\begin{methoddesc}[DTDHandler]{notationDecl}{name, publicId, systemId}
329 Handle a notation declaration event.
330\end{methoddesc}
331
332\begin{methoddesc}[DTDHandler]{unparsedEntityDecl}{name, publicId,
333 systemId, ndata}
334 Handle an unparsed entity declaration event.
335\end{methoddesc}
336
337
338\subsection{EntityResolver Objects \label{entity-resolver-objects}}
339
340\begin{methoddesc}[EntityResolver]{resolveEntity}{publicId, systemId}
341 Resolve the system identifier of an entity and return either the
342 system identifier to read from as a string, or an InputSource to
343 read from. The default implementation returns \var{systemId}.
344\end{methoddesc}
Fred Drake5b567652000-12-19 04:07:54 +0000345
346
347\subsection{ErrorHandler Objects \label{sax-error-handler}}
348
349Objects with this interface are used to receive error and warning
350information from the \class{XMLReader}. If you create an object that
351implements this interface and then register the object with your
352\class{XMLReader}, the parser will call the methods in your object to
353report all warnings and errors. There are three levels of errors
354available: warnings, (possibly) recoverable errors, and unrecoverable
355errors. All methods take a \exception{SAXParseException} as the only
356parameter. Errors and warnings may be converted to an exception by
357raising the passed-in exception object.
358
359\begin{methoddesc}[ErrorHandler]{error}{exception}
360 Called when the parser encounters a recoverable error. If this method
361 does not raise an exception, parsing may continue, but further document
362 information should not be expected by the application. Allowing the
363 parser to continue may allow additional errors to be discovered in the
364 input document.
365\end{methoddesc}
366
367\begin{methoddesc}[ErrorHandler]{fatalError}{exception}
368 Called when the parser encounters an error it cannot recover from;
369 parsing is expected to terminate when this method returns.
370\end{methoddesc}
371
372\begin{methoddesc}[ErrorHandler]{warning}{exception}
373 Called when the parser presents minor warning information to the
374 application. Parsing is expected to continue when this method returns,
375 and document information will continue to be passed to the application.
376 Raising an exception in this method will cause parsing to end.
377\end{methoddesc}