blob: 77ec724668f6f7b82dd33b6c72ec2f424b61487d [file] [log] [blame]
Fred Drake014f0e32000-10-12 20:05:09 +00001\section{\module{xml.sax.handler} ---
2 Base classes for SAX handlers}
3
4\declaremodule{standard}{xml.sax.handler}
5\modulesynopsis{Base classes for SAX event handlers.}
6\sectionauthor{Martin v. L\"owis}{loewis@informatik.hu-berlin.de}
7\moduleauthor{Lars Marius Garshol}{larsga@garshol.priv.no}
8
9\versionadded{2.0}
10
11
12The SAX API defines four kinds of handlers: content handlers, DTD
13handlers, error handlers, and entity resolvers. Applications normally
14only need to implement those interfaces whose events they are
15interested in; they can implement the interfaces in a single object or
16in multiple objects. Handler implementations should inherit from the
base classes provided in the module \module{xml.sax.handler}, so that all
18methods get default implementations.
19
Fred Drake10b81ce2001-11-06 22:13:19 +000020\begin{classdesc*}{ContentHandler}
Fred Drake014f0e32000-10-12 20:05:09 +000021 This is the main callback interface in SAX, and the one most
22 important to applications. The order of events in this interface
23 mirrors the order of the information in the document.
Fred Drake10b81ce2001-11-06 22:13:19 +000024\end{classdesc*}
Fred Drake014f0e32000-10-12 20:05:09 +000025
Fred Drake10b81ce2001-11-06 22:13:19 +000026\begin{classdesc*}{DTDHandler}
Fred Drake014f0e32000-10-12 20:05:09 +000027 Handle DTD events.
28
  This interface specifies only those DTD events required for basic
  parsing (notations and unparsed entities).
Fred Drake10b81ce2001-11-06 22:13:19 +000031\end{classdesc*}
Fred Drake014f0e32000-10-12 20:05:09 +000032
Fred Drake10b81ce2001-11-06 22:13:19 +000033\begin{classdesc*}{EntityResolver}
  Basic interface for resolving entities. If you create an object
  implementing this interface and register the object with your
  Parser, the parser will call the method in your object to resolve all
  external entities.
Fred Drake10b81ce2001-11-06 22:13:19 +000038\end{classdesc*}
Fred Drake014f0e32000-10-12 20:05:09 +000039
Fred Drake10b81ce2001-11-06 22:13:19 +000040\begin{classdesc*}{ErrorHandler}
Fred Drake5b567652000-12-19 04:07:54 +000041 Interface used by the parser to present error and warning messages
42 to the application. The methods of this object control whether errors
43 are immediately converted to exceptions or are handled in some other
44 way.
Fred Drake10b81ce2001-11-06 22:13:19 +000045\end{classdesc*}
Fred Drake5b567652000-12-19 04:07:54 +000046
Fred Drake014f0e32000-10-12 20:05:09 +000047In addition to these classes, \module{xml.sax.handler} provides
48symbolic constants for the feature and property names.
49
50\begin{datadesc}{feature_namespaces}
51 Value: \code{"http://xml.org/sax/features/namespaces"}\\
52 true: Perform Namespace processing (default).\\
53 false: Optionally do not perform Namespace processing
54 (implies namespace-prefixes).\\
Fred Drake10b81ce2001-11-06 22:13:19 +000055 access: (parsing) read-only; (not parsing) read/write
Fred Drake014f0e32000-10-12 20:05:09 +000056\end{datadesc}
57
58\begin{datadesc}{feature_namespace_prefixes}
59 Value: \code{"http://xml.org/sax/features/namespace-prefixes"}\\
60 true: Report the original prefixed names and attributes used for Namespace
61 declarations.\\
62 false: Do not report attributes used for Namespace declarations, and
63 optionally do not report original prefixed names (default).\\
64 access: (parsing) read-only; (not parsing) read/write
65\end{datadesc}
66
67\begin{datadesc}{feature_string_interning}
  Value: \code{"http://xml.org/sax/features/string-interning"}\\
69 true: All element names, prefixes, attribute names, Namespace URIs, and
70 local names are interned using the built-in intern function.\\
71 false: Names are not necessarily interned, although they may be (default).\\
72 access: (parsing) read-only; (not parsing) read/write
73\end{datadesc}
74
75\begin{datadesc}{feature_validation}
76 Value: \code{"http://xml.org/sax/features/validation"}\\
77 true: Report all validation errors (implies external-general-entities and
78 external-parameter-entities).\\
79 false: Do not report validation errors.\\
80 access: (parsing) read-only; (not parsing) read/write
81\end{datadesc}
82
83\begin{datadesc}{feature_external_ges}
84 Value: \code{"http://xml.org/sax/features/external-general-entities"}\\
85 true: Include all external general (text) entities.\\
86 false: Do not include external general entities.\\
87 access: (parsing) read-only; (not parsing) read/write
88\end{datadesc}
89
90\begin{datadesc}{feature_external_pes}
91 Value: \code{"http://xml.org/sax/features/external-parameter-entities"}\\
92 true: Include all external parameter entities, including the external
93 DTD subset.\\
94 false: Do not include any external parameter entities, even the external
95 DTD subset.\\
96 access: (parsing) read-only; (not parsing) read/write
97\end{datadesc}
98
99\begin{datadesc}{all_features}
100 List of all features.
101\end{datadesc}
102
103\begin{datadesc}{property_lexical_handler}
104 Value: \code{"http://xml.org/sax/properties/lexical-handler"}\\
105 data type: xml.sax.sax2lib.LexicalHandler (not supported in Python 2)\\
106 description: An optional extension handler for lexical events like comments.\\
107 access: read/write
108\end{datadesc}
109
110\begin{datadesc}{property_declaration_handler}
111 Value: \code{"http://xml.org/sax/properties/declaration-handler"}\\
112 data type: xml.sax.sax2lib.DeclHandler (not supported in Python 2)\\
113 description: An optional extension handler for DTD-related events other
114 than notations and unparsed entities.\\
115 access: read/write
116\end{datadesc}
117
118\begin{datadesc}{property_dom_node}
119 Value: \code{"http://xml.org/sax/properties/dom-node"}\\
120 data type: org.w3c.dom.Node (not supported in Python 2) \\
121 description: When parsing, the current DOM node being visited if this is
122 a DOM iterator; when not parsing, the root DOM node for
123 iteration.\\
124 access: (parsing) read-only; (not parsing) read/write
125\end{datadesc}
126
127\begin{datadesc}{property_xml_string}
128 Value: \code{"http://xml.org/sax/properties/xml-string"}\\
129 data type: String\\
130 description: The literal string of characters that was the source for
131 the current event.\\
132 access: read-only
133\end{datadesc}
134
135\begin{datadesc}{all_properties}
136 List of all known property names.
137\end{datadesc}
138
139
140\subsection{ContentHandler Objects \label{content-handler-objects}}
141
142Users are expected to subclass \class{ContentHandler} to support their
143application. The following methods are called by the parser on the
144appropriate events in the input document:
145
146\begin{methoddesc}[ContentHandler]{setDocumentLocator}{locator}
147 Called by the parser to give the application a locator for locating
148 the origin of document events.
149
  SAX parsers are strongly encouraged (though not absolutely required)
  to supply a locator: if a parser does so, it must supply the locator to
  the application by invoking this method before invoking any of the
  other methods in the \class{ContentHandler} interface.
154
155 The locator allows the application to determine the end position of
156 any document-related event, even if the parser is not reporting an
157 error. Typically, the application will use this information for
158 reporting its own errors (such as character content that does not
159 match an application's business rules). The information returned by
160 the locator is probably not sufficient for use with a search engine.
161
162 Note that the locator will return correct information only during
163 the invocation of the events in this interface. The application
164 should not attempt to use it at any other time.
165\end{methoddesc}
166
167\begin{methoddesc}[ContentHandler]{startDocument}{}
168 Receive notification of the beginning of a document.
169
  The SAX parser will invoke this method only once, before any other
  methods in this interface or in \class{DTDHandler} (except for
  \method{setDocumentLocator()}).
173\end{methoddesc}
174
175\begin{methoddesc}[ContentHandler]{endDocument}{}
176 Receive notification of the end of a document.
177
178 The SAX parser will invoke this method only once, and it will be the
179 last method invoked during the parse. The parser shall not invoke
180 this method until it has either abandoned parsing (because of an
181 unrecoverable error) or reached the end of input.
182\end{methoddesc}
183
184\begin{methoddesc}[ContentHandler]{startPrefixMapping}{prefix, uri}
185 Begin the scope of a prefix-URI Namespace mapping.
186
187 The information from this event is not necessary for normal
188 Namespace processing: the SAX XML reader will automatically replace
189 prefixes for element and attribute names when the
Fred Drake10b81ce2001-11-06 22:13:19 +0000190 \code{feature_namespaces} feature is enabled (the default).
Fred Drake014f0e32000-10-12 20:05:09 +0000191
192%% XXX This is not really the default, is it? MvL
193
194 There are cases, however, when applications need to use prefixes in
195 character data or in attribute values, where they cannot safely be
196 expanded automatically; the start/endPrefixMapping event supplies
197 the information to the application to expand prefixes in those
198 contexts itself, if necessary.
199
  Note that \method{startPrefixMapping()} and \method{endPrefixMapping()}
  events are not guaranteed to be properly nested relative to each other:
  all \method{startPrefixMapping()} events will occur before the
  corresponding \method{startElement()} event, and all
  \method{endPrefixMapping()} events will occur after the corresponding
  \method{endElement()} event, but their order is not guaranteed.
206\end{methoddesc}
207
208\begin{methoddesc}[ContentHandler]{endPrefixMapping}{prefix}
209 End the scope of a prefix-URI mapping.
210
  See \method{startPrefixMapping()} for details. This event will always
  occur after the corresponding \method{endElement()} event, but the order of
  \method{endPrefixMapping()} events is not otherwise guaranteed.
214\end{methoddesc}
215
216\begin{methoddesc}[ContentHandler]{startElement}{name, attrs}
217 Signals the start of an element in non-namespace mode.
218
219 The \var{name} parameter contains the raw XML 1.0 name of the
220 element type as a string and the \var{attrs} parameter holds an
221 instance of the \class{Attributes} class containing the attributes
222 of the element.
223\end{methoddesc}
224
225\begin{methoddesc}[ContentHandler]{endElement}{name}
226 Signals the end of an element in non-namespace mode.
227
  The \var{name} parameter contains the name of the element type, just
  as with the \method{startElement()} event.
230\end{methoddesc}
231
232\begin{methoddesc}[ContentHandler]{startElementNS}{name, qname, attrs}
233 Signals the start of an element in namespace mode.
234
235 The \var{name} parameter contains the name of the element type as a
236 (uri, localname) tuple, the \var{qname} parameter the raw XML 1.0
237 name used in the source document, and the \var{attrs} parameter
238 holds an instance of the \class{AttributesNS} class containing the
239 attributes of the element.
240
241 Parsers may set the \var{qname} parameter to \code{None}, unless the
Fred Drake10b81ce2001-11-06 22:13:19 +0000242 \code{feature_namespace_prefixes} feature is activated.
Fred Drake014f0e32000-10-12 20:05:09 +0000243\end{methoddesc}
244
245\begin{methoddesc}[ContentHandler]{endElementNS}{name, qname}
246 Signals the end of an element in namespace mode.
247
248 The \var{name} parameter contains the name of the element type, just
Fred Drake10b81ce2001-11-06 22:13:19 +0000249 as with the \method{startElementNS()} method, likewise the
250 \var{qname} parameter.
Fred Drake014f0e32000-10-12 20:05:09 +0000251\end{methoddesc}
252
253\begin{methoddesc}[ContentHandler]{characters}{content}
254 Receive notification of character data.
255
256 The Parser will call this method to report each chunk of character
257 data. SAX parsers may return all contiguous character data in a
258 single chunk, or they may split it into several chunks; however, all
259 of the characters in any single event must come from the same
260 external entity so that the Locator provides useful information.
261
  \var{content} may be a Unicode string or a byte string; the
  \code{expat} reader module always produces Unicode strings.
Fred Drakee119c8f2000-12-04 22:04:15 +0000264
Fred Drake0aa811c2001-10-20 04:24:09 +0000265 \note{The earlier SAX 1 interface provided by the Python
Fred Drakee119c8f2000-12-04 22:04:15 +0000266 XML Special Interest Group used a more Java-like interface for this
Fred Drake21e4dd02000-12-04 22:29:17 +0000267 method. Since most parsers used from Python did not take advantage
Fred Drakee119c8f2000-12-04 22:04:15 +0000268 of the older interface, the simpler signature was chosen to replace
269 it. To convert old code to the new interface, use \var{content}
270 instead of slicing content with the old \var{offset} and
Fred Drake0aa811c2001-10-20 04:24:09 +0000271 \var{length} parameters.}
Fred Drake014f0e32000-10-12 20:05:09 +0000272\end{methoddesc}
273
\begin{methoddesc}[ContentHandler]{ignorableWhitespace}{whitespace}
275 Receive notification of ignorable whitespace in element content.
276
277 Validating Parsers must use this method to report each chunk
278 of ignorable whitespace (see the W3C XML 1.0 recommendation,
279 section 2.10): non-validating parsers may also use this method
280 if they are capable of parsing and using content models.
281
282 SAX parsers may return all contiguous whitespace in a single
283 chunk, or they may split it into several chunks; however, all
284 of the characters in any single event must come from the same
285 external entity, so that the Locator provides useful
286 information.
287\end{methoddesc}
288
289\begin{methoddesc}[ContentHandler]{processingInstruction}{target, data}
290 Receive notification of a processing instruction.
291
292 The Parser will invoke this method once for each processing
293 instruction found: note that processing instructions may occur
294 before or after the main document element.
295
296 A SAX parser should never report an XML declaration (XML 1.0,
297 section 2.8) or a text declaration (XML 1.0, section 4.3.1) using
298 this method.
299\end{methoddesc}
300
301\begin{methoddesc}[ContentHandler]{skippedEntity}{name}
302 Receive notification of a skipped entity.
303
304 The Parser will invoke this method once for each entity
305 skipped. Non-validating processors may skip entities if they have
306 not seen the declarations (because, for example, the entity was
307 declared in an external DTD subset). All processors may skip
  external entities, depending on the values of the
  \code{feature_external_ges} and the
  \code{feature_external_pes} features.
Fred Drake014f0e32000-10-12 20:05:09 +0000311\end{methoddesc}
312
313
314\subsection{DTDHandler Objects \label{dtd-handler-objects}}
315
316\class{DTDHandler} instances provide the following methods:
317
318\begin{methoddesc}[DTDHandler]{notationDecl}{name, publicId, systemId}
319 Handle a notation declaration event.
320\end{methoddesc}
321
322\begin{methoddesc}[DTDHandler]{unparsedEntityDecl}{name, publicId,
323 systemId, ndata}
324 Handle an unparsed entity declaration event.
325\end{methoddesc}
326
327
328\subsection{EntityResolver Objects \label{entity-resolver-objects}}
329
330\begin{methoddesc}[EntityResolver]{resolveEntity}{publicId, systemId}
331 Resolve the system identifier of an entity and return either the
332 system identifier to read from as a string, or an InputSource to
333 read from. The default implementation returns \var{systemId}.
334\end{methoddesc}
Fred Drake5b567652000-12-19 04:07:54 +0000335
336
337\subsection{ErrorHandler Objects \label{sax-error-handler}}
338
339Objects with this interface are used to receive error and warning
information from the \class{XMLReader}. If you create an object that
implements this interface and register the object with your
\class{XMLReader}, the parser will call the methods in your object to
343report all warnings and errors. There are three levels of errors
344available: warnings, (possibly) recoverable errors, and unrecoverable
345errors. All methods take a \exception{SAXParseException} as the only
346parameter. Errors and warnings may be converted to an exception by
347raising the passed-in exception object.
348
349\begin{methoddesc}[ErrorHandler]{error}{exception}
350 Called when the parser encounters a recoverable error. If this method
351 does not raise an exception, parsing may continue, but further document
352 information should not be expected by the application. Allowing the
353 parser to continue may allow additional errors to be discovered in the
354 input document.
355\end{methoddesc}
356
357\begin{methoddesc}[ErrorHandler]{fatalError}{exception}
358 Called when the parser encounters an error it cannot recover from;
359 parsing is expected to terminate when this method returns.
360\end{methoddesc}
361
362\begin{methoddesc}[ErrorHandler]{warning}{exception}
363 Called when the parser presents minor warning information to the
364 application. Parsing is expected to continue when this method returns,
365 and document information will continue to be passed to the application.
366 Raising an exception in this method will cause parsing to end.
367\end{methoddesc}