blob: bd72fd7709ba2eb60d6b4dd36fe39b8cba9a3015 [file] [log] [blame]
1\section{\module{xml.sax.handler} ---
2 Base classes for SAX handlers}
3
4\declaremodule{standard}{xml.sax.handler}
5\modulesynopsis{Base classes for SAX event handlers.}
6\sectionauthor{Martin v. L\"owis}{loewis@informatik.hu-berlin.de}
7\moduleauthor{Lars Marius Garshol}{larsga@garshol.priv.no}
8
9\versionadded{2.0}
10
11
12The SAX API defines four kinds of handlers: content handlers, DTD
13handlers, error handlers, and entity resolvers. Applications normally
14only need to implement those interfaces whose events they are
15interested in; they can implement the interfaces in a single object or
16in multiple objects. Handler implementations should inherit from the
17base classes provided in the module \module{xml.sax.handler}, so that all
18methods get default implementations.
19
20\begin{classdesc}{ContentHandler}{}
21 This is the main callback interface in SAX, and the one most
22 important to applications. The order of events in this interface
23 mirrors the order of the information in the document.
24\end{classdesc}
25
26\begin{classdesc}{DTDHandler}{}
27 Handle DTD events.
28
29 This interface specifies only those DTD events required for basic
30 parsing (unparsed entities and attributes).
31\end{classdesc}
32
33\begin{classdesc}{EntityResolver}{}
34 Basic interface for resolving entities. If you create an object
35 implementing this interface and register it with your parser, the
36 parser will call the method in your object to resolve all
37 external entities.
38\end{classdesc}
39
40In addition to these classes, \module{xml.sax.handler} provides
41symbolic constants for the feature and property names.
42
43\begin{datadesc}{feature_namespaces}
44 Value: \code{"http://xml.org/sax/features/namespaces"}\\
45 true: Perform Namespace processing (default).\\
46 false: Optionally do not perform Namespace processing
47 (implies namespace-prefixes).\\
48 access: (parsing) read-only; (not parsing) read/write
49\end{datadesc}
50
51\begin{datadesc}{feature_namespace_prefixes}
52 Value: \code{"http://xml.org/sax/features/namespace-prefixes"}\\
53 true: Report the original prefixed names and attributes used for Namespace
54 declarations.\\
55 false: Do not report attributes used for Namespace declarations, and
56 optionally do not report original prefixed names (default).\\
57 access: (parsing) read-only; (not parsing) read/write
58\end{datadesc}
59
60\begin{datadesc}{feature_string_interning}
61 Value: \code{"http://xml.org/sax/features/string-interning"}\\
62 true: All element names, prefixes, attribute names, Namespace URIs, and
63 local names are interned using the built-in intern function.\\
64 false: Names are not necessarily interned, although they may be (default).\\
65 access: (parsing) read-only; (not parsing) read/write
66\end{datadesc}
67
68\begin{datadesc}{feature_validation}
69 Value: \code{"http://xml.org/sax/features/validation"}\\
70 true: Report all validation errors (implies external-general-entities and
71 external-parameter-entities).\\
72 false: Do not report validation errors.\\
73 access: (parsing) read-only; (not parsing) read/write
74\end{datadesc}
75
76\begin{datadesc}{feature_external_ges}
77 Value: \code{"http://xml.org/sax/features/external-general-entities"}\\
78 true: Include all external general (text) entities.\\
79 false: Do not include external general entities.\\
80 access: (parsing) read-only; (not parsing) read/write
81\end{datadesc}
82
83\begin{datadesc}{feature_external_pes}
84 Value: \code{"http://xml.org/sax/features/external-parameter-entities"}\\
85 true: Include all external parameter entities, including the external
86 DTD subset.\\
87 false: Do not include any external parameter entities, even the external
88 DTD subset.\\
89 access: (parsing) read-only; (not parsing) read/write
90\end{datadesc}
91
92\begin{datadesc}{all_features}
93 List of all features.
94\end{datadesc}
95
96\begin{datadesc}{property_lexical_handler}
97 Value: \code{"http://xml.org/sax/properties/lexical-handler"}\\
98 data type: xml.sax.sax2lib.LexicalHandler (not supported in Python 2)\\
99 description: An optional extension handler for lexical events like comments.\\
100 access: read/write
101\end{datadesc}
102
103\begin{datadesc}{property_declaration_handler}
104 Value: \code{"http://xml.org/sax/properties/declaration-handler"}\\
105 data type: xml.sax.sax2lib.DeclHandler (not supported in Python 2)\\
106 description: An optional extension handler for DTD-related events other
107 than notations and unparsed entities.\\
108 access: read/write
109\end{datadesc}
110
111\begin{datadesc}{property_dom_node}
112 Value: \code{"http://xml.org/sax/properties/dom-node"}\\
113 data type: org.w3c.dom.Node (not supported in Python 2) \\
114 description: When parsing, the current DOM node being visited if this is
115 a DOM iterator; when not parsing, the root DOM node for
116 iteration.\\
117 access: (parsing) read-only; (not parsing) read/write
118\end{datadesc}
119
120\begin{datadesc}{property_xml_string}
121 Value: \code{"http://xml.org/sax/properties/xml-string"}\\
122 data type: String\\
123 description: The literal string of characters that was the source for
124 the current event.\\
125 access: read-only
126\end{datadesc}
127
128\begin{datadesc}{all_properties}
129 List of all known property names.
130\end{datadesc}
131
132
133\subsection{ContentHandler Objects \label{content-handler-objects}}
134
135Users are expected to subclass \class{ContentHandler} to support their
136application. The following methods are called by the parser on the
137appropriate events in the input document:
138
139\begin{methoddesc}[ContentHandler]{setDocumentLocator}{locator}
140 Called by the parser to give the application a locator for locating
141 the origin of document events.
142
143 SAX parsers are strongly encouraged (though not absolutely required)
144 to supply a locator: if a parser does so, it must supply the locator
145 to the application by invoking this method before invoking any of the
146 other methods in the \class{ContentHandler} interface.
147
148 The locator allows the application to determine the end position of
149 any document-related event, even if the parser is not reporting an
150 error. Typically, the application will use this information for
151 reporting its own errors (such as character content that does not
152 match an application's business rules). The information returned by
153 the locator is probably not sufficient for use with a search engine.
154
155 Note that the locator will return correct information only during
156 the invocation of the events in this interface. The application
157 should not attempt to use it at any other time.
158\end{methoddesc}
159
160\begin{methoddesc}[ContentHandler]{startDocument}{}
161 Receive notification of the beginning of a document.
162
163 The SAX parser will invoke this method only once, before any other
164 methods in this interface or in DTDHandler (except for
165 \method{setDocumentLocator()}).
166\end{methoddesc}
167
168\begin{methoddesc}[ContentHandler]{endDocument}{}
169 Receive notification of the end of a document.
170
171 The SAX parser will invoke this method only once, and it will be the
172 last method invoked during the parse. The parser shall not invoke
173 this method until it has either abandoned parsing (because of an
174 unrecoverable error) or reached the end of input.
175\end{methoddesc}
176
177\begin{methoddesc}[ContentHandler]{startPrefixMapping}{prefix, uri}
178 Begin the scope of a prefix-URI Namespace mapping.
179
180 The information from this event is not necessary for normal
181 Namespace processing: the SAX XML reader will automatically replace
182 prefixes for element and attribute names when the
183 \code{http://xml.org/sax/features/namespaces} feature is true (the
184 default).
185
186%% XXX This is not really the default, is it? MvL
187
188 There are cases, however, when applications need to use prefixes in
189 character data or in attribute values, where they cannot safely be
190 expanded automatically; the start/endPrefixMapping event supplies
191 the information to the application to expand prefixes in those
192 contexts itself, if necessary.
193
194 Note that start/endPrefixMapping events are not guaranteed to be
195 properly nested relative to each other: all
196 \method{startPrefixMapping()} events will occur before the
197 corresponding startElement event, and all \method{endPrefixMapping()}
198 events will occur after the corresponding \method{endElement()} event,
199 but their order is not guaranteed.
200\end{methoddesc}
201
202\begin{methoddesc}[ContentHandler]{endPrefixMapping}{prefix}
203 End the scope of a prefix-URI mapping.
204
205 See \method{startPrefixMapping()} for details. This event will always
206 occur after the corresponding endElement event, but the order of
207 endPrefixMapping events is not otherwise guaranteed.
208\end{methoddesc}
209
210\begin{methoddesc}[ContentHandler]{startElement}{name, attrs}
211 Signals the start of an element in non-namespace mode.
212
213 The \var{name} parameter contains the raw XML 1.0 name of the
214 element type as a string and the \var{attrs} parameter holds an
215 instance of the \class{Attributes} class containing the attributes
216 of the element.
217\end{methoddesc}
218
219\begin{methoddesc}[ContentHandler]{endElement}{name}
220 Signals the end of an element in non-namespace mode.
221
222 The \var{name} parameter contains the name of the element type, just
223 as with the startElement event.
224\end{methoddesc}
225
226\begin{methoddesc}[ContentHandler]{startElementNS}{name, qname, attrs}
227 Signals the start of an element in namespace mode.
228
229 The \var{name} parameter contains the name of the element type as a
230 (uri, localname) tuple, the \var{qname} parameter the raw XML 1.0
231 name used in the source document, and the \var{attrs} parameter
232 holds an instance of the \class{AttributesNS} class containing the
233 attributes of the element.
234
235 Parsers may set the \var{qname} parameter to \code{None}, unless the
236 \code{http://xml.org/sax/features/namespace-prefixes} feature is
237 activated.
238\end{methoddesc}
239
240\begin{methoddesc}[ContentHandler]{endElementNS}{name, qname}
241 Signals the end of an element in namespace mode.
242
243 The \var{name} parameter contains the name of the element type, just
244 as with the startElementNS event, likewise the \var{qname} parameter.
245\end{methoddesc}
246
247\begin{methoddesc}[ContentHandler]{characters}{content}
248 Receive notification of character data.
249
250 The Parser will call this method to report each chunk of character
251 data. SAX parsers may return all contiguous character data in a
252 single chunk, or they may split it into several chunks; however, all
253 of the characters in any single event must come from the same
254 external entity so that the Locator provides useful information.
255
256 \var{content} may be a Unicode string or a byte string; the
257 \code{expat} reader module always produces Unicode strings.
258
259 \strong{Note:} The earlier SAX 1 interface provided by the Python
260 XML Special Interest Group used a more Java-like interface for this
261 method. Since most parsers used from Python did not take advantage
262 of the older interface, the simpler signature was chosen to replace
263 it. To convert old code to the new interface, use \var{content}
264 instead of slicing content with the old \var{offset} and
265 \var{length} parameters.
266\end{methoddesc}
267
268\begin{methoddesc}[ContentHandler]{ignorableWhitespace}{whitespace}
269 Receive notification of ignorable whitespace in element content.
270
271 Validating Parsers must use this method to report each chunk
272 of ignorable whitespace (see the W3C XML 1.0 recommendation,
273 section 2.10): non-validating parsers may also use this method
274 if they are capable of parsing and using content models.
275
276 SAX parsers may return all contiguous whitespace in a single
277 chunk, or they may split it into several chunks; however, all
278 of the characters in any single event must come from the same
279 external entity, so that the Locator provides useful
280 information.
281\end{methoddesc}
282
283\begin{methoddesc}[ContentHandler]{processingInstruction}{target, data}
284 Receive notification of a processing instruction.
285
286 The Parser will invoke this method once for each processing
287 instruction found: note that processing instructions may occur
288 before or after the main document element.
289
290 A SAX parser should never report an XML declaration (XML 1.0,
291 section 2.8) or a text declaration (XML 1.0, section 4.3.1) using
292 this method.
293\end{methoddesc}
294
295\begin{methoddesc}[ContentHandler]{skippedEntity}{name}
296 Receive notification of a skipped entity.
297
298 The Parser will invoke this method once for each entity
299 skipped. Non-validating processors may skip entities if they have
300 not seen the declarations (because, for example, the entity was
301 declared in an external DTD subset). All processors may skip
302 external entities, depending on the values of the
303 \code{http://xml.org/sax/features/external-general-entities} and the
304 \code{http://xml.org/sax/features/external-parameter-entities}
305 features.
306\end{methoddesc}
307
308
309\subsection{DTDHandler Objects \label{dtd-handler-objects}}
310
311\class{DTDHandler} instances provide the following methods:
312
313\begin{methoddesc}[DTDHandler]{notationDecl}{name, publicId, systemId}
314 Handle a notation declaration event.
315\end{methoddesc}
316
317\begin{methoddesc}[DTDHandler]{unparsedEntityDecl}{name, publicId,
318 systemId, ndata}
319 Handle an unparsed entity declaration event.
320\end{methoddesc}
321
322
323\subsection{EntityResolver Objects \label{entity-resolver-objects}}
324
325\begin{methoddesc}[EntityResolver]{resolveEntity}{publicId, systemId}
326 Resolve the system identifier of an entity and return either the
327 system identifier to read from as a string, or an InputSource to
328 read from. The default implementation returns \var{systemId}.
329\end{methoddesc}