blob: ed0bf6a17f83d70f59dea10124fecd901b750fa2 [file] [log] [blame]
Fred Drake7fbc85c2000-09-23 04:47:56 +00001\section{\module{xml.parsers.expat} ---
Fred Drakeefffe8e2000-10-29 05:10:30 +00002 Fast XML parsing using Expat}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +00003
Fred Drake5ed1dac2001-02-08 15:40:33 +00004% Markup notes:
5%
6% Many of the attributes of the XMLParser objects are callbacks.
7% Since signature information must be presented, these are described
8% using the methoddesc environment. Since they are attributes which
9% are set by client code, in-text references to these attributes
10% should be marked using the \member macro and should not include the
11% parentheses used when marking functions and methods.
12
Fred Drake7fbc85c2000-09-23 04:47:56 +000013\declaremodule{standard}{xml.parsers.expat}
14\modulesynopsis{An interface to the Expat non-validating XML parser.}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +000015\moduleauthor{Paul Prescod}{paul@prescod.net}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +000016
Fred Drake7fbc85c2000-09-23 04:47:56 +000017\versionadded{2.0}
18
Fred Drakeefffe8e2000-10-29 05:10:30 +000019The \module{xml.parsers.expat} module is a Python interface to the
20Expat\index{Expat} non-validating XML parser.
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +000021The module provides a single extension type, \class{xmlparser}, that
22represents the current state of an XML parser. After an
23\class{xmlparser} object has been created, various attributes of the object
24can be set to handler functions. When an XML document is then fed to
25the parser, the handler functions are called for the character data
26and markup in the XML document.
Fred Drake7fbc85c2000-09-23 04:47:56 +000027
28This module uses the \module{pyexpat}\refbimodindex{pyexpat} module to
29provide access to the Expat parser. Direct use of the
30\module{pyexpat} module is deprecated.
Fred Drakeefffe8e2000-10-29 05:10:30 +000031
32This module provides one exception and one type object:
33
Fred Drake1d8ad2b2001-02-14 18:54:32 +000034\begin{excdesc}{ExpatError}
Fred Drakee0af35e2001-09-20 20:43:28 +000035 The exception raised when Expat reports an error. See section
36 \ref{expaterror-objects}, ``ExpatError Exceptions,'' for more
37 information on interpreting Expat errors.
Fred Drakeefffe8e2000-10-29 05:10:30 +000038\end{excdesc}
39
Fred Drake1d8ad2b2001-02-14 18:54:32 +000040\begin{excdesc}{error}
41 Alias for \exception{ExpatError}.
42\end{excdesc}
43
Fred Drakeefffe8e2000-10-29 05:10:30 +000044\begin{datadesc}{XMLParserType}
45 The type of the return values from the \function{ParserCreate()}
46 function.
47\end{datadesc}
48
49
Fred Drake7fbc85c2000-09-23 04:47:56 +000050The \module{xml.parsers.expat} module contains two functions:
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +000051
52\begin{funcdesc}{ErrorString}{errno}
53Returns an explanatory string for a given error number \var{errno}.
54\end{funcdesc}
55
Fred Drakeefffe8e2000-10-29 05:10:30 +000056\begin{funcdesc}{ParserCreate}{\optional{encoding\optional{,
57 namespace_separator}}}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +000058Creates and returns a new \class{xmlparser} object.
59\var{encoding}, if specified, must be a string naming the encoding
60used by the XML data. Expat doesn't support as many encodings as
61Python does, and its repertoire of encodings can't be extended; it
Fred Drake5ed1dac2001-02-08 15:40:33 +000062supports UTF-8, UTF-16, ISO-8859-1 (Latin1), and ASCII. If
63\var{encoding} is given it will override the implicit or explicit
64encoding of the document.
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +000065
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +000066Expat can optionally do XML namespace processing for you, enabled by
Fred Drakeefffe8e2000-10-29 05:10:30 +000067providing a value for \var{namespace_separator}. The value must be a
68one-character string; a \exception{ValueError} will be raised if the
69string has an illegal length (\code{None} is considered the same as
70omission). When namespace processing is enabled, element type names
71and attribute names that belong to a namespace will be expanded. The
72element name passed to the element handlers
Fred Drake5ed1dac2001-02-08 15:40:33 +000073\member{StartElementHandler} and \member{EndElementHandler}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +000074will be the concatenation of the namespace URI, the namespace
75separator character, and the local part of the name. If the namespace
Fred Drakeefffe8e2000-10-29 05:10:30 +000076separator is a zero byte (\code{chr(0)}) then the namespace URI and
Fred Drake5ed1dac2001-02-08 15:40:33 +000077the local part will be concatenated without any separator.
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +000078
Fred Drake2fef3ab2000-11-28 06:38:22 +000079For example, if \var{namespace_separator} is set to a space character
80(\character{ }) and the following document is parsed:
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +000081
82\begin{verbatim}
83<?xml version="1.0"?>
84<root xmlns = "http://default-namespace.org/"
85 xmlns:py = "http://www.python.org/ns/">
86 <py:elem1 />
87 <elem2 xmlns="" />
88</root>
89\end{verbatim}
90
Fred Drake5ed1dac2001-02-08 15:40:33 +000091\member{StartElementHandler} will receive the following strings
Fred Draked79c33a2000-09-25 14:14:30 +000092for each element:
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +000093
94\begin{verbatim}
95http://default-namespace.org/ root
96http://www.python.org/ns/ elem1
97elem2
98\end{verbatim}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +000099\end{funcdesc}
100
Fred Drakef08cbb12000-12-23 22:19:05 +0000101
Fred Drakedce695a2002-06-20 21:06:03 +0000102\begin{seealso}
103 \seetitle[http://www.libexpat.org/]{The Expat XML Parser}
104 {Home page of the Expat project.}
105\end{seealso}
106
107
Fred Drakef08cbb12000-12-23 22:19:05 +0000108\subsection{XMLParser Objects \label{xmlparser-objects}}
109
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000110\class{xmlparser} objects have the following methods:
111
Fred Drake2fef3ab2000-11-28 06:38:22 +0000112\begin{methoddesc}[xmlparser]{Parse}{data\optional{, isfinal}}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000113Parses the contents of the string \var{data}, calling the appropriate
114handler functions to process the parsed data. \var{isfinal} must be
Fred Drakef08cbb12000-12-23 22:19:05 +0000115true on the final call to this method. \var{data} can be the empty
Fred Drakec05cbb02000-07-05 02:03:34 +0000116string at any time.
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000117\end{methoddesc}
118
Fred Drakeefffe8e2000-10-29 05:10:30 +0000119\begin{methoddesc}[xmlparser]{ParseFile}{file}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000120Parse XML data reading from the object \var{file}. \var{file} only
121needs to provide the \method{read(\var{nbytes})} method, returning the
122empty string when there's no more data.
123\end{methoddesc}
124
Fred Drakeefffe8e2000-10-29 05:10:30 +0000125\begin{methoddesc}[xmlparser]{SetBase}{base}
Fred Drake5ed1dac2001-02-08 15:40:33 +0000126Sets the base to be used for resolving relative URIs in system
127identifiers in declarations. Resolving relative identifiers is left
128to the application: this value will be passed through as the
\var{base} argument to the \member{ExternalEntityRefHandler},
\member{NotationDeclHandler}, and
\member{UnparsedEntityDeclHandler} functions.
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000132\end{methoddesc}
133
Fred Drakeefffe8e2000-10-29 05:10:30 +0000134\begin{methoddesc}[xmlparser]{GetBase}{}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000135Returns a string containing the base set by a previous call to
136\method{SetBase()}, or \code{None} if
137\method{SetBase()} hasn't been called.
138\end{methoddesc}
139
Fred Drake1d8ad2b2001-02-14 18:54:32 +0000140\begin{methoddesc}[xmlparser]{GetInputContext}{}
141Returns the input data that generated the current event as a string.
142The data is in the encoding of the entity which contains the text.
143When called while an event handler is not active, the return value is
144\code{None}.
145\versionadded{2.1}
146\end{methoddesc}
147
Fred Drakef08cbb12000-12-23 22:19:05 +0000148\begin{methoddesc}[xmlparser]{ExternalEntityParserCreate}{context\optional{,
149 encoding}}
150Create a ``child'' parser which can be used to parse an external
151parsed entity referred to by content parsed by the parent parser. The
Fred Drakeb162d182001-01-04 05:48:08 +0000152\var{context} parameter should be the string passed to the
\member{ExternalEntityRefHandler} handler function, described below.
Guido van Rossum4ca94712007-07-23 17:42:32 +0000154The child parser is created with the \member{ordered_attributes}
155and \member{specified_attributes} set to the
Fred Drake5ed1dac2001-02-08 15:40:33 +0000156values of this parser.
Fred Drakef08cbb12000-12-23 22:19:05 +0000157\end{methoddesc}
158
Fred Draked62d5072004-08-10 17:18:32 +0000159\begin{methoddesc}[xmlparser]{UseForeignDTD}{\optional{flag}}
160Calling this with a true value for \var{flag} (the default) will cause
161Expat to call the \member{ExternalEntityRefHandler} with
162\constant{None} for all arguments to allow an alternate DTD to be
163loaded. If the document does not contain a document type declaration,
164the \member{ExternalEntityRefHandler} will still be called, but the
165\member{StartDoctypeDeclHandler} and \member{EndDoctypeDeclHandler}
166will not be called.
167
168Passing a false value for \var{flag} will cancel a previous call that
169passed a true value, but otherwise has no effect.
170
171This method can only be called before the \method{Parse()} or
172\method{ParseFile()} methods are called; calling it after either of
those has been called causes \exception{ExpatError} to be raised with
174the \member{code} attribute set to
175\constant{errors.XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING}.
176
177\versionadded{2.3}
178\end{methoddesc}
179
Fred Drakeefffe8e2000-10-29 05:10:30 +0000180
Fred Draked79c33a2000-09-25 14:14:30 +0000181\class{xmlparser} objects have the following attributes:
Andrew M. Kuchling0690c862000-08-17 23:15:21 +0000182
Fred Drakef0b095d2002-07-17 20:31:52 +0000183\begin{memberdesc}[xmlparser]{buffer_size}
184The size of the buffer used when \member{buffer_text} is true. This
185value cannot be changed at this time.
186\versionadded{2.3}
187\end{memberdesc}
188
189\begin{memberdesc}[xmlparser]{buffer_text}
190Setting this to true causes the \class{xmlparser} object to buffer
191textual content returned by Expat to avoid multiple calls to the
\member{CharacterDataHandler} callback whenever possible. This can
193improve performance substantially since Expat normally breaks
194character data into chunks at every line ending. This attribute is
195false by default, and may be changed at any time.
196\versionadded{2.3}
197\end{memberdesc}
198
199\begin{memberdesc}[xmlparser]{buffer_used}
200If \member{buffer_text} is enabled, the number of bytes stored in the
201buffer. These bytes represent UTF-8 encoded text. This attribute has
202no meaningful interpretation when \member{buffer_text} is false.
203\versionadded{2.3}
204\end{memberdesc}
205
Fred Drake5ed1dac2001-02-08 15:40:33 +0000206\begin{memberdesc}[xmlparser]{ordered_attributes}
207Setting this attribute to a non-zero integer causes the attributes to
208be reported as a list rather than a dictionary. The attributes are
209presented in the order found in the document text. For each
210attribute, two list entries are presented: the attribute name and the
211attribute value. (Older versions of this module also used this
212format.) By default, this attribute is false; it may be changed at
213any time.
214\versionadded{2.1}
215\end{memberdesc}
216
Fred Drake5ed1dac2001-02-08 15:40:33 +0000217\begin{memberdesc}[xmlparser]{specified_attributes}
218If set to a non-zero integer, the parser will report only those
219attributes which were specified in the document instance and not those
220which were derived from attribute declarations. Applications which
221set this need to be especially careful to use what additional
222information is available from the declarations as needed to comply
223with the standards for the behavior of XML processors. By default,
224this attribute is false; it may be changed at any time.
225\versionadded{2.1}
226\end{memberdesc}
227
Andrew M. Kuchling0690c862000-08-17 23:15:21 +0000228The following attributes contain values relating to the most recent
229error encountered by an \class{xmlparser} object, and will only have
230correct values once a call to \method{Parse()} or \method{ParseFile()}
has raised an \exception{xml.parsers.expat.ExpatError} exception.
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000232
Fred Drakeefffe8e2000-10-29 05:10:30 +0000233\begin{memberdesc}[xmlparser]{ErrorByteIndex}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000234Byte index at which an error occurred.
Fred Drakeefffe8e2000-10-29 05:10:30 +0000235\end{memberdesc}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000236
Fred Drakeefffe8e2000-10-29 05:10:30 +0000237\begin{memberdesc}[xmlparser]{ErrorCode}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000238Numeric code specifying the problem. This value can be passed to the
239\function{ErrorString()} function, or compared to one of the constants
Fred Drake523ec572001-02-15 05:37:51 +0000240defined in the \code{errors} object.
Fred Drakeefffe8e2000-10-29 05:10:30 +0000241\end{memberdesc}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000242
Fred Drakeefffe8e2000-10-29 05:10:30 +0000243\begin{memberdesc}[xmlparser]{ErrorColumnNumber}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000244Column number at which an error occurred.
Fred Drakeefffe8e2000-10-29 05:10:30 +0000245\end{memberdesc}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000246
Fred Drakeefffe8e2000-10-29 05:10:30 +0000247\begin{memberdesc}[xmlparser]{ErrorLineNumber}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000248Line number at which an error occurred.
Fred Drakeefffe8e2000-10-29 05:10:30 +0000249\end{memberdesc}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000250
Dave Cole3203efb2004-08-26 00:37:31 +0000251The following attributes contain values relating to the current parse
252location in an \class{xmlparser} object. During a callback reporting
253a parse event they indicate the location of the first of the sequence
254of characters that generated the event. When called outside of a
255callback, the position indicated will be just past the last parse
256event (regardless of whether there was an associated callback).
257\versionadded{2.4}
258
259\begin{memberdesc}[xmlparser]{CurrentByteIndex}
260Current byte index in the parser input.
261\end{memberdesc}
262
263\begin{memberdesc}[xmlparser]{CurrentColumnNumber}
264Current column number in the parser input.
265\end{memberdesc}
266
267\begin{memberdesc}[xmlparser]{CurrentLineNumber}
268Current line number in the parser input.
269\end{memberdesc}
270
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000271Here is the list of handlers that can be set. To set a handler on an
Fred Drakec05cbb02000-07-05 02:03:34 +0000272\class{xmlparser} object \var{o}, use
273\code{\var{o}.\var{handlername} = \var{func}}. \var{handlername} must
274be taken from the following list, and \var{func} must be a callable
275object accepting the correct number of arguments. The arguments are
276all strings, unless otherwise stated.
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000277
Fred Drake5ed1dac2001-02-08 15:40:33 +0000278\begin{methoddesc}[xmlparser]{XmlDeclHandler}{version, encoding, standalone}
279Called when the XML declaration is parsed. The XML declaration is the
280(optional) declaration of the applicable version of the XML
281recommendation, the encoding of the document text, and an optional
282``standalone'' declaration. \var{version} and \var{encoding} will be
Guido van Rossum4ca94712007-07-23 17:42:32 +0000283strings, and \var{standalone} will be \code{1} if the document is
Fred Drake5ed1dac2001-02-08 15:40:33 +0000284declared standalone, \code{0} if it is declared not to be standalone,
285or \code{-1} if the standalone clause was omitted.
286This is only available with Expat version 1.95.0 or newer.
287\versionadded{2.1}
288\end{methoddesc}
289
290\begin{methoddesc}[xmlparser]{StartDoctypeDeclHandler}{doctypeName,
291 systemId, publicId,
292 has_internal_subset}
293Called when Expat begins parsing the document type declaration
294(\code{<!DOCTYPE \ldots}). The \var{doctypeName} is provided exactly
295as presented. The \var{systemId} and \var{publicId} parameters give
296the system and public identifiers if specified, or \code{None} if
297omitted. \var{has_internal_subset} will be true if the document
contains an internal document declaration subset.
299This requires Expat version 1.2 or newer.
300\end{methoddesc}
301
302\begin{methoddesc}[xmlparser]{EndDoctypeDeclHandler}{}
Raymond Hettinger68804312005-01-01 00:28:46 +0000303Called when Expat is done parsing the document type declaration.
Fred Drake5ed1dac2001-02-08 15:40:33 +0000304This requires Expat version 1.2 or newer.
305\end{methoddesc}
306
307\begin{methoddesc}[xmlparser]{ElementDeclHandler}{name, model}
308Called once for each element type declaration. \var{name} is the name
309of the element type, and \var{model} is a representation of the
310content model.
311\end{methoddesc}
312
313\begin{methoddesc}[xmlparser]{AttlistDeclHandler}{elname, attname,
314 type, default, required}
315Called for each declared attribute for an element type. If an
316attribute list declaration declares three attributes, this handler is
317called three times, once for each attribute. \var{elname} is the name
318of the element to which the declaration applies and \var{attname} is
319the name of the attribute declared. The attribute type is a string
320passed as \var{type}; the possible values are \code{'CDATA'},
\code{'ID'}, \code{'IDREF'}, \ldots{}
322\var{default} gives the default value for the attribute used when the
323attribute is not specified by the document instance, or \code{None} if
324there is no default value (\code{\#IMPLIED} values). If the attribute
325is required to be given in the document instance, \var{required} will
326be true.
327This requires Expat version 1.95.0 or newer.
328\end{methoddesc}
329
Fred Drakeefffe8e2000-10-29 05:10:30 +0000330\begin{methoddesc}[xmlparser]{StartElementHandler}{name, attributes}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000331Called for the start of every element. \var{name} is a string
332containing the element name, and \var{attributes} is a dictionary
333mapping attribute names to their values.
334\end{methoddesc}
335
Fred Drakeefffe8e2000-10-29 05:10:30 +0000336\begin{methoddesc}[xmlparser]{EndElementHandler}{name}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000337Called for the end of every element.
338\end{methoddesc}
339
Fred Drakeefffe8e2000-10-29 05:10:30 +0000340\begin{methoddesc}[xmlparser]{ProcessingInstructionHandler}{target, data}
Fred Drake5ed1dac2001-02-08 15:40:33 +0000341Called for every processing instruction.
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000342\end{methoddesc}
343
Fred Drakeefffe8e2000-10-29 05:10:30 +0000344\begin{methoddesc}[xmlparser]{CharacterDataHandler}{data}
Fred Drake5ed1dac2001-02-08 15:40:33 +0000345Called for character data. This will be called for normal character
346data, CDATA marked content, and ignorable whitespace. Applications
347which must distinguish these cases can use the
348\member{StartCdataSectionHandler}, \member{EndCdataSectionHandler},
349and \member{ElementDeclHandler} callbacks to collect the required
350information.
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000351\end{methoddesc}
352
Fred Drakeefffe8e2000-10-29 05:10:30 +0000353\begin{methoddesc}[xmlparser]{UnparsedEntityDeclHandler}{entityName, base,
354 systemId, publicId,
355 notationName}
Fred Drake5ed1dac2001-02-08 15:40:33 +0000356Called for unparsed (NDATA) entity declarations. This is only present
357for version 1.2 of the Expat library; for more recent versions, use
358\member{EntityDeclHandler} instead. (The underlying function in the
359Expat library has been declared obsolete.)
360\end{methoddesc}
361
362\begin{methoddesc}[xmlparser]{EntityDeclHandler}{entityName,
363 is_parameter_entity, value,
364 base, systemId,
365 publicId,
366 notationName}
367Called for all entity declarations. For parameter and internal
368entities, \var{value} will be a string giving the declared contents
369of the entity; this will be \code{None} for external entities. The
370\var{notationName} parameter will be \code{None} for parsed entities,
371and the name of the notation for unparsed entities.
Raymond Hettinger68804312005-01-01 00:28:46 +0000372\var{is_parameter_entity} will be true if the entity is a parameter
Fred Drake5ed1dac2001-02-08 15:40:33 +0000373entity or false for general entities (most applications only need to
374be concerned with general entities).
375This is only available starting with version 1.95.0 of the Expat
376library.
377\versionadded{2.1}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000378\end{methoddesc}
379
Fred Drakeefffe8e2000-10-29 05:10:30 +0000380\begin{methoddesc}[xmlparser]{NotationDeclHandler}{notationName, base,
381 systemId, publicId}
Called for notation declarations. \var{notationName}, \var{base},
\var{systemId}, and \var{publicId} are strings if given. If the
384public identifier is omitted, \var{publicId} will be \code{None}.
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000385\end{methoddesc}
386
Fred Drakeefffe8e2000-10-29 05:10:30 +0000387\begin{methoddesc}[xmlparser]{StartNamespaceDeclHandler}{prefix, uri}
Fred Drake5ed1dac2001-02-08 15:40:33 +0000388Called when an element contains a namespace declaration. Namespace
389declarations are processed before the \member{StartElementHandler} is
390called for the element on which declarations are placed.
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000391\end{methoddesc}
392
Fred Drakeefffe8e2000-10-29 05:10:30 +0000393\begin{methoddesc}[xmlparser]{EndNamespaceDeclHandler}{prefix}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000394Called when the closing tag is reached for an element
Fred Drake5ed1dac2001-02-08 15:40:33 +0000395that contained a namespace declaration. This is called once for each
396namespace declaration on the element in the reverse of the order for
397which the \member{StartNamespaceDeclHandler} was called to indicate
398the start of each namespace declaration's scope. Calls to this
399handler are made after the corresponding \member{EndElementHandler}
400for the end of the element.
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000401\end{methoddesc}
402
Fred Drakeefffe8e2000-10-29 05:10:30 +0000403\begin{methoddesc}[xmlparser]{CommentHandler}{data}
Fred Drake5ed1dac2001-02-08 15:40:33 +0000404Called for comments. \var{data} is the text of the comment, excluding
Fred Drake523ec572001-02-15 05:37:51 +0000405the leading `\code{<!-}\code{-}' and trailing `\code{-}\code{->}'.
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000406\end{methoddesc}
407
Fred Drakeefffe8e2000-10-29 05:10:30 +0000408\begin{methoddesc}[xmlparser]{StartCdataSectionHandler}{}
Fred Drake5ed1dac2001-02-08 15:40:33 +0000409Called at the start of a CDATA section. This and
Georg Brandl08caadc2005-12-16 19:23:33 +0000410\member{EndCdataSectionHandler} are needed to be able to identify
Fred Drake5ed1dac2001-02-08 15:40:33 +0000411the syntactical start and end for CDATA sections.
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000412\end{methoddesc}
413
Fred Drakeefffe8e2000-10-29 05:10:30 +0000414\begin{methoddesc}[xmlparser]{EndCdataSectionHandler}{}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000415Called at the end of a CDATA section.
416\end{methoddesc}
417
Fred Drakeefffe8e2000-10-29 05:10:30 +0000418\begin{methoddesc}[xmlparser]{DefaultHandler}{data}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000419Called for any characters in the XML document for
420which no applicable handler has been specified. This means
421characters that are part of a construct which could be reported, but
422for which no handler has been supplied.
423\end{methoddesc}
424
Fred Drakeefffe8e2000-10-29 05:10:30 +0000425\begin{methoddesc}[xmlparser]{DefaultHandlerExpand}{data}
This is the same as the \member{DefaultHandler},
427but doesn't inhibit expansion of internal entities.
428The entity reference will not be passed to the default handler.
429\end{methoddesc}
430
Fred Drake5ed1dac2001-02-08 15:40:33 +0000431\begin{methoddesc}[xmlparser]{NotStandaloneHandler}{} Called if the
432XML document hasn't been declared as being a standalone document.
433This happens when there is an external subset or a reference to a
434parameter entity, but the XML declaration does not set standalone to
\code{yes}. If this handler returns \code{0},
436then the parser will throw an \constant{XML_ERROR_NOT_STANDALONE}
437error. If this handler is not set, no exception is raised by the
438parser for this condition.
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000439\end{methoddesc}
440
Fred Drakeefffe8e2000-10-29 05:10:30 +0000441\begin{methoddesc}[xmlparser]{ExternalEntityRefHandler}{context, base,
442 systemId, publicId}
Fred Drake5ed1dac2001-02-08 15:40:33 +0000443Called for references to external entities. \var{base} is the current
base, as set by a previous call to \method{SetBase()}. The system and
public identifiers, \var{systemId} and \var{publicId}, are strings if
446given; if the public identifier is not given, \var{publicId} will be
Fred Drake523ec572001-02-15 05:37:51 +0000447\code{None}. The \var{context} value is opaque and should only be
448used as described below.
Fred Drake5ed1dac2001-02-08 15:40:33 +0000449
450For external entities to be parsed, this handler must be implemented.
451It is responsible for creating the sub-parser using
Fred Drake523ec572001-02-15 05:37:51 +0000452\code{ExternalEntityParserCreate(\var{context})}, initializing it with
453the appropriate callbacks, and parsing the entity. This handler
454should return an integer; if it returns \code{0}, the parser will
455throw an \constant{XML_ERROR_EXTERNAL_ENTITY_HANDLING} error,
456otherwise parsing will continue.
Fred Drake5ed1dac2001-02-08 15:40:33 +0000457
458If this handler is not provided, external entities are reported by the
459\member{DefaultHandler} callback, if provided.
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000460\end{methoddesc}
461
462
Fred Drake1d8ad2b2001-02-14 18:54:32 +0000463\subsection{ExpatError Exceptions \label{expaterror-objects}}
464\sectionauthor{Fred L. Drake, Jr.}{fdrake@acm.org}
465
466\exception{ExpatError} exceptions have a number of interesting
467attributes:
468
469\begin{memberdesc}[ExpatError]{code}
470 Expat's internal error number for the specific error. This will
471 match one of the constants defined in the \code{errors} object from
472 this module.
473 \versionadded{2.1}
474\end{memberdesc}
475
476\begin{memberdesc}[ExpatError]{lineno}
477 Line number on which the error was detected. The first line is
478 numbered \code{1}.
479 \versionadded{2.1}
480\end{memberdesc}
481
482\begin{memberdesc}[ExpatError]{offset}
483 Character offset into the line where the error occurred. The first
484 column is numbered \code{0}.
485 \versionadded{2.1}
486\end{memberdesc}
487
488
Fred Drake7fbc85c2000-09-23 04:47:56 +0000489\subsection{Example \label{expat-example}}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000490
Fred Drakec05cbb02000-07-05 02:03:34 +0000491The following program defines three handlers that just print out their
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000492arguments.
493
494\begin{verbatim}
Fred Drake7fbc85c2000-09-23 04:47:56 +0000495import xml.parsers.expat
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000496
497# 3 handler functions
498def start_element(name, attrs):
499 print 'Start element:', name, attrs
500def end_element(name):
501 print 'End element:', name
502def char_data(data):
503 print 'Character data:', repr(data)
504
Fred Drake7fbc85c2000-09-23 04:47:56 +0000505p = xml.parsers.expat.ParserCreate()
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000506
507p.StartElementHandler = start_element
Fred Drake7fbc85c2000-09-23 04:47:56 +0000508p.EndElementHandler = end_element
509p.CharacterDataHandler = char_data
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000510
511p.Parse("""<?xml version="1.0"?>
512<parent id="top"><child1 name="paul">Text goes here</child1>
513<child2 name="fred">More text</child2>
Fred Drakea41b2bb2002-12-03 22:57:37 +0000514</parent>""", 1)
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000515\end{verbatim}
516
517The output from this program is:
518
519\begin{verbatim}
520Start element: parent {'id': 'top'}
521Start element: child1 {'name': 'paul'}
522Character data: 'Text goes here'
523End element: child1
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000524Character data: '\n'
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000525Start element: child2 {'name': 'fred'}
526Character data: 'More text'
527End element: child2
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000528Character data: '\n'
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000529End element: parent
530\end{verbatim}
Fred Drakec05cbb02000-07-05 02:03:34 +0000531
532
Fred Drake5ed1dac2001-02-08 15:40:33 +0000533\subsection{Content Model Descriptions \label{expat-content-models}}
534\sectionauthor{Fred L. Drake, Jr.}{fdrake@acm.org}
535
Content models are described using nested tuples. Each tuple
contains four values: the type, the quantifier, the name, and a tuple
of children. Children are simply additional content model
descriptions.
540
541The values of the first two fields are constants defined in the
542\code{model} object of the \module{xml.parsers.expat} module. These
543constants can be collected in two groups: the model type group and the
544quantifier group.
545
546The constants in the model type group are:
547
548\begin{datadescni}{XML_CTYPE_ANY}
549The element named by the model name was declared to have a content
550model of \code{ANY}.
551\end{datadescni}
552
553\begin{datadescni}{XML_CTYPE_CHOICE}
554The named element allows a choice from a number of options; this is
555used for content models such as \code{(A | B | C)}.
556\end{datadescni}
557
558\begin{datadescni}{XML_CTYPE_EMPTY}
559Elements which are declared to be \code{EMPTY} have this model type.
560\end{datadescni}
561
562\begin{datadescni}{XML_CTYPE_MIXED}
563\end{datadescni}
564
565\begin{datadescni}{XML_CTYPE_NAME}
566\end{datadescni}
567
568\begin{datadescni}{XML_CTYPE_SEQ}
569Models which represent a series of models which follow one after the
570other are indicated with this model type. This is used for models
571such as \code{(A, B, C)}.
572\end{datadescni}
573
574
575The constants in the quantifier group are:
576
577\begin{datadescni}{XML_CQUANT_NONE}
Fred Drakee0af35e2001-09-20 20:43:28 +0000578No modifier is given, so it can appear exactly once, as for \code{A}.
Fred Drake5ed1dac2001-02-08 15:40:33 +0000579\end{datadescni}
580
581\begin{datadescni}{XML_CQUANT_OPT}
Fred Drakee0af35e2001-09-20 20:43:28 +0000582The model is optional: it can appear once or not at all, as for
Fred Drake5ed1dac2001-02-08 15:40:33 +0000583\code{A?}.
584\end{datadescni}
585
586\begin{datadescni}{XML_CQUANT_PLUS}
Fred Drakee0af35e2001-09-20 20:43:28 +0000587The model must occur one or more times (like \code{A+}).
Fred Drake5ed1dac2001-02-08 15:40:33 +0000588\end{datadescni}
589
590\begin{datadescni}{XML_CQUANT_REP}
591The model must occur zero or more times, as for \code{A*}.
592\end{datadescni}
593
594
Fred Drake7fbc85c2000-09-23 04:47:56 +0000595\subsection{Expat error constants \label{expat-errors}}
Fred Drakec05cbb02000-07-05 02:03:34 +0000596
Fred Drake1d8ad2b2001-02-14 18:54:32 +0000597The following constants are provided in the \code{errors} object of
598the \refmodule{xml.parsers.expat} module. These constants are useful
599in interpreting some of the attributes of the \exception{ExpatError}
600exception objects raised when an error has occurred.
Fred Drakec05cbb02000-07-05 02:03:34 +0000601
Fred Drake7fbc85c2000-09-23 04:47:56 +0000602The \code{errors} object has the following attributes:
Fred Drakec05cbb02000-07-05 02:03:34 +0000603
Fred Drake5ed1dac2001-02-08 15:40:33 +0000604\begin{datadescni}{XML_ERROR_ASYNC_ENTITY}
605\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000606
Fred Drake5ed1dac2001-02-08 15:40:33 +0000607\begin{datadescni}{XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF}
608An entity reference in an attribute value referred to an external
609entity instead of an internal entity.
610\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000611
Fred Drake5ed1dac2001-02-08 15:40:33 +0000612\begin{datadescni}{XML_ERROR_BAD_CHAR_REF}
Fred Drakee0af35e2001-09-20 20:43:28 +0000613A character reference referred to a character which is illegal in XML
Raymond Hettingerbf3a7522003-05-12 03:23:51 +0000614(for example, character \code{0}, or `\code{\&\#0;}').
Fred Drake5ed1dac2001-02-08 15:40:33 +0000615\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000616
Fred Drake5ed1dac2001-02-08 15:40:33 +0000617\begin{datadescni}{XML_ERROR_BINARY_ENTITY_REF}
Fred Drakee0af35e2001-09-20 20:43:28 +0000618An entity reference referred to an entity which was declared with a
619notation, so cannot be parsed.
Fred Drake5ed1dac2001-02-08 15:40:33 +0000620\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000621
Fred Drake5ed1dac2001-02-08 15:40:33 +0000622\begin{datadescni}{XML_ERROR_DUPLICATE_ATTRIBUTE}
Fred Drakeacab3d62000-07-11 16:30:30 +0000623An attribute was used more than once in a start tag.
Fred Drake5ed1dac2001-02-08 15:40:33 +0000624\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000625
Fred Drake5ed1dac2001-02-08 15:40:33 +0000626\begin{datadescni}{XML_ERROR_INCORRECT_ENCODING}
627\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000628
Fred Drake5ed1dac2001-02-08 15:40:33 +0000629\begin{datadescni}{XML_ERROR_INVALID_TOKEN}
Fred Drakee0af35e2001-09-20 20:43:28 +0000630Raised when an input byte could not properly be assigned to a
631character; for example, a NUL byte (value \code{0}) in a UTF-8 input
632stream.
Fred Drake5ed1dac2001-02-08 15:40:33 +0000633\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000634
Fred Drake5ed1dac2001-02-08 15:40:33 +0000635\begin{datadescni}{XML_ERROR_JUNK_AFTER_DOC_ELEMENT}
Fred Drakeacab3d62000-07-11 16:30:30 +0000636Something other than whitespace occurred after the document element.
Fred Drake5ed1dac2001-02-08 15:40:33 +0000637\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000638
Fred Drake5ed1dac2001-02-08 15:40:33 +0000639\begin{datadescni}{XML_ERROR_MISPLACED_XML_PI}
Fred Drakee0af35e2001-09-20 20:43:28 +0000640An XML declaration was found somewhere other than the start of the
641input data.
Fred Drake5ed1dac2001-02-08 15:40:33 +0000642\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000643
Fred Drake5ed1dac2001-02-08 15:40:33 +0000644\begin{datadescni}{XML_ERROR_NO_ELEMENTS}
Fred Drakee0af35e2001-09-20 20:43:28 +0000645The document contains no elements (XML requires all documents to
646contain exactly one top-level element).
Fred Drake5ed1dac2001-02-08 15:40:33 +0000647\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000648
Fred Drake5ed1dac2001-02-08 15:40:33 +0000649\begin{datadescni}{XML_ERROR_NO_MEMORY}
Fred Drakeacab3d62000-07-11 16:30:30 +0000650Expat was not able to allocate memory internally.
Fred Drake5ed1dac2001-02-08 15:40:33 +0000651\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000652
Fred Drake5ed1dac2001-02-08 15:40:33 +0000653\begin{datadescni}{XML_ERROR_PARAM_ENTITY_REF}
Fred Drakee0af35e2001-09-20 20:43:28 +0000654A parameter entity reference was found where it was not allowed.
Fred Drake5ed1dac2001-02-08 15:40:33 +0000655\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000656
Fred Drake5ed1dac2001-02-08 15:40:33 +0000657\begin{datadescni}{XML_ERROR_PARTIAL_CHAR}
Fred Drakefb568ca2004-08-10 16:47:18 +0000658An incomplete character was found in the input.
Fred Drake5ed1dac2001-02-08 15:40:33 +0000659\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000660
Fred Drake5ed1dac2001-02-08 15:40:33 +0000661\begin{datadescni}{XML_ERROR_RECURSIVE_ENTITY_REF}
Fred Drakee0af35e2001-09-20 20:43:28 +0000662An entity reference contained another reference to the same entity,
663possibly via a different name, and possibly indirectly.
Fred Drake5ed1dac2001-02-08 15:40:33 +0000664\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000665
Fred Drake5ed1dac2001-02-08 15:40:33 +0000666\begin{datadescni}{XML_ERROR_SYNTAX}
Fred Drakeacab3d62000-07-11 16:30:30 +0000667Some unspecified syntax error was encountered.
Fred Drake5ed1dac2001-02-08 15:40:33 +0000668\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000669
Fred Drake5ed1dac2001-02-08 15:40:33 +0000670\begin{datadescni}{XML_ERROR_TAG_MISMATCH}
Fred Drakeacab3d62000-07-11 16:30:30 +0000671An end tag did not match the innermost open start tag.
Fred Drake5ed1dac2001-02-08 15:40:33 +0000672\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000673
Fred Drake5ed1dac2001-02-08 15:40:33 +0000674\begin{datadescni}{XML_ERROR_UNCLOSED_TOKEN}
Fred Drakee0af35e2001-09-20 20:43:28 +0000675Some token (such as a start tag) was not closed before the end of the
676stream or the next token was encountered.
Fred Drake5ed1dac2001-02-08 15:40:33 +0000677\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000678
Fred Drake5ed1dac2001-02-08 15:40:33 +0000679\begin{datadescni}{XML_ERROR_UNDEFINED_ENTITY}
Fred Drakeacab3d62000-07-11 16:30:30 +0000680A reference was made to an entity which was not defined.
Fred Drake5ed1dac2001-02-08 15:40:33 +0000681\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000682
Fred Drake5ed1dac2001-02-08 15:40:33 +0000683\begin{datadescni}{XML_ERROR_UNKNOWN_ENCODING}
Fred Drakeacab3d62000-07-11 16:30:30 +0000684The document encoding is not supported by Expat.
Fred Drake5ed1dac2001-02-08 15:40:33 +0000685\end{datadescni}
Fred Drakefb568ca2004-08-10 16:47:18 +0000686
687\begin{datadescni}{XML_ERROR_UNCLOSED_CDATA_SECTION}
688A CDATA marked section was not closed.
689\end{datadescni}
690
691\begin{datadescni}{XML_ERROR_EXTERNAL_ENTITY_HANDLING}
692\end{datadescni}
693
694\begin{datadescni}{XML_ERROR_NOT_STANDALONE}
695The parser determined that the document was not ``standalone'' though
696it declared itself to be in the XML declaration, and the
697\member{NotStandaloneHandler} was set and returned \code{0}.
698\end{datadescni}
699
700\begin{datadescni}{XML_ERROR_UNEXPECTED_STATE}
701\end{datadescni}
702
703\begin{datadescni}{XML_ERROR_ENTITY_DECLARED_IN_PE}
704\end{datadescni}
705
706\begin{datadescni}{XML_ERROR_FEATURE_REQUIRES_XML_DTD}
707An operation was requested that requires DTD support to be compiled
708in, but Expat was configured without DTD support. This should never
709be reported by a standard build of the \module{xml.parsers.expat}
710module.
711\end{datadescni}
712
713\begin{datadescni}{XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING}
714A behavioral change was requested after parsing started that can only
715be changed before parsing has started. This is (currently) only
716raised by \method{UseForeignDTD()}.
717\end{datadescni}
718
719\begin{datadescni}{XML_ERROR_UNBOUND_PREFIX}
720An undeclared prefix was found when namespace processing was enabled.
721\end{datadescni}
722
723\begin{datadescni}{XML_ERROR_UNDECLARING_PREFIX}
724The document attempted to remove the namespace declaration associated
725with a prefix.
726\end{datadescni}
727
728\begin{datadescni}{XML_ERROR_INCOMPLETE_PE}
729A parameter entity contained incomplete markup.
730\end{datadescni}
731
732\begin{datadescni}{XML_ERROR_XML_DECL}
733The XML declaration was not well-formed.
734\end{datadescni}
735
736\begin{datadescni}{XML_ERROR_TEXT_DECL}
737There was an error parsing a text declaration in an external entity.
738\end{datadescni}
739
740\begin{datadescni}{XML_ERROR_PUBLICID}
741Characters were found in the public id that are not allowed.
742\end{datadescni}
743
744\begin{datadescni}{XML_ERROR_SUSPENDED}
745The requested operation was made on a suspended parser, but isn't
746allowed. This includes attempts to provide additional input or to
747stop the parser.
748\end{datadescni}
749
750\begin{datadescni}{XML_ERROR_NOT_SUSPENDED}
751An attempt to resume the parser was made when the parser had not been
752suspended.
753\end{datadescni}
754
755\begin{datadescni}{XML_ERROR_ABORTED}
756This should not be reported to Python applications.
757\end{datadescni}
758
759\begin{datadescni}{XML_ERROR_FINISHED}
760The requested operation was made on a parser which had finished
761parsing input, but isn't allowed. This includes attempts to provide
762additional input or to stop the parser.
763\end{datadescni}
764
765\begin{datadescni}{XML_ERROR_SUSPEND_PE}
766\end{datadescni}