\section{\module{xml.parsers.expat} ---
         Fast XML parsing using Expat}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +00003
Fred Drake5ed1dac2001-02-08 15:40:33 +00004% Markup notes:
5%
6% Many of the attributes of the XMLParser objects are callbacks.
7% Since signature information must be presented, these are described
8% using the methoddesc environment. Since they are attributes which
9% are set by client code, in-text references to these attributes
10% should be marked using the \member macro and should not include the
11% parentheses used when marking functions and methods.
12
Fred Drake7fbc85c2000-09-23 04:47:56 +000013\declaremodule{standard}{xml.parsers.expat}
14\modulesynopsis{An interface to the Expat non-validating XML parser.}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +000015\moduleauthor{Paul Prescod}{paul@prescod.net}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +000016
Fred Drake7fbc85c2000-09-23 04:47:56 +000017\versionadded{2.0}
18
Fred Drakeefffe8e2000-10-29 05:10:30 +000019The \module{xml.parsers.expat} module is a Python interface to the
20Expat\index{Expat} non-validating XML parser.
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +000021The module provides a single extension type, \class{xmlparser}, that
22represents the current state of an XML parser. After an
23\class{xmlparser} object has been created, various attributes of the object
24can be set to handler functions. When an XML document is then fed to
25the parser, the handler functions are called for the character data
26and markup in the XML document.
Fred Drake7fbc85c2000-09-23 04:47:56 +000027
28This module uses the \module{pyexpat}\refbimodindex{pyexpat} module to
29provide access to the Expat parser. Direct use of the
30\module{pyexpat} module is deprecated.
Fred Drakeefffe8e2000-10-29 05:10:30 +000031
32This module provides one exception and one type object:
33
Fred Drake1d8ad2b2001-02-14 18:54:32 +000034\begin{excdesc}{ExpatError}
  The exception raised when Expat reports an error.  See
  section~\ref{expaterror-objects}, ``ExpatError Exceptions,'' for more
37 information on interpreting Expat errors.
Fred Drakeefffe8e2000-10-29 05:10:30 +000038\end{excdesc}
39
Fred Drake1d8ad2b2001-02-14 18:54:32 +000040\begin{excdesc}{error}
41 Alias for \exception{ExpatError}.
42\end{excdesc}
43
Fred Drakeefffe8e2000-10-29 05:10:30 +000044\begin{datadesc}{XMLParserType}
45 The type of the return values from the \function{ParserCreate()}
46 function.
47\end{datadesc}
48
49
Fred Drake7fbc85c2000-09-23 04:47:56 +000050The \module{xml.parsers.expat} module contains two functions:
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +000051
52\begin{funcdesc}{ErrorString}{errno}
53Returns an explanatory string for a given error number \var{errno}.
54\end{funcdesc}
55
Fred Drakeefffe8e2000-10-29 05:10:30 +000056\begin{funcdesc}{ParserCreate}{\optional{encoding\optional{,
57 namespace_separator}}}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +000058Creates and returns a new \class{xmlparser} object.
59\var{encoding}, if specified, must be a string naming the encoding
60used by the XML data. Expat doesn't support as many encodings as
61Python does, and its repertoire of encodings can't be extended; it
Fred Drake5ed1dac2001-02-08 15:40:33 +000062supports UTF-8, UTF-16, ISO-8859-1 (Latin1), and ASCII. If
63\var{encoding} is given it will override the implicit or explicit
64encoding of the document.
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +000065
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +000066Expat can optionally do XML namespace processing for you, enabled by
Fred Drakeefffe8e2000-10-29 05:10:30 +000067providing a value for \var{namespace_separator}. The value must be a
68one-character string; a \exception{ValueError} will be raised if the
69string has an illegal length (\code{None} is considered the same as
70omission). When namespace processing is enabled, element type names
71and attribute names that belong to a namespace will be expanded. The
72element name passed to the element handlers
Fred Drake5ed1dac2001-02-08 15:40:33 +000073\member{StartElementHandler} and \member{EndElementHandler}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +000074will be the concatenation of the namespace URI, the namespace
75separator character, and the local part of the name. If the namespace
Fred Drakeefffe8e2000-10-29 05:10:30 +000076separator is a zero byte (\code{chr(0)}) then the namespace URI and
Fred Drake5ed1dac2001-02-08 15:40:33 +000077the local part will be concatenated without any separator.
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +000078
Fred Drake2fef3ab2000-11-28 06:38:22 +000079For example, if \var{namespace_separator} is set to a space character
80(\character{ }) and the following document is parsed:
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +000081
82\begin{verbatim}
83<?xml version="1.0"?>
84<root xmlns = "http://default-namespace.org/"
85 xmlns:py = "http://www.python.org/ns/">
86 <py:elem1 />
87 <elem2 xmlns="" />
88</root>
89\end{verbatim}
90
Fred Drake5ed1dac2001-02-08 15:40:33 +000091\member{StartElementHandler} will receive the following strings
Fred Draked79c33a2000-09-25 14:14:30 +000092for each element:
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +000093
94\begin{verbatim}
95http://default-namespace.org/ root
96http://www.python.org/ns/ elem1
97elem2
98\end{verbatim}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +000099\end{funcdesc}
100
Fred Drakef08cbb12000-12-23 22:19:05 +0000101
Fred Drakedce695aa2002-06-20 21:06:03 +0000102\begin{seealso}
103 \seetitle[http://www.libexpat.org/]{The Expat XML Parser}
104 {Home page of the Expat project.}
105\end{seealso}
106
107
Fred Drakef08cbb12000-12-23 22:19:05 +0000108\subsection{XMLParser Objects \label{xmlparser-objects}}
109
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000110\class{xmlparser} objects have the following methods:
111
Fred Drake2fef3ab2000-11-28 06:38:22 +0000112\begin{methoddesc}[xmlparser]{Parse}{data\optional{, isfinal}}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000113Parses the contents of the string \var{data}, calling the appropriate
114handler functions to process the parsed data. \var{isfinal} must be
Fred Drakef08cbb12000-12-23 22:19:05 +0000115true on the final call to this method. \var{data} can be the empty
Fred Drakec05cbb02000-07-05 02:03:34 +0000116string at any time.
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000117\end{methoddesc}
118
Fred Drakeefffe8e2000-10-29 05:10:30 +0000119\begin{methoddesc}[xmlparser]{ParseFile}{file}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000120Parse XML data reading from the object \var{file}. \var{file} only
121needs to provide the \method{read(\var{nbytes})} method, returning the
122empty string when there's no more data.
123\end{methoddesc}
124
Fred Drakeefffe8e2000-10-29 05:10:30 +0000125\begin{methoddesc}[xmlparser]{SetBase}{base}
Fred Drake5ed1dac2001-02-08 15:40:33 +0000126Sets the base to be used for resolving relative URIs in system
127identifiers in declarations. Resolving relative identifiers is left
128to the application: this value will be passed through as the
\var{base} argument to the \member{ExternalEntityRefHandler},
\member{NotationDeclHandler}, and
\member{UnparsedEntityDeclHandler} handlers.
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000132\end{methoddesc}
133
Fred Drakeefffe8e2000-10-29 05:10:30 +0000134\begin{methoddesc}[xmlparser]{GetBase}{}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000135Returns a string containing the base set by a previous call to
136\method{SetBase()}, or \code{None} if
137\method{SetBase()} hasn't been called.
138\end{methoddesc}
139
Fred Drake1d8ad2b2001-02-14 18:54:32 +0000140\begin{methoddesc}[xmlparser]{GetInputContext}{}
141Returns the input data that generated the current event as a string.
142The data is in the encoding of the entity which contains the text.
143When called while an event handler is not active, the return value is
144\code{None}.
145\versionadded{2.1}
146\end{methoddesc}
147
Fred Drakef08cbb12000-12-23 22:19:05 +0000148\begin{methoddesc}[xmlparser]{ExternalEntityParserCreate}{context\optional{,
149 encoding}}
150Create a ``child'' parser which can be used to parse an external
151parsed entity referred to by content parsed by the parent parser. The
Fred Drakeb162d182001-01-04 05:48:08 +0000152\var{context} parameter should be the string passed to the
\member{ExternalEntityRefHandler} handler function, described below.
Fred Drake5ed1dac2001-02-08 15:40:33 +0000154The child parser is created with the \member{ordered_attributes},
155\member{returns_unicode} and \member{specified_attributes} set to the
156values of this parser.
Fred Drakef08cbb12000-12-23 22:19:05 +0000157\end{methoddesc}
158
Fred Draked62d5072004-08-10 17:18:32 +0000159\begin{methoddesc}[xmlparser]{UseForeignDTD}{\optional{flag}}
160Calling this with a true value for \var{flag} (the default) will cause
161Expat to call the \member{ExternalEntityRefHandler} with
162\constant{None} for all arguments to allow an alternate DTD to be
163loaded. If the document does not contain a document type declaration,
164the \member{ExternalEntityRefHandler} will still be called, but the
165\member{StartDoctypeDeclHandler} and \member{EndDoctypeDeclHandler}
166will not be called.
167
168Passing a false value for \var{flag} will cancel a previous call that
169passed a true value, but otherwise has no effect.
170
171This method can only be called before the \method{Parse()} or
\method{ParseFile()} methods are called; calling it after either of
those has been called causes \exception{ExpatError} to be raised with
174the \member{code} attribute set to
175\constant{errors.XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING}.
176
177\versionadded{2.3}
178\end{methoddesc}
179
Fred Drakeefffe8e2000-10-29 05:10:30 +0000180
Fred Draked79c33a2000-09-25 14:14:30 +0000181\class{xmlparser} objects have the following attributes:
Andrew M. Kuchling0690c862000-08-17 23:15:21 +0000182
Fred Drakef0b095d2002-07-17 20:31:52 +0000183\begin{memberdesc}[xmlparser]{buffer_size}
184The size of the buffer used when \member{buffer_text} is true. This
185value cannot be changed at this time.
186\versionadded{2.3}
187\end{memberdesc}
188
189\begin{memberdesc}[xmlparser]{buffer_text}
190Setting this to true causes the \class{xmlparser} object to buffer
191textual content returned by Expat to avoid multiple calls to the
\member{CharacterDataHandler} callback whenever possible.  This can
193improve performance substantially since Expat normally breaks
194character data into chunks at every line ending. This attribute is
195false by default, and may be changed at any time.
196\versionadded{2.3}
197\end{memberdesc}
198
199\begin{memberdesc}[xmlparser]{buffer_used}
200If \member{buffer_text} is enabled, the number of bytes stored in the
201buffer. These bytes represent UTF-8 encoded text. This attribute has
202no meaningful interpretation when \member{buffer_text} is false.
203\versionadded{2.3}
204\end{memberdesc}
205
Fred Drake5ed1dac2001-02-08 15:40:33 +0000206\begin{memberdesc}[xmlparser]{ordered_attributes}
207Setting this attribute to a non-zero integer causes the attributes to
208be reported as a list rather than a dictionary. The attributes are
209presented in the order found in the document text. For each
210attribute, two list entries are presented: the attribute name and the
211attribute value. (Older versions of this module also used this
212format.) By default, this attribute is false; it may be changed at
213any time.
214\versionadded{2.1}
215\end{memberdesc}
216
Fred Drakeefffe8e2000-10-29 05:10:30 +0000217\begin{memberdesc}[xmlparser]{returns_unicode}
Fred Drake5ed1dac2001-02-08 15:40:33 +0000218If this attribute is set to a non-zero integer, the handler functions
219will be passed Unicode strings. If \member{returns_unicode} is 0,
2208-bit strings containing UTF-8 encoded data will be passed to the
221handlers.
Fred Drakeb62966c2000-12-07 00:00:21 +0000222\versionchanged[Can be changed at any time to affect the result
Fred Drakee0af35e2001-09-20 20:43:28 +0000223 type]{1.6}
Fred Drakeefffe8e2000-10-29 05:10:30 +0000224\end{memberdesc}
Andrew M. Kuchling0690c862000-08-17 23:15:21 +0000225
Fred Drake5ed1dac2001-02-08 15:40:33 +0000226\begin{memberdesc}[xmlparser]{specified_attributes}
227If set to a non-zero integer, the parser will report only those
228attributes which were specified in the document instance and not those
229which were derived from attribute declarations. Applications which
230set this need to be especially careful to use what additional
231information is available from the declarations as needed to comply
232with the standards for the behavior of XML processors. By default,
233this attribute is false; it may be changed at any time.
234\versionadded{2.1}
235\end{memberdesc}
236
Andrew M. Kuchling0690c862000-08-17 23:15:21 +0000237The following attributes contain values relating to the most recent
238error encountered by an \class{xmlparser} object, and will only have
239correct values once a call to \method{Parse()} or \method{ParseFile()}
Fred Drake523ec572001-02-15 05:37:51 +0000240has raised a \exception{xml.parsers.expat.ExpatError} exception.
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000241
Fred Drakeefffe8e2000-10-29 05:10:30 +0000242\begin{memberdesc}[xmlparser]{ErrorByteIndex}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000243Byte index at which an error occurred.
Fred Drakeefffe8e2000-10-29 05:10:30 +0000244\end{memberdesc}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000245
Fred Drakeefffe8e2000-10-29 05:10:30 +0000246\begin{memberdesc}[xmlparser]{ErrorCode}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000247Numeric code specifying the problem. This value can be passed to the
248\function{ErrorString()} function, or compared to one of the constants
Fred Drake523ec572001-02-15 05:37:51 +0000249defined in the \code{errors} object.
Fred Drakeefffe8e2000-10-29 05:10:30 +0000250\end{memberdesc}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000251
Fred Drakeefffe8e2000-10-29 05:10:30 +0000252\begin{memberdesc}[xmlparser]{ErrorColumnNumber}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000253Column number at which an error occurred.
Fred Drakeefffe8e2000-10-29 05:10:30 +0000254\end{memberdesc}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000255
Fred Drakeefffe8e2000-10-29 05:10:30 +0000256\begin{memberdesc}[xmlparser]{ErrorLineNumber}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000257Line number at which an error occurred.
Fred Drakeefffe8e2000-10-29 05:10:30 +0000258\end{memberdesc}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000259
260Here is the list of handlers that can be set. To set a handler on an
Fred Drakec05cbb02000-07-05 02:03:34 +0000261\class{xmlparser} object \var{o}, use
262\code{\var{o}.\var{handlername} = \var{func}}. \var{handlername} must
263be taken from the following list, and \var{func} must be a callable
264object accepting the correct number of arguments. The arguments are
265all strings, unless otherwise stated.
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000266
Fred Drake5ed1dac2001-02-08 15:40:33 +0000267\begin{methoddesc}[xmlparser]{XmlDeclHandler}{version, encoding, standalone}
268Called when the XML declaration is parsed. The XML declaration is the
269(optional) declaration of the applicable version of the XML
270recommendation, the encoding of the document text, and an optional
271``standalone'' declaration. \var{version} and \var{encoding} will be
272strings of the type dictated by the \member{returns_unicode}
273attribute, and \var{standalone} will be \code{1} if the document is
274declared standalone, \code{0} if it is declared not to be standalone,
275or \code{-1} if the standalone clause was omitted.
276This is only available with Expat version 1.95.0 or newer.
277\versionadded{2.1}
278\end{methoddesc}
279
280\begin{methoddesc}[xmlparser]{StartDoctypeDeclHandler}{doctypeName,
281 systemId, publicId,
282 has_internal_subset}
283Called when Expat begins parsing the document type declaration
284(\code{<!DOCTYPE \ldots}). The \var{doctypeName} is provided exactly
285as presented. The \var{systemId} and \var{publicId} parameters give
286the system and public identifiers if specified, or \code{None} if
287omitted. \var{has_internal_subset} will be true if the document
contains an internal document declaration subset.
289This requires Expat version 1.2 or newer.
290\end{methoddesc}
291
292\begin{methoddesc}[xmlparser]{EndDoctypeDeclHandler}{}
Called when Expat is done parsing the document type declaration.
294This requires Expat version 1.2 or newer.
295\end{methoddesc}
296
297\begin{methoddesc}[xmlparser]{ElementDeclHandler}{name, model}
298Called once for each element type declaration. \var{name} is the name
299of the element type, and \var{model} is a representation of the
300content model.
301\end{methoddesc}
302
303\begin{methoddesc}[xmlparser]{AttlistDeclHandler}{elname, attname,
304 type, default, required}
305Called for each declared attribute for an element type. If an
306attribute list declaration declares three attributes, this handler is
307called three times, once for each attribute. \var{elname} is the name
308of the element to which the declaration applies and \var{attname} is
309the name of the attribute declared. The attribute type is a string
310passed as \var{type}; the possible values are \code{'CDATA'},
\code{'ID'}, \code{'IDREF'}, \ldots{}
312\var{default} gives the default value for the attribute used when the
313attribute is not specified by the document instance, or \code{None} if
314there is no default value (\code{\#IMPLIED} values). If the attribute
315is required to be given in the document instance, \var{required} will
316be true.
317This requires Expat version 1.95.0 or newer.
318\end{methoddesc}
319
Fred Drakeefffe8e2000-10-29 05:10:30 +0000320\begin{methoddesc}[xmlparser]{StartElementHandler}{name, attributes}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000321Called for the start of every element. \var{name} is a string
322containing the element name, and \var{attributes} is a dictionary
323mapping attribute names to their values.
324\end{methoddesc}
325
Fred Drakeefffe8e2000-10-29 05:10:30 +0000326\begin{methoddesc}[xmlparser]{EndElementHandler}{name}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000327Called for the end of every element.
328\end{methoddesc}
329
Fred Drakeefffe8e2000-10-29 05:10:30 +0000330\begin{methoddesc}[xmlparser]{ProcessingInstructionHandler}{target, data}
Fred Drake5ed1dac2001-02-08 15:40:33 +0000331Called for every processing instruction.
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000332\end{methoddesc}
333
Fred Drakeefffe8e2000-10-29 05:10:30 +0000334\begin{methoddesc}[xmlparser]{CharacterDataHandler}{data}
Fred Drake5ed1dac2001-02-08 15:40:33 +0000335Called for character data. This will be called for normal character
336data, CDATA marked content, and ignorable whitespace. Applications
337which must distinguish these cases can use the
338\member{StartCdataSectionHandler}, \member{EndCdataSectionHandler},
339and \member{ElementDeclHandler} callbacks to collect the required
340information.
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000341\end{methoddesc}
342
Fred Drakeefffe8e2000-10-29 05:10:30 +0000343\begin{methoddesc}[xmlparser]{UnparsedEntityDeclHandler}{entityName, base,
344 systemId, publicId,
345 notationName}
Fred Drake5ed1dac2001-02-08 15:40:33 +0000346Called for unparsed (NDATA) entity declarations. This is only present
347for version 1.2 of the Expat library; for more recent versions, use
348\member{EntityDeclHandler} instead. (The underlying function in the
349Expat library has been declared obsolete.)
350\end{methoddesc}
351
352\begin{methoddesc}[xmlparser]{EntityDeclHandler}{entityName,
353 is_parameter_entity, value,
354 base, systemId,
355 publicId,
356 notationName}
357Called for all entity declarations. For parameter and internal
358entities, \var{value} will be a string giving the declared contents
359of the entity; this will be \code{None} for external entities. The
360\var{notationName} parameter will be \code{None} for parsed entities,
361and the name of the notation for unparsed entities.
\var{is_parameter_entity} will be true if the entity is a parameter
363entity or false for general entities (most applications only need to
364be concerned with general entities).
365This is only available starting with version 1.95.0 of the Expat
366library.
367\versionadded{2.1}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000368\end{methoddesc}
369
Fred Drakeefffe8e2000-10-29 05:10:30 +0000370\begin{methoddesc}[xmlparser]{NotationDeclHandler}{notationName, base,
371 systemId, publicId}
Called for notation declarations.  \var{notationName}, \var{base},
\var{systemId}, and \var{publicId} are strings if given.  If the
374public identifier is omitted, \var{publicId} will be \code{None}.
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000375\end{methoddesc}
376
Fred Drakeefffe8e2000-10-29 05:10:30 +0000377\begin{methoddesc}[xmlparser]{StartNamespaceDeclHandler}{prefix, uri}
Fred Drake5ed1dac2001-02-08 15:40:33 +0000378Called when an element contains a namespace declaration. Namespace
379declarations are processed before the \member{StartElementHandler} is
380called for the element on which declarations are placed.
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000381\end{methoddesc}
382
Fred Drakeefffe8e2000-10-29 05:10:30 +0000383\begin{methoddesc}[xmlparser]{EndNamespaceDeclHandler}{prefix}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000384Called when the closing tag is reached for an element
Fred Drake5ed1dac2001-02-08 15:40:33 +0000385that contained a namespace declaration. This is called once for each
386namespace declaration on the element in the reverse of the order for
387which the \member{StartNamespaceDeclHandler} was called to indicate
388the start of each namespace declaration's scope. Calls to this
389handler are made after the corresponding \member{EndElementHandler}
390for the end of the element.
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000391\end{methoddesc}
392
Fred Drakeefffe8e2000-10-29 05:10:30 +0000393\begin{methoddesc}[xmlparser]{CommentHandler}{data}
Fred Drake5ed1dac2001-02-08 15:40:33 +0000394Called for comments. \var{data} is the text of the comment, excluding
Fred Drake523ec572001-02-15 05:37:51 +0000395the leading `\code{<!-}\code{-}' and trailing `\code{-}\code{->}'.
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000396\end{methoddesc}
397
Fred Drakeefffe8e2000-10-29 05:10:30 +0000398\begin{methoddesc}[xmlparser]{StartCdataSectionHandler}{}
Fred Drake5ed1dac2001-02-08 15:40:33 +0000399Called at the start of a CDATA section. This and
\member{EndCdataSectionHandler} are needed to be able to identify
401the syntactical start and end for CDATA sections.
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000402\end{methoddesc}
403
Fred Drakeefffe8e2000-10-29 05:10:30 +0000404\begin{methoddesc}[xmlparser]{EndCdataSectionHandler}{}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000405Called at the end of a CDATA section.
406\end{methoddesc}
407
Fred Drakeefffe8e2000-10-29 05:10:30 +0000408\begin{methoddesc}[xmlparser]{DefaultHandler}{data}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000409Called for any characters in the XML document for
410which no applicable handler has been specified. This means
411characters that are part of a construct which could be reported, but
412for which no handler has been supplied.
413\end{methoddesc}
414
Fred Drakeefffe8e2000-10-29 05:10:30 +0000415\begin{methoddesc}[xmlparser]{DefaultHandlerExpand}{data}
This is the same as the \member{DefaultHandler},
417but doesn't inhibit expansion of internal entities.
418The entity reference will not be passed to the default handler.
419\end{methoddesc}
420
Fred Drake5ed1dac2001-02-08 15:40:33 +0000421\begin{methoddesc}[xmlparser]{NotStandaloneHandler}{} Called if the
422XML document hasn't been declared as being a standalone document.
423This happens when there is an external subset or a reference to a
parameter entity, but the XML declaration does not set standalone to
\code{yes}.  If this handler returns \code{0},
426then the parser will throw an \constant{XML_ERROR_NOT_STANDALONE}
427error. If this handler is not set, no exception is raised by the
428parser for this condition.
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000429\end{methoddesc}
430
Fred Drakeefffe8e2000-10-29 05:10:30 +0000431\begin{methoddesc}[xmlparser]{ExternalEntityRefHandler}{context, base,
432 systemId, publicId}
Fred Drake5ed1dac2001-02-08 15:40:33 +0000433Called for references to external entities. \var{base} is the current
base, as set by a previous call to \method{SetBase()}.  The system and
public identifiers, \var{systemId} and \var{publicId}, are strings if
436given; if the public identifier is not given, \var{publicId} will be
Fred Drake523ec572001-02-15 05:37:51 +0000437\code{None}. The \var{context} value is opaque and should only be
438used as described below.
Fred Drake5ed1dac2001-02-08 15:40:33 +0000439
440For external entities to be parsed, this handler must be implemented.
441It is responsible for creating the sub-parser using
Fred Drake523ec572001-02-15 05:37:51 +0000442\code{ExternalEntityParserCreate(\var{context})}, initializing it with
443the appropriate callbacks, and parsing the entity. This handler
444should return an integer; if it returns \code{0}, the parser will
445throw an \constant{XML_ERROR_EXTERNAL_ENTITY_HANDLING} error,
446otherwise parsing will continue.
Fred Drake5ed1dac2001-02-08 15:40:33 +0000447
448If this handler is not provided, external entities are reported by the
449\member{DefaultHandler} callback, if provided.
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000450\end{methoddesc}
451
452
Fred Drake1d8ad2b2001-02-14 18:54:32 +0000453\subsection{ExpatError Exceptions \label{expaterror-objects}}
454\sectionauthor{Fred L. Drake, Jr.}{fdrake@acm.org}
455
456\exception{ExpatError} exceptions have a number of interesting
457attributes:
458
459\begin{memberdesc}[ExpatError]{code}
460 Expat's internal error number for the specific error. This will
461 match one of the constants defined in the \code{errors} object from
462 this module.
463 \versionadded{2.1}
464\end{memberdesc}
465
466\begin{memberdesc}[ExpatError]{lineno}
467 Line number on which the error was detected. The first line is
468 numbered \code{1}.
469 \versionadded{2.1}
470\end{memberdesc}
471
472\begin{memberdesc}[ExpatError]{offset}
473 Character offset into the line where the error occurred. The first
474 column is numbered \code{0}.
475 \versionadded{2.1}
476\end{memberdesc}
477
478
Fred Drake7fbc85c2000-09-23 04:47:56 +0000479\subsection{Example \label{expat-example}}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000480
Fred Drakec05cbb02000-07-05 02:03:34 +0000481The following program defines three handlers that just print out their
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000482arguments.
483
484\begin{verbatim}
Fred Drake7fbc85c2000-09-23 04:47:56 +0000485import xml.parsers.expat
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000486
487# 3 handler functions
488def start_element(name, attrs):
489 print 'Start element:', name, attrs
490def end_element(name):
491 print 'End element:', name
492def char_data(data):
493 print 'Character data:', repr(data)
494
Fred Drake7fbc85c2000-09-23 04:47:56 +0000495p = xml.parsers.expat.ParserCreate()
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000496
497p.StartElementHandler = start_element
Fred Drake7fbc85c2000-09-23 04:47:56 +0000498p.EndElementHandler = end_element
499p.CharacterDataHandler = char_data
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000500
501p.Parse("""<?xml version="1.0"?>
502<parent id="top"><child1 name="paul">Text goes here</child1>
503<child2 name="fred">More text</child2>
Fred Drakea41b2bb2002-12-03 22:57:37 +0000504</parent>""", 1)
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000505\end{verbatim}
506
507The output from this program is:
508
509\begin{verbatim}
510Start element: parent {'id': 'top'}
511Start element: child1 {'name': 'paul'}
512Character data: 'Text goes here'
513End element: child1
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000514Character data: '\n'
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000515Start element: child2 {'name': 'fred'}
516Character data: 'More text'
517End element: child2
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000518Character data: '\n'
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000519End element: parent
520\end{verbatim}
Fred Drakec05cbb02000-07-05 02:03:34 +0000521
522
Fred Drake5ed1dac2001-02-08 15:40:33 +0000523\subsection{Content Model Descriptions \label{expat-content-models}}
524\sectionauthor{Fred L. Drake, Jr.}{fdrake@acm.org}
525
Content models are described using nested tuples.  Each tuple
contains four values: the type, the quantifier, the name, and a tuple
of children.  Children are simply additional content model
descriptions.
530
531The values of the first two fields are constants defined in the
532\code{model} object of the \module{xml.parsers.expat} module. These
533constants can be collected in two groups: the model type group and the
534quantifier group.
535
536The constants in the model type group are:
537
538\begin{datadescni}{XML_CTYPE_ANY}
539The element named by the model name was declared to have a content
540model of \code{ANY}.
541\end{datadescni}
542
543\begin{datadescni}{XML_CTYPE_CHOICE}
544The named element allows a choice from a number of options; this is
545used for content models such as \code{(A | B | C)}.
546\end{datadescni}
547
548\begin{datadescni}{XML_CTYPE_EMPTY}
549Elements which are declared to be \code{EMPTY} have this model type.
550\end{datadescni}
551
552\begin{datadescni}{XML_CTYPE_MIXED}
553\end{datadescni}
554
555\begin{datadescni}{XML_CTYPE_NAME}
556\end{datadescni}
557
558\begin{datadescni}{XML_CTYPE_SEQ}
559Models which represent a series of models which follow one after the
560other are indicated with this model type. This is used for models
561such as \code{(A, B, C)}.
562\end{datadescni}
563
564
565The constants in the quantifier group are:
566
567\begin{datadescni}{XML_CQUANT_NONE}
Fred Drakee0af35e2001-09-20 20:43:28 +0000568No modifier is given, so it can appear exactly once, as for \code{A}.
Fred Drake5ed1dac2001-02-08 15:40:33 +0000569\end{datadescni}
570
571\begin{datadescni}{XML_CQUANT_OPT}
Fred Drakee0af35e2001-09-20 20:43:28 +0000572The model is optional: it can appear once or not at all, as for
Fred Drake5ed1dac2001-02-08 15:40:33 +0000573\code{A?}.
574\end{datadescni}
575
576\begin{datadescni}{XML_CQUANT_PLUS}
Fred Drakee0af35e2001-09-20 20:43:28 +0000577The model must occur one or more times (like \code{A+}).
Fred Drake5ed1dac2001-02-08 15:40:33 +0000578\end{datadescni}
579
580\begin{datadescni}{XML_CQUANT_REP}
581The model must occur zero or more times, as for \code{A*}.
582\end{datadescni}
583
584
Fred Drake7fbc85c2000-09-23 04:47:56 +0000585\subsection{Expat error constants \label{expat-errors}}
Fred Drakec05cbb02000-07-05 02:03:34 +0000586
Fred Drake1d8ad2b2001-02-14 18:54:32 +0000587The following constants are provided in the \code{errors} object of
588the \refmodule{xml.parsers.expat} module. These constants are useful
589in interpreting some of the attributes of the \exception{ExpatError}
590exception objects raised when an error has occurred.
Fred Drakec05cbb02000-07-05 02:03:34 +0000591
Fred Drake7fbc85c2000-09-23 04:47:56 +0000592The \code{errors} object has the following attributes:
Fred Drakec05cbb02000-07-05 02:03:34 +0000593
Fred Drake5ed1dac2001-02-08 15:40:33 +0000594\begin{datadescni}{XML_ERROR_ASYNC_ENTITY}
595\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000596
Fred Drake5ed1dac2001-02-08 15:40:33 +0000597\begin{datadescni}{XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF}
598An entity reference in an attribute value referred to an external
599entity instead of an internal entity.
600\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000601
Fred Drake5ed1dac2001-02-08 15:40:33 +0000602\begin{datadescni}{XML_ERROR_BAD_CHAR_REF}
Fred Drakee0af35e2001-09-20 20:43:28 +0000603A character reference referred to a character which is illegal in XML
Raymond Hettingerbf3a7522003-05-12 03:23:51 +0000604(for example, character \code{0}, or `\code{\&\#0;}').
Fred Drake5ed1dac2001-02-08 15:40:33 +0000605\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000606
Fred Drake5ed1dac2001-02-08 15:40:33 +0000607\begin{datadescni}{XML_ERROR_BINARY_ENTITY_REF}
Fred Drakee0af35e2001-09-20 20:43:28 +0000608An entity reference referred to an entity which was declared with a
609notation, so cannot be parsed.
Fred Drake5ed1dac2001-02-08 15:40:33 +0000610\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000611
Fred Drake5ed1dac2001-02-08 15:40:33 +0000612\begin{datadescni}{XML_ERROR_DUPLICATE_ATTRIBUTE}
Fred Drakeacab3d62000-07-11 16:30:30 +0000613An attribute was used more than once in a start tag.
Fred Drake5ed1dac2001-02-08 15:40:33 +0000614\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000615
Fred Drake5ed1dac2001-02-08 15:40:33 +0000616\begin{datadescni}{XML_ERROR_INCORRECT_ENCODING}
617\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000618
Fred Drake5ed1dac2001-02-08 15:40:33 +0000619\begin{datadescni}{XML_ERROR_INVALID_TOKEN}
Fred Drakee0af35e2001-09-20 20:43:28 +0000620Raised when an input byte could not properly be assigned to a
621character; for example, a NUL byte (value \code{0}) in a UTF-8 input
622stream.
Fred Drake5ed1dac2001-02-08 15:40:33 +0000623\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000624
Fred Drake5ed1dac2001-02-08 15:40:33 +0000625\begin{datadescni}{XML_ERROR_JUNK_AFTER_DOC_ELEMENT}
Fred Drakeacab3d62000-07-11 16:30:30 +0000626Something other than whitespace occurred after the document element.
Fred Drake5ed1dac2001-02-08 15:40:33 +0000627\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000628
Fred Drake5ed1dac2001-02-08 15:40:33 +0000629\begin{datadescni}{XML_ERROR_MISPLACED_XML_PI}
Fred Drakee0af35e2001-09-20 20:43:28 +0000630An XML declaration was found somewhere other than the start of the
631input data.
Fred Drake5ed1dac2001-02-08 15:40:33 +0000632\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000633
Fred Drake5ed1dac2001-02-08 15:40:33 +0000634\begin{datadescni}{XML_ERROR_NO_ELEMENTS}
The document contains no elements (XML requires all documents to
contain exactly one top-level element).
Fred Drake5ed1dac2001-02-08 15:40:33 +0000637\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000638
Fred Drake5ed1dac2001-02-08 15:40:33 +0000639\begin{datadescni}{XML_ERROR_NO_MEMORY}
Fred Drakeacab3d62000-07-11 16:30:30 +0000640Expat was not able to allocate memory internally.
Fred Drake5ed1dac2001-02-08 15:40:33 +0000641\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000642
Fred Drake5ed1dac2001-02-08 15:40:33 +0000643\begin{datadescni}{XML_ERROR_PARAM_ENTITY_REF}
Fred Drakee0af35e2001-09-20 20:43:28 +0000644A parameter entity reference was found where it was not allowed.
Fred Drake5ed1dac2001-02-08 15:40:33 +0000645\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000646
Fred Drake5ed1dac2001-02-08 15:40:33 +0000647\begin{datadescni}{XML_ERROR_PARTIAL_CHAR}
Fred Drakefb568ca2004-08-10 16:47:18 +0000648An incomplete character was found in the input.
Fred Drake5ed1dac2001-02-08 15:40:33 +0000649\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000650
Fred Drake5ed1dac2001-02-08 15:40:33 +0000651\begin{datadescni}{XML_ERROR_RECURSIVE_ENTITY_REF}
An entity reference contained another reference to the same entity,
possibly via a different name, and possibly indirectly.
Fred Drake5ed1dac2001-02-08 15:40:33 +0000654\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000655
Fred Drake5ed1dac2001-02-08 15:40:33 +0000656\begin{datadescni}{XML_ERROR_SYNTAX}
Fred Drakeacab3d62000-07-11 16:30:30 +0000657Some unspecified syntax error was encountered.
Fred Drake5ed1dac2001-02-08 15:40:33 +0000658\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000659
Fred Drake5ed1dac2001-02-08 15:40:33 +0000660\begin{datadescni}{XML_ERROR_TAG_MISMATCH}
Fred Drakeacab3d62000-07-11 16:30:30 +0000661An end tag did not match the innermost open start tag.
Fred Drake5ed1dac2001-02-08 15:40:33 +0000662\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000663
Fred Drake5ed1dac2001-02-08 15:40:33 +0000664\begin{datadescni}{XML_ERROR_UNCLOSED_TOKEN}
Fred Drakee0af35e2001-09-20 20:43:28 +0000665Some token (such as a start tag) was not closed before the end of the
666stream or the next token was encountered.
Fred Drake5ed1dac2001-02-08 15:40:33 +0000667\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000668
Fred Drake5ed1dac2001-02-08 15:40:33 +0000669\begin{datadescni}{XML_ERROR_UNDEFINED_ENTITY}
A reference was made to an entity which was not defined.
Fred Drake5ed1dac2001-02-08 15:40:33 +0000671\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000672
Fred Drake5ed1dac2001-02-08 15:40:33 +0000673\begin{datadescni}{XML_ERROR_UNKNOWN_ENCODING}
Fred Drakeacab3d62000-07-11 16:30:30 +0000674The document encoding is not supported by Expat.
Fred Drake5ed1dac2001-02-08 15:40:33 +0000675\end{datadescni}
Fred Drakefb568ca2004-08-10 16:47:18 +0000676
677\begin{datadescni}{XML_ERROR_UNCLOSED_CDATA_SECTION}
678A CDATA marked section was not closed.
679\end{datadescni}
680
681\begin{datadescni}{XML_ERROR_EXTERNAL_ENTITY_HANDLING}
682\end{datadescni}
683
684\begin{datadescni}{XML_ERROR_NOT_STANDALONE}
685The parser determined that the document was not ``standalone'' though
686it declared itself to be in the XML declaration, and the
687\member{NotStandaloneHandler} was set and returned \code{0}.
688\end{datadescni}
689
690\begin{datadescni}{XML_ERROR_UNEXPECTED_STATE}
691\end{datadescni}
692
693\begin{datadescni}{XML_ERROR_ENTITY_DECLARED_IN_PE}
694\end{datadescni}
695
696\begin{datadescni}{XML_ERROR_FEATURE_REQUIRES_XML_DTD}
697An operation was requested that requires DTD support to be compiled
698in, but Expat was configured without DTD support. This should never
699be reported by a standard build of the \module{xml.parsers.expat}
700module.
701\end{datadescni}
702
703\begin{datadescni}{XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING}
704A behavioral change was requested after parsing started that can only
705be changed before parsing has started. This is (currently) only
706raised by \method{UseForeignDTD()}.
707\end{datadescni}
708
709\begin{datadescni}{XML_ERROR_UNBOUND_PREFIX}
710An undeclared prefix was found when namespace processing was enabled.
711\end{datadescni}
712
713\begin{datadescni}{XML_ERROR_UNDECLARING_PREFIX}
714The document attempted to remove the namespace declaration associated
715with a prefix.
716\end{datadescni}
717
718\begin{datadescni}{XML_ERROR_INCOMPLETE_PE}
719A parameter entity contained incomplete markup.
720\end{datadescni}
721
722\begin{datadescni}{XML_ERROR_XML_DECL}
The XML declaration in the document was not well-formed.
724\end{datadescni}
725
726\begin{datadescni}{XML_ERROR_TEXT_DECL}
727There was an error parsing a text declaration in an external entity.
728\end{datadescni}
729
730\begin{datadescni}{XML_ERROR_PUBLICID}
731Characters were found in the public id that are not allowed.
732\end{datadescni}
733
734\begin{datadescni}{XML_ERROR_SUSPENDED}
735The requested operation was made on a suspended parser, but isn't
736allowed. This includes attempts to provide additional input or to
737stop the parser.
738\end{datadescni}
739
740\begin{datadescni}{XML_ERROR_NOT_SUSPENDED}
741An attempt to resume the parser was made when the parser had not been
742suspended.
743\end{datadescni}
744
745\begin{datadescni}{XML_ERROR_ABORTED}
746This should not be reported to Python applications.
747\end{datadescni}
748
749\begin{datadescni}{XML_ERROR_FINISHED}
The requested operation was made on a parser which had finished
parsing input, and is not allowed in that state. This includes attempts
to provide additional input or to stop the parser.
753\end{datadescni}
754
755\begin{datadescni}{XML_ERROR_SUSPEND_PE}
756\end{datadescni}