blob: fafdfcbf7e651c0faa361a3019ac72370dd536aa [file] [log] [blame]
Fred Drake7fbc85c2000-09-23 04:47:56 +00001\section{\module{xml.parsers.expat} ---
Fred Drakeefffe8e2000-10-29 05:10:30 +00002 Fast XML parsing using Expat}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +00003
Fred Drake5ed1dac2001-02-08 15:40:33 +00004% Markup notes:
5%
6% Many of the attributes of the XMLParser objects are callbacks.
7% Since signature information must be presented, these are described
8% using the methoddesc environment. Since they are attributes which
9% are set by client code, in-text references to these attributes
10% should be marked using the \member macro and should not include the
11% parentheses used when marking functions and methods.
12
Fred Drake7fbc85c2000-09-23 04:47:56 +000013\declaremodule{standard}{xml.parsers.expat}
14\modulesynopsis{An interface to the Expat non-validating XML parser.}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +000015\moduleauthor{Paul Prescod}{paul@prescod.net}
16\sectionauthor{A.M. Kuchling}{amk1@bigfoot.com}
17
Fred Drake7fbc85c2000-09-23 04:47:56 +000018\versionadded{2.0}
19
Fred Drakeefffe8e2000-10-29 05:10:30 +000020The \module{xml.parsers.expat} module is a Python interface to the
21Expat\index{Expat} non-validating XML parser.
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +000022The module provides a single extension type, \class{xmlparser}, that
23represents the current state of an XML parser. After an
24\class{xmlparser} object has been created, various attributes of the object
25can be set to handler functions. When an XML document is then fed to
26the parser, the handler functions are called for the character data
27and markup in the XML document.
Fred Drake7fbc85c2000-09-23 04:47:56 +000028
29This module uses the \module{pyexpat}\refbimodindex{pyexpat} module to
30provide access to the Expat parser. Direct use of the
31\module{pyexpat} module is deprecated.
Fred Drakeefffe8e2000-10-29 05:10:30 +000032
33This module provides one exception and one type object:
34
35\begin{excdesc}{error}
36 The exception raised when Expat reports an error.
37\end{excdesc}
38
39\begin{datadesc}{XMLParserType}
40 The type of the return values from the \function{ParserCreate()}
41 function.
42\end{datadesc}
43
44
Fred Drake7fbc85c2000-09-23 04:47:56 +000045The \module{xml.parsers.expat} module contains two functions:
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +000046
47\begin{funcdesc}{ErrorString}{errno}
48Returns an explanatory string for a given error number \var{errno}.
49\end{funcdesc}
50
Fred Drakeefffe8e2000-10-29 05:10:30 +000051\begin{funcdesc}{ParserCreate}{\optional{encoding\optional{,
52 namespace_separator}}}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +000053Creates and returns a new \class{xmlparser} object.
54\var{encoding}, if specified, must be a string naming the encoding
55used by the XML data. Expat doesn't support as many encodings as
56Python does, and its repertoire of encodings can't be extended; it
Fred Drake5ed1dac2001-02-08 15:40:33 +000057supports UTF-8, UTF-16, ISO-8859-1 (Latin1), and ASCII. If
58\var{encoding} is given it will override the implicit or explicit
59encoding of the document.
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +000060
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +000061Expat can optionally do XML namespace processing for you, enabled by
Fred Drakeefffe8e2000-10-29 05:10:30 +000062providing a value for \var{namespace_separator}. The value must be a
63one-character string; a \exception{ValueError} will be raised if the
64string has an illegal length (\code{None} is considered the same as
65omission). When namespace processing is enabled, element type names
66and attribute names that belong to a namespace will be expanded. The
67element name passed to the element handlers
Fred Drake5ed1dac2001-02-08 15:40:33 +000068\member{StartElementHandler} and \member{EndElementHandler}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +000069will be the concatenation of the namespace URI, the namespace
70separator character, and the local part of the name. If the namespace
Fred Drakeefffe8e2000-10-29 05:10:30 +000071separator is a zero byte (\code{chr(0)}) then the namespace URI and
Fred Drake5ed1dac2001-02-08 15:40:33 +000072the local part will be concatenated without any separator.
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +000073
Fred Drake2fef3ab2000-11-28 06:38:22 +000074For example, if \var{namespace_separator} is set to a space character
75(\character{ }) and the following document is parsed:
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +000076
77\begin{verbatim}
78<?xml version="1.0"?>
79<root xmlns = "http://default-namespace.org/"
80 xmlns:py = "http://www.python.org/ns/">
81 <py:elem1 />
82 <elem2 xmlns="" />
83</root>
84\end{verbatim}
85
Fred Drake5ed1dac2001-02-08 15:40:33 +000086\member{StartElementHandler} will receive the following strings
Fred Draked79c33a2000-09-25 14:14:30 +000087for each element:
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +000088
89\begin{verbatim}
90http://default-namespace.org/ root
91http://www.python.org/ns/ elem1
92elem2
93\end{verbatim}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +000094\end{funcdesc}
95
Fred Drakef08cbb12000-12-23 22:19:05 +000096
97\subsection{XMLParser Objects \label{xmlparser-objects}}
98
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +000099\class{xmlparser} objects have the following methods:
100
Fred Drake2fef3ab2000-11-28 06:38:22 +0000101\begin{methoddesc}[xmlparser]{Parse}{data\optional{, isfinal}}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000102Parses the contents of the string \var{data}, calling the appropriate
103handler functions to process the parsed data. \var{isfinal} must be
Fred Drakef08cbb12000-12-23 22:19:05 +0000104true on the final call to this method. \var{data} can be the empty
Fred Drakec05cbb02000-07-05 02:03:34 +0000105string at any time.
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000106\end{methoddesc}
107
Fred Drakeefffe8e2000-10-29 05:10:30 +0000108\begin{methoddesc}[xmlparser]{ParseFile}{file}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000109Parse XML data reading from the object \var{file}. \var{file} only
110needs to provide the \method{read(\var{nbytes})} method, returning the
111empty string when there's no more data.
112\end{methoddesc}
113
Fred Drakeefffe8e2000-10-29 05:10:30 +0000114\begin{methoddesc}[xmlparser]{SetBase}{base}
Fred Drake5ed1dac2001-02-08 15:40:33 +0000115Sets the base to be used for resolving relative URIs in system
116identifiers in declarations. Resolving relative identifiers is left
117to the application: this value will be passed through as the
118\var{base} argument to the \member{ExternalEntityRefHandler},
119\member{NotationDeclHandler}, and
120\member{UnparsedEntityDeclHandler} handlers.
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000121\end{methoddesc}
122
Fred Drakeefffe8e2000-10-29 05:10:30 +0000123\begin{methoddesc}[xmlparser]{GetBase}{}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000124Returns a string containing the base set by a previous call to
125\method{SetBase()}, or \code{None} if
126\method{SetBase()} hasn't been called.
127\end{methoddesc}
128
Fred Drakef08cbb12000-12-23 22:19:05 +0000129\begin{methoddesc}[xmlparser]{ExternalEntityParserCreate}{context\optional{,
130 encoding}}
131Create a ``child'' parser which can be used to parse an external
132parsed entity referred to by content parsed by the parent parser. The
Fred Drakeb162d182001-01-04 05:48:08 +0000133\var{context} parameter should be the string passed to the
Fred Drakef08cbb12000-12-23 22:19:05 +0000134\member{ExternalEntityRefHandler} handler function, described below.
Fred Drake5ed1dac2001-02-08 15:40:33 +0000135The child parser is created with the \member{ordered_attributes},
136\member{returns_unicode} and \member{specified_attributes} set to the
137values of this parser.
Fred Drakef08cbb12000-12-23 22:19:05 +0000138\end{methoddesc}
139
Fred Drakeefffe8e2000-10-29 05:10:30 +0000140
Fred Draked79c33a2000-09-25 14:14:30 +0000141\class{xmlparser} objects have the following attributes:
Andrew M. Kuchling0690c862000-08-17 23:15:21 +0000142
Fred Drake5ed1dac2001-02-08 15:40:33 +0000143\begin{memberdesc}[xmlparser]{ordered_attributes}
144Setting this attribute to a non-zero integer causes the attributes to
145be reported as a list rather than a dictionary. The attributes are
146presented in the order found in the document text. For each
147attribute, two list entries are presented: the attribute name and the
148attribute value. (Older versions of this module also used this
149format.) By default, this attribute is false; it may be changed at
150any time.
151\versionadded{2.1}
152\end{memberdesc}
153
Fred Drakeefffe8e2000-10-29 05:10:30 +0000154\begin{memberdesc}[xmlparser]{returns_unicode}
Fred Drake5ed1dac2001-02-08 15:40:33 +0000155If this attribute is set to a non-zero integer, the handler functions
156will be passed Unicode strings. If \member{returns_unicode} is 0,
1578-bit strings containing UTF-8 encoded data will be passed to the
158handlers.
Fred Drakeb62966c2000-12-07 00:00:21 +0000159\versionchanged[Can be changed at any time to affect the result
160 type.]{1.6}
Fred Drakeefffe8e2000-10-29 05:10:30 +0000161\end{memberdesc}
Andrew M. Kuchling0690c862000-08-17 23:15:21 +0000162
Fred Drake5ed1dac2001-02-08 15:40:33 +0000163\begin{memberdesc}[xmlparser]{specified_attributes}
164If set to a non-zero integer, the parser will report only those
165attributes which were specified in the document instance and not those
166which were derived from attribute declarations. Applications which
167set this need to be especially careful to use what additional
168information is available from the declarations as needed to comply
169with the standards for the behavior of XML processors. By default,
170this attribute is false; it may be changed at any time.
171\versionadded{2.1}
172\end{memberdesc}
173
Andrew M. Kuchling0690c862000-08-17 23:15:21 +0000174The following attributes contain values relating to the most recent
175error encountered by an \class{xmlparser} object, and will only have
176correct values once a call to \method{Parse()} or \method{ParseFile()}
Fred Drake7fbc85c2000-09-23 04:47:56 +0000177has raised a \exception{xml.parsers.expat.error} exception.
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000178
Fred Drakeefffe8e2000-10-29 05:10:30 +0000179\begin{memberdesc}[xmlparser]{ErrorByteIndex}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000180Byte index at which an error occurred.
Fred Drakeefffe8e2000-10-29 05:10:30 +0000181\end{memberdesc}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000182
Fred Drakeefffe8e2000-10-29 05:10:30 +0000183\begin{memberdesc}[xmlparser]{ErrorCode}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000184Numeric code specifying the problem. This value can be passed to the
185\function{ErrorString()} function, or compared to one of the constants
Fred Drake7fbc85c2000-09-23 04:47:56 +0000186defined in the \module{errors} object.
Fred Drakeefffe8e2000-10-29 05:10:30 +0000187\end{memberdesc}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000188
Fred Drakeefffe8e2000-10-29 05:10:30 +0000189\begin{memberdesc}[xmlparser]{ErrorColumnNumber}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000190Column number at which an error occurred.
Fred Drakeefffe8e2000-10-29 05:10:30 +0000191\end{memberdesc}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000192
Fred Drakeefffe8e2000-10-29 05:10:30 +0000193\begin{memberdesc}[xmlparser]{ErrorLineNumber}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000194Line number at which an error occurred.
Fred Drakeefffe8e2000-10-29 05:10:30 +0000195\end{memberdesc}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000196
197Here is the list of handlers that can be set. To set a handler on an
Fred Drakec05cbb02000-07-05 02:03:34 +0000198\class{xmlparser} object \var{o}, use
199\code{\var{o}.\var{handlername} = \var{func}}. \var{handlername} must
200be taken from the following list, and \var{func} must be a callable
201object accepting the correct number of arguments. The arguments are
202all strings, unless otherwise stated.
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000203
Fred Drake5ed1dac2001-02-08 15:40:33 +0000204\begin{methoddesc}[xmlparser]{XmlDeclHandler}{version, encoding, standalone}
205Called when the XML declaration is parsed. The XML declaration is the
206(optional) declaration of the applicable version of the XML
207recommendation, the encoding of the document text, and an optional
208``standalone'' declaration. \var{version} and \var{encoding} will be
209strings of the type dictated by the \member{returns_unicode}
210attribute, and \var{standalone} will be \code{1} if the document is
211declared standalone, \code{0} if it is declared not to be standalone,
212or \code{-1} if the standalone clause was omitted.
213This is only available with Expat version 1.95.0 or newer.
214\versionadded{2.1}
215\end{methoddesc}
216
217\begin{methoddesc}[xmlparser]{StartDoctypeDeclHandler}{doctypeName,
218 systemId, publicId,
219 has_internal_subset}
220Called when Expat begins parsing the document type declaration
221(\code{<!DOCTYPE \ldots}). The \var{doctypeName} is provided exactly
222as presented. The \var{systemId} and \var{publicId} parameters give
223the system and public identifiers if specified, or \code{None} if
224omitted. \var{has_internal_subset} will be true if the document
225contains an internal document declaration subset.
226This requires Expat version 1.2 or newer.
227\end{methoddesc}
228
229\begin{methoddesc}[xmlparser]{EndDoctypeDeclHandler}{}
230Called when Expat is done parsing the document type declaration.
231This requires Expat version 1.2 or newer.
232\end{methoddesc}
233
234\begin{methoddesc}[xmlparser]{ElementDeclHandler}{name, model}
235Called once for each element type declaration. \var{name} is the name
236of the element type, and \var{model} is a representation of the
237content model.
238\end{methoddesc}
239
240\begin{methoddesc}[xmlparser]{AttlistDeclHandler}{elname, attname,
241 type, default, required}
242Called for each declared attribute for an element type. If an
243attribute list declaration declares three attributes, this handler is
244called three times, once for each attribute. \var{elname} is the name
245of the element to which the declaration applies and \var{attname} is
246the name of the attribute declared. The attribute type is a string
247passed as \var{type}; the possible values are \code{'CDATA'},
248\code{'ID'}, \code{'IDREF'}, ...
249\var{default} gives the default value for the attribute used when the
250attribute is not specified by the document instance, or \code{None} if
251there is no default value (\code{\#IMPLIED} values). If the attribute
252is required to be given in the document instance, \var{required} will
253be true.
254This requires Expat version 1.95.0 or newer.
255\end{methoddesc}
256
Fred Drakeefffe8e2000-10-29 05:10:30 +0000257\begin{methoddesc}[xmlparser]{StartElementHandler}{name, attributes}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000258Called for the start of every element. \var{name} is a string
259containing the element name, and \var{attributes} is a dictionary
260mapping attribute names to their values.
261\end{methoddesc}
262
Fred Drakeefffe8e2000-10-29 05:10:30 +0000263\begin{methoddesc}[xmlparser]{EndElementHandler}{name}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000264Called for the end of every element.
265\end{methoddesc}
266
Fred Drakeefffe8e2000-10-29 05:10:30 +0000267\begin{methoddesc}[xmlparser]{ProcessingInstructionHandler}{target, data}
Fred Drake5ed1dac2001-02-08 15:40:33 +0000268Called for every processing instruction.
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000269\end{methoddesc}
270
Fred Drakeefffe8e2000-10-29 05:10:30 +0000271\begin{methoddesc}[xmlparser]{CharacterDataHandler}{data}
Fred Drake5ed1dac2001-02-08 15:40:33 +0000272Called for character data. This will be called for normal character
273data, CDATA marked content, and ignorable whitespace. Applications
274which must distinguish these cases can use the
275\member{StartCdataSectionHandler}, \member{EndCdataSectionHandler},
276and \member{ElementDeclHandler} callbacks to collect the required
277information.
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000278\end{methoddesc}
279
Fred Drakeefffe8e2000-10-29 05:10:30 +0000280\begin{methoddesc}[xmlparser]{UnparsedEntityDeclHandler}{entityName, base,
281 systemId, publicId,
282 notationName}
Fred Drake5ed1dac2001-02-08 15:40:33 +0000283Called for unparsed (NDATA) entity declarations. This is only present
284for version 1.2 of the Expat library; for more recent versions, use
285\member{EntityDeclHandler} instead. (The underlying function in the
286Expat library has been declared obsolete.)
287\end{methoddesc}
288
289\begin{methoddesc}[xmlparser]{EntityDeclHandler}{entityName,
290 is_parameter_entity, value,
291 base, systemId,
292 publicId,
293 notationName}
294Called for all entity declarations. For parameter and internal
295entities, \var{value} will be a string giving the declared contents
296of the entity; this will be \code{None} for external entities. The
297\var{notationName} parameter will be \code{None} for parsed entities,
298and the name of the notation for unparsed entities.
299\var{is_parameter_entity} will be true if the entity is a parameter
300entity or false for general entities (most applications only need to
301be concerned with general entities).
302This is only available starting with version 1.95.0 of the Expat
303library.
304\versionadded{2.1}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000305\end{methoddesc}
306
Fred Drakeefffe8e2000-10-29 05:10:30 +0000307\begin{methoddesc}[xmlparser]{NotationDeclHandler}{notationName, base,
308 systemId, publicId}
Fred Drake5ed1dac2001-02-08 15:40:33 +0000309Called for notation declarations. \var{notationName}, \var{base},
310\var{systemId}, and \var{publicId} are strings if given. If the
311public identifier is omitted, \var{publicId} will be \code{None}.
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000312\end{methoddesc}
313
Fred Drakeefffe8e2000-10-29 05:10:30 +0000314\begin{methoddesc}[xmlparser]{StartNamespaceDeclHandler}{prefix, uri}
Fred Drake5ed1dac2001-02-08 15:40:33 +0000315Called when an element contains a namespace declaration. Namespace
316declarations are processed before the \member{StartElementHandler} is
317called for the element on which declarations are placed.
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000318\end{methoddesc}
319
Fred Drakeefffe8e2000-10-29 05:10:30 +0000320\begin{methoddesc}[xmlparser]{EndNamespaceDeclHandler}{prefix}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000321Called when the closing tag is reached for an element
Fred Drake5ed1dac2001-02-08 15:40:33 +0000322that contained a namespace declaration. This is called once for each
323namespace declaration on the element in the reverse of the order in
324which the \member{StartNamespaceDeclHandler} was called to indicate
325the start of each namespace declaration's scope. Calls to this
326handler are made after the corresponding \member{EndElementHandler}
327for the end of the element.
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000328\end{methoddesc}
329
Fred Drakeefffe8e2000-10-29 05:10:30 +0000330\begin{methoddesc}[xmlparser]{CommentHandler}{data}
Fred Drake5ed1dac2001-02-08 15:40:33 +0000331Called for comments. \var{data} is the text of the comment, excluding
332the leading `\code{<!--}' and trailing `\code{-->}'.
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000333\end{methoddesc}
334
Fred Drakeefffe8e2000-10-29 05:10:30 +0000335\begin{methoddesc}[xmlparser]{StartCdataSectionHandler}{}
Fred Drake5ed1dac2001-02-08 15:40:33 +0000336Called at the start of a CDATA section. This and
337\member{EndCdataSectionHandler} are needed to be able to identify
338the syntactical start and end for CDATA sections.
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000339\end{methoddesc}
340
Fred Drakeefffe8e2000-10-29 05:10:30 +0000341\begin{methoddesc}[xmlparser]{EndCdataSectionHandler}{}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000342Called at the end of a CDATA section.
343\end{methoddesc}
344
Fred Drakeefffe8e2000-10-29 05:10:30 +0000345\begin{methoddesc}[xmlparser]{DefaultHandler}{data}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000346Called for any characters in the XML document for
347which no applicable handler has been specified. This means
348characters that are part of a construct which could be reported, but
349for which no handler has been supplied.
350\end{methoddesc}
351
Fred Drakeefffe8e2000-10-29 05:10:30 +0000352\begin{methoddesc}[xmlparser]{DefaultHandlerExpand}{data}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000353This is the same as the \member{DefaultHandler},
354but doesn't inhibit expansion of internal entities.
355The entity reference will not be passed to the default handler.
356\end{methoddesc}
357
Fred Drake5ed1dac2001-02-08 15:40:33 +0000358\begin{methoddesc}[xmlparser]{NotStandaloneHandler}{} Called if the
359XML document hasn't been declared as being a standalone document.
360This happens when there is an external subset or a reference to a
361parameter entity, but the XML declaration does not set standalone to
362\code{yes}. If this handler returns \code{0},
363then the parser will throw an \constant{XML_ERROR_NOT_STANDALONE}
364error. If this handler is not set, no exception is raised by the
365parser for this condition.
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000366\end{methoddesc}
367
Fred Drakeefffe8e2000-10-29 05:10:30 +0000368\begin{methoddesc}[xmlparser]{ExternalEntityRefHandler}{context, base,
369 systemId, publicId}
Fred Drake5ed1dac2001-02-08 15:40:33 +0000370Called for references to external entities. \var{base} is the current
371base, as set by a previous call to \method{SetBase()}. The system and
372public identifiers, \var{systemId} and \var{publicId}, are strings if
373given; if the public identifier is not given, \var{publicId} will be
374\code{None}.
375
376For external entities to be parsed, this handler must be implemented.
377It is responsible for creating the sub-parser using
378\code{ExternalEntityParserCreate(\var{context})}, initializing it with
379the appropriate callbacks, and parsing the entity. If this handler
380returns \code{0}, the parser will throw an
381\constant{XML_ERROR_EXTERNAL_ENTITY_HANDLING} error.
382
383If this handler is not provided, external entities are reported by the
384\member{DefaultHandler} callback, if provided.
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000385\end{methoddesc}
386
387
Fred Drake7fbc85c2000-09-23 04:47:56 +0000388\subsection{Example \label{expat-example}}
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000389
Fred Drakec05cbb02000-07-05 02:03:34 +0000390The following program defines three handlers that just print out their
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000391arguments.
392
393\begin{verbatim}
Fred Drake7fbc85c2000-09-23 04:47:56 +0000394import xml.parsers.expat
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000395
396# 3 handler functions
397def start_element(name, attrs):
398 print 'Start element:', name, attrs
399def end_element(name):
400 print 'End element:', name
401def char_data(data):
402 print 'Character data:', repr(data)
403
Fred Drake7fbc85c2000-09-23 04:47:56 +0000404p = xml.parsers.expat.ParserCreate()
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000405
406p.StartElementHandler = start_element
Fred Drake7fbc85c2000-09-23 04:47:56 +0000407p.EndElementHandler = end_element
408p.CharacterDataHandler = char_data
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000409
410p.Parse("""<?xml version="1.0"?>
411<parent id="top"><child1 name="paul">Text goes here</child1>
412<child2 name="fred">More text</child2>
413</parent>""")
414\end{verbatim}
415
416The output from this program is:
417
418\begin{verbatim}
419Start element: parent {'id': 'top'}
420Start element: child1 {'name': 'paul'}
421Character data: 'Text goes here'
422End element: child1
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000423Character data: '\n'
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000424Start element: child2 {'name': 'fred'}
425Character data: 'More text'
426End element: child2
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000427Character data: '\n'
Andrew M. Kuchling6b14eeb2000-06-11 02:42:07 +0000428End element: parent
429\end{verbatim}
Fred Drakec05cbb02000-07-05 02:03:34 +0000430
431
Fred Drake5ed1dac2001-02-08 15:40:33 +0000432\subsection{Content Model Descriptions \label{expat-content-models}}
433\sectionauthor{Fred L. Drake, Jr.}{fdrake@acm.org}
434
435Content models are described using nested tuples. Each tuple
436contains four values: the type, the quantifier, the name, and a tuple
437of children. Children are simply additional content model
438descriptions.
439
440The values of the first two fields are constants defined in the
441\code{model} object of the \module{xml.parsers.expat} module. These
442constants can be collected in two groups: the model type group and the
443quantifier group.
444
445The constants in the model type group are:
446
447\begin{datadescni}{XML_CTYPE_ANY}
448The element named by the model name was declared to have a content
449model of \code{ANY}.
450\end{datadescni}
451
452\begin{datadescni}{XML_CTYPE_CHOICE}
453The named element allows a choice from a number of options; this is
454used for content models such as \code{(A | B | C)}.
455\end{datadescni}
456
457\begin{datadescni}{XML_CTYPE_EMPTY}
458Elements which are declared to be \code{EMPTY} have this model type.
459\end{datadescni}
460
461\begin{datadescni}{XML_CTYPE_MIXED}
462\end{datadescni}
463
464\begin{datadescni}{XML_CTYPE_NAME}
465\end{datadescni}
466
467\begin{datadescni}{XML_CTYPE_SEQ}
468Models which represent a series of models which follow one after the
469other are indicated with this model type. This is used for models
470such as \code{(A, B, C)}.
471\end{datadescni}
472
473
474The constants in the quantifier group are:
475
476\begin{datadescni}{XML_CQUANT_NONE}
477\end{datadescni}
478
479\begin{datadescni}{XML_CQUANT_OPT}
480The model is optional: it can appear once or not at all, as for
481\code{A?}.
482\end{datadescni}
483
484\begin{datadescni}{XML_CQUANT_PLUS}
485The model must occur one or more times (\code{A+}).
486\end{datadescni}
487
488\begin{datadescni}{XML_CQUANT_REP}
489The model must occur zero or more times, as for \code{A*}.
490\end{datadescni}
491
492
Fred Drake7fbc85c2000-09-23 04:47:56 +0000493\subsection{Expat error constants \label{expat-errors}}
Fred Drakec05cbb02000-07-05 02:03:34 +0000494\sectionauthor{A.M. Kuchling}{amk1@bigfoot.com}
495
496The following table lists the error constants in the
Fred Drake7fbc85c2000-09-23 04:47:56 +0000497\code{errors} object of the \module{xml.parsers.expat} module. These
498constants are useful in interpreting some of the attributes of the
499parser object after an error has occurred.
Fred Drakec05cbb02000-07-05 02:03:34 +0000500
Fred Drake7fbc85c2000-09-23 04:47:56 +0000501The \code{errors} object has the following attributes:
Fred Drakec05cbb02000-07-05 02:03:34 +0000502
Fred Drake5ed1dac2001-02-08 15:40:33 +0000503\begin{datadescni}{XML_ERROR_ASYNC_ENTITY}
504\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000505
Fred Drake5ed1dac2001-02-08 15:40:33 +0000506\begin{datadescni}{XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF}
507An entity reference in an attribute value referred to an external
508entity instead of an internal entity.
509\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000510
Fred Drake5ed1dac2001-02-08 15:40:33 +0000511\begin{datadescni}{XML_ERROR_BAD_CHAR_REF}
512\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000513
Fred Drake5ed1dac2001-02-08 15:40:33 +0000514\begin{datadescni}{XML_ERROR_BINARY_ENTITY_REF}
515\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000516
Fred Drake5ed1dac2001-02-08 15:40:33 +0000517\begin{datadescni}{XML_ERROR_DUPLICATE_ATTRIBUTE}
Fred Drakeacab3d62000-07-11 16:30:30 +0000518An attribute was used more than once in a start tag.
Fred Drake5ed1dac2001-02-08 15:40:33 +0000519\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000520
Fred Drake5ed1dac2001-02-08 15:40:33 +0000521\begin{datadescni}{XML_ERROR_INCORRECT_ENCODING}
522\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000523
Fred Drake5ed1dac2001-02-08 15:40:33 +0000524\begin{datadescni}{XML_ERROR_INVALID_TOKEN}
525\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000526
Fred Drake5ed1dac2001-02-08 15:40:33 +0000527\begin{datadescni}{XML_ERROR_JUNK_AFTER_DOC_ELEMENT}
Fred Drakeacab3d62000-07-11 16:30:30 +0000528Something other than whitespace occurred after the document element.
Fred Drake5ed1dac2001-02-08 15:40:33 +0000529\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000530
Fred Drake5ed1dac2001-02-08 15:40:33 +0000531\begin{datadescni}{XML_ERROR_MISPLACED_XML_PI}
532\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000533
Fred Drake5ed1dac2001-02-08 15:40:33 +0000534\begin{datadescni}{XML_ERROR_NO_ELEMENTS}
535The document contains no elements.
536\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000537
Fred Drake5ed1dac2001-02-08 15:40:33 +0000538\begin{datadescni}{XML_ERROR_NO_MEMORY}
Fred Drakeacab3d62000-07-11 16:30:30 +0000539Expat was not able to allocate memory internally.
Fred Drake5ed1dac2001-02-08 15:40:33 +0000540\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000541
Fred Drake5ed1dac2001-02-08 15:40:33 +0000542\begin{datadescni}{XML_ERROR_PARAM_ENTITY_REF}
543\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000544
Fred Drake5ed1dac2001-02-08 15:40:33 +0000545\begin{datadescni}{XML_ERROR_PARTIAL_CHAR}
546\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000547
Fred Drake5ed1dac2001-02-08 15:40:33 +0000548\begin{datadescni}{XML_ERROR_RECURSIVE_ENTITY_REF}
549\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000550
Fred Drake5ed1dac2001-02-08 15:40:33 +0000551\begin{datadescni}{XML_ERROR_SYNTAX}
Fred Drakeacab3d62000-07-11 16:30:30 +0000552Some unspecified syntax error was encountered.
Fred Drake5ed1dac2001-02-08 15:40:33 +0000553\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000554
Fred Drake5ed1dac2001-02-08 15:40:33 +0000555\begin{datadescni}{XML_ERROR_TAG_MISMATCH}
Fred Drakeacab3d62000-07-11 16:30:30 +0000556An end tag did not match the innermost open start tag.
Fred Drake5ed1dac2001-02-08 15:40:33 +0000557\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000558
Fred Drake5ed1dac2001-02-08 15:40:33 +0000559\begin{datadescni}{XML_ERROR_UNCLOSED_TOKEN}
560\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000561
Fred Drake5ed1dac2001-02-08 15:40:33 +0000562\begin{datadescni}{XML_ERROR_UNDEFINED_ENTITY}
Fred Drakeacab3d62000-07-11 16:30:30 +0000563A reference was made to an entity which was not defined.
Fred Drake5ed1dac2001-02-08 15:40:33 +0000564\end{datadescni}
Fred Drakeacab3d62000-07-11 16:30:30 +0000565
Fred Drake5ed1dac2001-02-08 15:40:33 +0000566\begin{datadescni}{XML_ERROR_UNKNOWN_ENCODING}
Fred Drakeacab3d62000-07-11 16:30:30 +0000567The document encoding is not supported by Expat.
Fred Drake5ed1dac2001-02-08 15:40:33 +0000568\end{datadescni}