"""
This module contains the core classes of version 2.0 of SAX for Python.
This file provides only default classes with absolutely minimum
functionality, from which drivers and applications can be subclassed.

Many of these classes are empty and are included only as documentation
of the interfaces.

$Id$
"""

# Version string of the SAX interface described by this module.
version = '2.0beta'

#============================================================================
#
# HANDLER INTERFACES
#
#============================================================================

# ===== ERRORHANDLER =====

class ErrorHandler:
    """Basic interface for SAX error handlers.

    If you create an object that implements this interface, then
    register the object with your XMLReader, the parser will call the
    methods in your object to report all warnings and errors. There
    are three levels of errors available: warnings, (possibly)
    recoverable errors, and unrecoverable errors. All methods take a
    SAXParseException as the only parameter.

    The default implementations re-raise the exception for both kinds
    of errors and print warnings to standard output."""

    def error(self, exception):
        "Handle a recoverable error by raising the given exception."
        raise exception

    def fatalError(self, exception):
        "Handle a non-recoverable error by raising the given exception."
        raise exception

    def warning(self, exception):
        "Handle a warning by printing the given exception."
        # Function-call form: identical output under the Python 2 print
        # statement and valid syntax under Python 3.
        print(exception)


# ===== CONTENTHANDLER =====

class ContentHandler:
    """Interface for receiving logical document content events.

    This is the main callback interface in SAX, and the one most
    important to applications. The order of events in this interface
    mirrors the order of the information in the document.

    Every event method here is a no-op by default; subclasses override
    the ones they are interested in."""

    def __init__(self):
        # No locator is known until the parser calls setDocumentLocator.
        self._locator = None

    def setDocumentLocator(self, locator):
        """Called by the parser to give the application a locator for
        locating the origin of document events.

        SAX parsers are strongly encouraged (though not absolutely
        required) to supply a locator: if it does so, it must supply
        the locator to the application by invoking this method before
        invoking any of the other methods in this interface.

        The locator allows the application to determine the end
        position of any document-related event, even if the parser is
        not reporting an error. Typically, the application will use
        this information for reporting its own errors (such as
        character content that does not match an application's
        business rules). The information returned by the locator is
        probably not sufficient for use with a search engine.

        Note that the locator will return correct information only
        during the invocation of the events in this interface. The
        application should not attempt to use it at any other time.

        The default implementation stores the locator on the handler."""
        self._locator = locator

    def startDocument(self):
        """Receive notification of the beginning of a document.

        The SAX parser will invoke this method only once, before any
        other methods in this interface or in DTDHandler (except for
        setDocumentLocator)."""

    def endDocument(self):
        """Receive notification of the end of a document.

        The SAX parser will invoke this method only once, and it will
        be the last method invoked during the parse. The parser shall
        not invoke this method until it has either abandoned parsing
        (because of an unrecoverable error) or reached the end of
        input."""

    def startPrefixMapping(self, prefix, uri):
        """Begin the scope of a prefix-URI Namespace mapping.

        The information from this event is not necessary for normal
        Namespace processing: the SAX XML reader will automatically
        replace prefixes for element and attribute names when the
        http://xml.org/sax/features/namespaces feature is true (the
        default).

        There are cases, however, when applications need to use
        prefixes in character data or in attribute values, where they
        cannot safely be expanded automatically; the
        start/endPrefixMapping event supplies the information to the
        application to expand prefixes in those contexts itself, if
        necessary.

        Note that start/endPrefixMapping events are not guaranteed to
        be properly nested relative to each other: all
        startPrefixMapping events will occur before the corresponding
        startElement event, and all endPrefixMapping events will occur
        after the corresponding endElement event, but their order is
        not guaranteed."""

    def endPrefixMapping(self, prefix):
        """End the scope of a prefix-URI mapping.

        See startPrefixMapping for details. This event will always
        occur after the corresponding endElement event, but the order
        of endPrefixMapping events is not otherwise guaranteed."""

    def startElement(self, name, attrs):
        """Signals the start of an element in non-namespace mode.

        The name parameter contains the raw XML 1.0 name of the
        element type as a string and the attrs parameter holds an
        instance of the Attributes class containing the attributes of
        the element."""

    def endElement(self, name):
        """Signals the end of an element in non-namespace mode.

        The name parameter contains the name of the element type, just
        as with the startElement event."""

    def startElementNS(self, name, qname, attrs):
        """Signals the start of an element in namespace mode.

        The name parameter contains the name of the element type as a
        (uri, localname) tuple, the qname parameter the raw XML 1.0
        name used in the source document, and the attrs parameter
        holds an instance of the Attributes class containing the
        attributes of the element.

        The uri part of the name tuple is None for elements which have
        no namespace."""

    def endElementNS(self, name, qname):
        """Signals the end of an element in namespace mode.

        The name parameter contains the name of the element type, just
        as with the startElementNS event."""

    def characters(self, content):
        """Receive notification of character data.

        The Parser will call this method to report each chunk of
        character data. SAX parsers may return all contiguous
        character data in a single chunk, or they may split it into
        several chunks; however, all of the characters in any single
        event must come from the same external entity so that the
        Locator provides useful information."""

    def ignorableWhitespace(self, whitespace):
        """Receive notification of ignorable whitespace in element content.

        Validating Parsers must use this method to report each chunk
        of ignorable whitespace (see the W3C XML 1.0 recommendation,
        section 2.10): non-validating parsers may also use this method
        if they are capable of parsing and using content models.

        SAX parsers may return all contiguous whitespace in a single
        chunk, or they may split it into several chunks; however, all
        of the characters in any single event must come from the same
        external entity, so that the Locator provides useful
        information."""

    def processingInstruction(self, target, data):
        """Receive notification of a processing instruction.

        The Parser will invoke this method once for each processing
        instruction found: note that processing instructions may occur
        before or after the main document element.

        A SAX parser should never report an XML declaration (XML 1.0,
        section 2.8) or a text declaration (XML 1.0, section 4.3.1)
        using this method."""

    def skippedEntity(self, name):
        """Receive notification of a skipped entity.

        The Parser will invoke this method once for each entity
        skipped. Non-validating processors may skip entities if they
        have not seen the declarations (because, for example, the
        entity was declared in an external DTD subset). All processors
        may skip external entities, depending on the values of the
        http://xml.org/sax/features/external-general-entities and the
        http://xml.org/sax/features/external-parameter-entities
        properties."""


# ===== DTDHandler =====

class DTDHandler:
    """Handle DTD events.

    This interface specifies only those DTD events required for basic
    parsing (unparsed entities and attributes).

    Both methods are no-ops by default; subclasses override them to
    receive the declarations."""

    def notationDecl(self, name, publicId, systemId):
        "Handle a notation declaration event."

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        "Handle an unparsed entity declaration event."


# ===== ENTITYRESOLVER =====

class EntityResolver:
    """Basic interface for resolving entities.

    If you create an object implementing this interface, then register
    the object with your Parser, the parser will call the method in
    your object to resolve all external entities. Note that
    DefaultHandler implements this interface with the default
    behaviour."""

    def resolveEntity(self, publicId, systemId):
        """Resolve the system identifier of an entity and return either
        the system identifier to read from as a string, or an InputSource
        to read from.

        The default implementation returns systemId unchanged and
        ignores publicId."""
        return systemId


#============================================================================
#
# CORE FEATURES
#
#============================================================================

feature_namespaces = "http://xml.org/sax/features/namespaces"
# true: Perform Namespace processing (default).
# false: Optionally do not perform Namespace processing
#        (implies namespace-prefixes).
# access: (parsing) read-only; (not parsing) read/write

feature_namespace_prefixes = "http://xml.org/sax/features/namespace-prefixes"
# true: Report the original prefixed names and attributes used for Namespace
#       declarations.
# false: Do not report attributes used for Namespace declarations, and
#        optionally do not report original prefixed names (default).
# access: (parsing) read-only; (not parsing) read/write

feature_string_interning = "http://xml.org/sax/features/string-interning"
# true: All element names, prefixes, attribute names, Namespace URIs, and
#       local names are interned using the built-in intern function.
# false: Names are not necessarily interned, although they may be (default).
# access: (parsing) read-only; (not parsing) read/write

feature_validation = "http://xml.org/sax/features/validation"
# true: Report all validation errors (implies external-general-entities and
#       external-parameter-entities).
# false: Do not report validation errors.
# access: (parsing) read-only; (not parsing) read/write

feature_external_ges = "http://xml.org/sax/features/external-general-entities"
# true: Include all external general (text) entities.
# false: Do not include external general entities.
# access: (parsing) read-only; (not parsing) read/write

feature_external_pes = "http://xml.org/sax/features/external-parameter-entities"
# true: Include all external parameter entities, including the external
#       DTD subset.
# false: Do not include any external parameter entities, even the external
#        DTD subset.
# access: (parsing) read-only; (not parsing) read/write

# Every core feature name defined above, in declaration order.
all_features = [feature_namespaces,
                feature_namespace_prefixes,
                feature_string_interning,
                feature_validation,
                feature_external_ges,
                feature_external_pes]


#============================================================================
#
# CORE PROPERTIES
#
#============================================================================

property_lexical_handler = "http://xml.org/sax/properties/lexical-handler"
# data type: xml.sax.sax2lib.LexicalHandler
# description: An optional extension handler for lexical events like comments.
# access: read/write

property_declaration_handler = "http://xml.org/sax/properties/declaration-handler"
# data type: xml.sax.sax2lib.DeclHandler
# description: An optional extension handler for DTD-related events other
#              than notations and unparsed entities.
# access: read/write

property_dom_node = "http://xml.org/sax/properties/dom-node"
# data type: org.w3c.dom.Node
# description: When parsing, the current DOM node being visited if this is
#              a DOM iterator; when not parsing, the root DOM node for
#              iteration.
# access: (parsing) read-only; (not parsing) read/write

property_xml_string = "http://xml.org/sax/properties/xml-string"
# data type: String
# description: The literal string of characters that was the source for
#              the current event.
# access: read-only

property_encoding = "http://www.python.org/sax/properties/encoding"
# data type: String
# description: The name of the encoding to assume for input data.
# access: write: set the encoding, e.g. established by a higher-level
#                protocol. May change during parsing (e.g. after
#                processing a META tag)
#         read:  return the current encoding (possibly established through
#                auto-detection).
# initial value: UTF-8
#

property_interning_dict = "http://www.python.org/sax/properties/interning-dict"
# data type: Dictionary
# description: The dictionary used to intern common strings in the document
# access: write: Request that the parser uses a specific dictionary, to
#                allow interning across different documents
#         read:  return the current interning dictionary, or None
#

# Every core property name defined above. The list order differs from the
# declaration order (dom-node precedes declaration-handler) and is kept as is.
all_properties = [property_lexical_handler,
                  property_dom_node,
                  property_declaration_handler,
                  property_xml_string,
                  property_encoding,
                  property_interning_dict]