"""
SAX driver for the pyexpat C module. This driver works with
pyexpat.__version__ == '2.22'.
"""

# Version string of this SAX driver module.
version = "0.20"

from xml.sax._exceptions import *
from xml.sax.handler import feature_validation, feature_namespaces
from xml.sax.handler import feature_namespace_prefixes
from xml.sax.handler import feature_external_ges, feature_external_pes
from xml.sax.handler import feature_string_interning
from xml.sax.handler import property_xml_string, property_interning_dict

# xml.parsers.expat does not raise ImportError in Jython
import sys
if sys.platform[:4] == "java":
    raise SAXReaderNotAvailable("expat not available in Java", None)
del sys

# Report a missing or unusable expat as SAXReaderNotAvailable rather than
# letting ImportError escape from this module.
try:
    from xml.parsers import expat
except ImportError:
    raise SAXReaderNotAvailable("expat not supported", None)
else:
    if not hasattr(expat, "ParserCreate"):
        raise SAXReaderNotAvailable("expat not supported", None)
from xml.sax import xmlreader, saxutils, handler

# Module-level shortcuts used by the element event handlers below.
AttributesImpl = xmlreader.AttributesImpl
AttributesNSImpl = xmlreader.AttributesNSImpl

# If we're using a sufficiently recent version of Python, we can use
# weak references to avoid cycles between the parser and content
# handler, otherwise we'll just have to pretend.
try:
    import _weakref
except ImportError:
    def _mkproxy(o):
        # Fallback when weak references are unavailable: hand back the
        # object itself (a strong reference).
        return o
else:
    import weakref
    _mkproxy = weakref.proxy
    # Only _mkproxy is meant to stay in the module namespace.
    del weakref, _weakref

class _ClosedParser:
    """Attribute holder that stands in for an expat parser after close().

    ExpatParser.close() copies ErrorColumnNumber and ErrorLineNumber onto
    an instance of this class so they can still be read after closing.
    """

# --- ExpatLocator

class ExpatLocator(xmlreader.Locator):
    """Locator for use with the ExpatParser class.

    This uses a weak reference to the parser object to avoid creating
    a circular reference between the parser and the content handler.
    """
    def __init__(self, parser):
        # _mkproxy yields a weak proxy when weak references are available,
        # otherwise the parser object itself.
        self._ref = _mkproxy(parser)

    def getColumnNumber(self):
        """Return expat's ErrorColumnNumber, or None if no parser exists."""
        parser = self._ref
        if parser._parser is None:
            return None
        return parser._parser.ErrorColumnNumber

    def getLineNumber(self):
        """Return expat's ErrorLineNumber, or 1 if no parser exists."""
        parser = self._ref
        if parser._parser is None:
            return 1
        return parser._parser.ErrorLineNumber

    def getPublicId(self):
        """Return the public identifier of the parser's current source."""
        parser = self._ref
        # NOTE(review): self._ref is whatever _mkproxy returned, so this
        # check presumably only triggers if None was passed in -- confirm.
        if parser is None:
            return None
        return parser._source.getPublicId()

    def getSystemId(self):
        """Return the system identifier of the parser's current source."""
        parser = self._ref
        # NOTE(review): see getPublicId() about this None check.
        if parser is None:
            return None
        return parser._source.getSystemId()


# --- ExpatParser

class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
    """SAX driver for the pyexpat C module."""

    def __init__(self, namespaceHandling=0, bufsize=2**16-20):
        """Create a driver; the expat parser itself is built in reset().

        namespaceHandling -- initial value of the namespaces feature.
        bufsize -- passed through to xmlreader.IncrementalParser.
        """
        xmlreader.IncrementalParser.__init__(self, bufsize)
        self._source = xmlreader.InputSource()
        self._parser = None
        self._namespaces = namespaceHandling
        self._lex_handler_prop = None
        self._parsing = False
        # (parser, source) pairs saved while an external entity is parsed.
        self._entity_stack = []
        self._external_ges = 0
        # Interning dictionary handed to expat; None means interning is off.
        self._interning = None

    # XMLReader methods

    def parse(self, source):
        "Parse an XML document from a URL or an InputSource."
        source = saxutils.prepare_input_source(source)

        self._source = source
        try:
            self.reset()
            self._cont_handler.setDocumentLocator(ExpatLocator(self))
            xmlreader.IncrementalParser.parse(self, source)
        except:
            # bpo-30264: Close the source on error to not leak resources:
            # xml.sax.parse() doesn't give access to the underlying parser
            # to the caller
            self._close_source()
            raise

    def prepareParser(self, source):
        """Pass the source's system id (if any) to expat as the base."""
        if source.getSystemId() is not None:
            self._parser.SetBase(source.getSystemId())

    # Redefined setContentHandler to allow changing handlers during parsing

    def setContentHandler(self, handler):
        """Register the content handler, rebinding expat callbacks if parsing."""
        xmlreader.IncrementalParser.setContentHandler(self, handler)
        if self._parsing:
            self._reset_cont_handler()

    def getFeature(self, name):
        """Return the state of feature *name*.

        Raises SAXNotRecognizedException for an unknown feature name.
        """
        if name == feature_namespaces:
            return self._namespaces
        elif name == feature_string_interning:
            return self._interning is not None
        elif name in (feature_validation, feature_external_pes,
                      feature_namespace_prefixes):
            return 0
        elif name == feature_external_ges:
            return self._external_ges
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def setFeature(self, name, state):
        """Set feature *name* to *state*.

        Raises SAXNotSupportedException while parsing, or when asked to
        enable validation, external parameter entities or namespace
        prefixes; raises SAXNotRecognizedException for an unknown name.
        """
        if self._parsing:
            raise SAXNotSupportedException("Cannot set features while parsing")

        if name == feature_namespaces:
            self._namespaces = state
        elif name == feature_external_ges:
            self._external_ges = state
        elif name == feature_string_interning:
            if state:
                # Keep an existing interning dictionary if there is one.
                if self._interning is None:
                    self._interning = {}
            else:
                self._interning = None
        elif name == feature_validation:
            if state:
                raise SAXNotSupportedException(
                    "expat does not support validation")
        elif name == feature_external_pes:
            if state:
                raise SAXNotSupportedException(
                    "expat does not read external parameter entities")
        elif name == feature_namespace_prefixes:
            if state:
                raise SAXNotSupportedException(
                    "expat does not report namespace prefixes")
        else:
            raise SAXNotRecognizedException(
                "Feature '%s' not recognized" % name)

    def getProperty(self, name):
        """Return the value of property *name*.

        The XML string property is only available while an expat parser
        exists; otherwise SAXNotSupportedException is raised.  Unknown
        names raise SAXNotRecognizedException.
        """
        if name == handler.property_lexical_handler:
            return self._lex_handler_prop
        elif name == property_interning_dict:
            return self._interning
        elif name == property_xml_string:
            if self._parser:
                if hasattr(self._parser, "GetInputContext"):
                    return self._parser.GetInputContext()
                else:
                    raise SAXNotRecognizedException(
                        "This version of expat does not support getting"
                        " the XML string")
            else:
                raise SAXNotSupportedException(
                    "XML string cannot be returned when not parsing")
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    def setProperty(self, name, value):
        """Set property *name* to *value*.

        The XML string property is read-only (SAXNotSupportedException);
        unknown names raise SAXNotRecognizedException.
        """
        if name == handler.property_lexical_handler:
            self._lex_handler_prop = value
            if self._parsing:
                self._reset_lex_handler_prop()
        elif name == property_interning_dict:
            self._interning = value
        elif name == property_xml_string:
            raise SAXNotSupportedException("Property '%s' cannot be set" %
                                           name)
        else:
            raise SAXNotRecognizedException("Property '%s' not recognized" %
                                            name)

    # IncrementalParser methods

    def feed(self, data, isFinal=False):
        """Hand a chunk of *data* to expat.

        The first call after a reset creates the expat parser and emits
        startDocument().  An expat.error is wrapped in a
        SAXParseException and passed to the error handler's fatalError().
        """
        if not self._parsing:
            self.reset()
            self._parsing = True
            self._cont_handler.startDocument()

        try:
            # The isFinal parameter is internal to the expat reader.
            # If it is set to true, expat will check validity of the entire
            # document. When feeding chunks, they are not normally final -
            # except when invoked from close.
            self._parser.Parse(data, isFinal)
        except expat.error as e:
            exc = SAXParseException(expat.ErrorString(e.code), e, self)
            # FIXME: when to invoke error()?
            self._err_handler.fatalError(exc)

    def _close_source(self):
        """Close the character and byte streams of the current source."""
        source = self._source
        try:
            file = source.getCharacterStream()
            if file is not None:
                file.close()
        finally:
            # Runs even if closing the character stream raised.
            file = source.getByteStream()
            if file is not None:
                file.close()

    def close(self):
        """Finish parsing: flush expat, emit endDocument(), close the source."""
        if (self._entity_stack or self._parser is None or
            isinstance(self._parser, _ClosedParser)):
            # If we are completing an external entity, do nothing here
            return
        try:
            self.feed(b"", isFinal=True)
            self._cont_handler.endDocument()
            self._parsing = False
            # break cycle created by expat handlers pointing to our methods
            self._parser = None
        finally:
            self._parsing = False
            if self._parser is not None:
                # Keep ErrorColumnNumber and ErrorLineNumber after closing.
                parser = _ClosedParser()
                parser.ErrorColumnNumber = self._parser.ErrorColumnNumber
                parser.ErrorLineNumber = self._parser.ErrorLineNumber
                self._parser = parser
            self._close_source()

    def _reset_cont_handler(self):
        """Bind expat's PI and character-data callbacks to the content handler."""
        self._parser.ProcessingInstructionHandler = \
                                    self._cont_handler.processingInstruction
        self._parser.CharacterDataHandler = self._cont_handler.characters

    def _reset_lex_handler_prop(self):
        """Bind (or clear) expat's lexical callbacks from the lexical handler."""
        lex = self._lex_handler_prop
        parser = self._parser
        if lex is None:
            parser.CommentHandler = None
            parser.StartCdataSectionHandler = None
            parser.EndCdataSectionHandler = None
            parser.StartDoctypeDeclHandler = None
            parser.EndDoctypeDeclHandler = None
        else:
            parser.CommentHandler = lex.comment
            parser.StartCdataSectionHandler = lex.startCDATA
            parser.EndCdataSectionHandler = lex.endCDATA
            # Routed through a method because startDTD takes its arguments
            # in a different order than expat supplies them.
            parser.StartDoctypeDeclHandler = self.start_doctype_decl
            parser.EndDoctypeDeclHandler = lex.endDTD

    def reset(self):
        """Create a fresh expat parser and install all callbacks on it."""
        if self._namespaces:
            # " " is the separator expat puts between the parts of a
            # namespace-qualified name; start_element_ns splits on it.
            self._parser = expat.ParserCreate(self._source.getEncoding(), " ",
                                              intern=self._interning)
            self._parser.namespace_prefixes = 1
            self._parser.StartElementHandler = self.start_element_ns
            self._parser.EndElementHandler = self.end_element_ns
        else:
            self._parser = expat.ParserCreate(self._source.getEncoding(),
                                              intern=self._interning)
            self._parser.StartElementHandler = self.start_element
            self._parser.EndElementHandler = self.end_element

        self._reset_cont_handler()
        self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
        self._parser.NotationDeclHandler = self.notation_decl
        self._parser.StartNamespaceDeclHandler = self.start_namespace_decl
        self._parser.EndNamespaceDeclHandler = self.end_namespace_decl

        self._decl_handler_prop = None
        if self._lex_handler_prop:
            self._reset_lex_handler_prop()
#         self._parser.DefaultHandler =
#         self._parser.DefaultHandlerExpand =
#         self._parser.NotStandaloneHandler =
        self._parser.ExternalEntityRefHandler = self.external_entity_ref
        try:
            self._parser.SkippedEntityHandler = self.skipped_entity_handler
        except AttributeError:
            # This pyexpat does not support SkippedEntity
            pass
        self._parser.SetParamEntityParsing(
            expat.XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)

        self._parsing = False
        self._entity_stack = []

    # Locator methods

    def getColumnNumber(self):
        """Return expat's ErrorColumnNumber, or None if no parser exists."""
        if self._parser is None:
            return None
        return self._parser.ErrorColumnNumber

    def getLineNumber(self):
        """Return expat's ErrorLineNumber, or 1 if no parser exists."""
        if self._parser is None:
            return 1
        return self._parser.ErrorLineNumber

    def getPublicId(self):
        """Return the public identifier of the current source."""
        return self._source.getPublicId()

    def getSystemId(self):
        """Return the system identifier of the current source."""
        return self._source.getSystemId()

    # event handlers
    def start_element(self, name, attrs):
        """Forward a non-namespace start tag to the content handler."""
        self._cont_handler.startElement(name, AttributesImpl(attrs))

    def end_element(self, name):
        """Forward a non-namespace end tag to the content handler."""
        self._cont_handler.endElement(name)

    def start_element_ns(self, name, attrs):
        """Forward a namespace-aware start tag to the content handler.

        Names are split on whitespace: one part means no namespace, two
        parts are (uri, localname), three parts carry a trailing prefix.
        The qname passed to startElementNS is always None.
        """
        pair = name.split()
        if len(pair) == 1:
            # no namespace
            pair = (None, name)
        elif len(pair) == 3:
            pair = pair[0], pair[1]
        else:
            # default namespace
            pair = tuple(pair)

        newattrs = {}
        qnames = {}
        for (aname, value) in attrs.items():
            parts = aname.split()
            length = len(parts)
            if length == 1:
                # no namespace
                qname = aname
                apair = (None, aname)
            elif length == 3:
                qname = "%s:%s" % (parts[2], parts[1])
                apair = parts[0], parts[1]
            else:
                # default namespace
                qname = parts[1]
                apair = tuple(parts)

            newattrs[apair] = value
            qnames[apair] = qname

        self._cont_handler.startElementNS(pair, None,
                                          AttributesNSImpl(newattrs, qnames))

    def end_element_ns(self, name):
        """Forward a namespace-aware end tag to the content handler."""
        pair = name.split()
        if len(pair) == 1:
            pair = (None, name)
        elif len(pair) == 3:
            pair = pair[0], pair[1]
        else:
            pair = tuple(pair)

        self._cont_handler.endElementNS(pair, None)

    # this is not used (call directly to ContentHandler)
    def processing_instruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    # this is not used (call directly to ContentHandler)
    def character_data(self, data):
        self._cont_handler.characters(data)

    def start_namespace_decl(self, prefix, uri):
        """Forward a namespace declaration start as startPrefixMapping()."""
        self._cont_handler.startPrefixMapping(prefix, uri)

    def end_namespace_decl(self, prefix):
        """Forward a namespace declaration end as endPrefixMapping()."""
        self._cont_handler.endPrefixMapping(prefix)

    def start_doctype_decl(self, name, sysid, pubid, has_internal_subset):
        """Forward a DOCTYPE start to the lexical handler's startDTD()."""
        # Argument order differs: pubid is passed before sysid.
        self._lex_handler_prop.startDTD(name, pubid, sysid)

    def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
        """Forward an unparsed entity declaration to the DTD handler."""
        self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name)

    def notation_decl(self, name, base, sysid, pubid):
        """Forward a notation declaration to the DTD handler."""
        self._dtd_handler.notationDecl(name, pubid, sysid)

    def external_entity_ref(self, context, base, sysid, pubid):
        """Resolve and parse an external general entity.

        Returns 1 when the entity was parsed or external general entities
        are disabled, and 0 when parsing the entity raised.
        """
        if not self._external_ges:
            return 1

        source = self._ent_handler.resolveEntity(pubid, sysid)
        source = saxutils.prepare_input_source(source,
                                               self._source.getSystemId() or
                                               "")

        # Swap in a sub-parser for the entity; the outer pair is restored
        # from the stack once the entity has been parsed.
        self._entity_stack.append((self._parser, self._source))
        self._parser = self._parser.ExternalEntityParserCreate(context)
        self._source = source

        try:
            xmlreader.IncrementalParser.parse(self, source)
        except:
            # NOTE(review): this path returns without popping
            # _entity_stack, so the entity parser/source stay current.
            return 0  # FIXME: save error info here?

        (self._parser, self._source) = self._entity_stack[-1]
        del self._entity_stack[-1]
        return 1

    def skipped_entity_handler(self, name, is_pe):
        """Report a skipped entity to the content handler."""
        if is_pe:
            # The SAX spec requires to report skipped PEs with a '%'
            name = '%'+name
        self._cont_handler.skippedEntity(name)

# ---

def create_parser(*args, **kwargs):
    """Return a new ExpatParser, passing all arguments to its constructor."""
    return ExpatParser(*args, **kwargs)

# ---

# Manual smoke test: parse a remote document and echo it through
# XMLGenerator.  Requires network access.
if __name__ == "__main__":
    import xml.sax.saxutils
    p = create_parser()
    p.setContentHandler(xml.sax.saxutils.XMLGenerator())
    p.setErrorHandler(xml.sax.ErrorHandler())
    p.parse("http://www.ibiblio.org/xml/examples/shakespeare/hamlet.xml")