blob: d641c191f6f9e50ac590ebcbd9217aeb7656a88d [file] [log] [blame]
"""
SAX driver for the Pyexpat C module.  This driver works with
pyexpat.__version__ == '2.22'.
"""
5
# Version string of this driver module.
version = "0.20"

from xml.sax._exceptions import *

# xml.parsers.expat does not raise ImportError in Jython, so the platform
# is checked explicitly and the reader is reported as unavailable there.
import sys
if sys.platform.startswith("java"):
    raise SAXReaderNotAvailable("expat not available in Java", None)
del sys

# Importing expat may succeed without giving a usable module; both a failed
# import and a module lacking ParserCreate are reported the same way.
try:
    from xml.parsers import expat
except ImportError:
    raise SAXReaderNotAvailable("expat not supported", None)
else:
    if not hasattr(expat, "ParserCreate"):
        raise SAXReaderNotAvailable("expat not supported", None)
from xml.sax import xmlreader, saxutils, handler

# Short module-level aliases for the attribute containers handed to handlers.
AttributesImpl = xmlreader.AttributesImpl
AttributesNSImpl = xmlreader.AttributesNSImpl

import string
import weakref
30
31# --- ExpatLocator
32
class ExpatLocator(xmlreader.Locator):
    """Locator for use with the ExpatParser class.

    Only a weak reference to the reader object is stored, which avoids
    a circular reference between the reader and the content handler
    that keeps this locator.
    """

    def __init__(self, parser):
        # `parser` is the reader object; it is looked up again on each query.
        self._ref = weakref.ref(parser)

    def getColumnNumber(self):
        """Return expat's ErrorColumnNumber, or None without a live parser."""
        reader = self._ref()
        if reader is not None and reader._parser is not None:
            return reader._parser.ErrorColumnNumber
        return None

    def getLineNumber(self):
        """Return expat's ErrorLineNumber, or 1 without a live parser."""
        reader = self._ref()
        if reader is not None and reader._parser is not None:
            return reader._parser.ErrorLineNumber
        return 1

    def getPublicId(self):
        """Return the public id of the reader's current source, or None
        when the reader has been garbage collected."""
        reader = self._ref()
        if reader is not None:
            return reader._source.getPublicId()
        return None

    def getSystemId(self):
        """Return the system id of the reader's current source, or None
        when the reader has been garbage collected."""
        reader = self._ref()
        if reader is not None:
            return reader._source.getSystemId()
        return None
65
Martin v. Löwis3f0969f2000-09-29 19:00:40 +000066
Fred Drake45cd9de2000-06-29 19:34:54 +000067# --- ExpatParser
68
class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
    """SAX driver for the Pyexpat C module.

    The reader creates a fresh expat parser in reset(), wires expat's
    callbacks to the registered SAX handlers and translates expat errors
    into SAXParseException objects passed to the error handler.  It also
    acts as its own Locator, reporting positions from the expat parser.
    """

    def __init__(self, namespaceHandling=0, bufsize=2**16-20):
        """Create the reader.

        namespaceHandling -- initial state of the namespaces feature
        bufsize -- passed on unchanged to IncrementalParser.__init__
        """
        xmlreader.IncrementalParser.__init__(self, bufsize)
        self._source = xmlreader.InputSource()
        self._parser = None
        self._namespaces = namespaceHandling
        self._lex_handler_prop = None
        self._parsing = 0
        self._entity_stack = []

    # XMLReader methods

    def parse(self, source):
        "Parse an XML document from a URL or an InputSource."
        source = saxutils.prepare_input_source(source)

        self._source = source
        self.reset()
        self._cont_handler.setDocumentLocator(ExpatLocator(self))
        xmlreader.IncrementalParser.parse(self, source)

    def prepareParser(self, source):
        """Set the expat base to the source's system id, when it has one."""
        if source.getSystemId() is not None:
            self._parser.SetBase(source.getSystemId())

    # Redefined setContentHandler to allow changing handlers during parsing

    def setContentHandler(self, handler):
        """Register the content handler; rewire expat if already parsing."""
        xmlreader.IncrementalParser.setContentHandler(self, handler)
        if self._parsing:
            self._reset_cont_handler()

    def getFeature(self, name):
        """Return the state of the namespaces feature.

        Raises SAXNotRecognizedException for any other feature name.
        """
        if name == handler.feature_namespaces:
            return self._namespaces
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def setFeature(self, name, state):
        """Set the namespaces feature.

        Raises SAXNotSupportedException while parsing and
        SAXNotRecognizedException for any other feature name.
        """
        if self._parsing:
            raise SAXNotSupportedException("Cannot set features while parsing")
        if name == handler.feature_namespaces:
            self._namespaces = state
        else:
            raise SAXNotRecognizedException("Feature '%s' not recognized" %
                                            name)

    def getProperty(self, name):
        """Return the lexical handler property.

        Raises SAXNotRecognizedException for any other property name.
        """
        if name == handler.property_lexical_handler:
            return self._lex_handler_prop
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    def setProperty(self, name, value):
        """Set the lexical handler property; rewire expat if already parsing.

        Raises SAXNotRecognizedException for any other property name.
        """
        if name == handler.property_lexical_handler:
            self._lex_handler_prop = value
            if self._parsing:
                self._reset_lex_handler_prop()
        else:
            raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    # IncrementalParser methods

    def feed(self, data, isFinal = 0):
        """Pass a chunk of data to expat.

        The first chunk resets the reader and emits startDocument().
        An expat error is wrapped in a SAXParseException and handed to
        the error handler's fatalError().
        """
        if not self._parsing:
            self.reset()
            self._parsing = 1
            self._cont_handler.startDocument()

        try:
            # The isFinal parameter is internal to the expat reader.
            # If it is set to true, expat will check validity of the entire
            # document. When feeding chunks, they are not normally final -
            # except when invoked from close.
            self._parser.Parse(data, isFinal)
        except expat.error:
            error_code = self._parser.ErrorCode
            exc = SAXParseException(expat.ErrorString(error_code), None, self)
            # FIXME: when to invoke error()?
            self._err_handler.fatalError(exc)

    def close(self):
        """Finish the document and emit endDocument().

        Does nothing while an external entity is being completed, or when
        there is no expat parser (nothing was fed, or the reader was
        already closed).
        """
        if self._entity_stack or self._parser is None:
            # If we are completing an external entity, do nothing here
            return
        try:
            self.feed("", isFinal = 1)
            self._cont_handler.endDocument()
            # break cycle created by expat handlers pointing to our methods
            self._parser = None
        finally:
            # Leave the parsing state even when the final chunk fails, so
            # that the next feed() starts over with a fresh expat parser.
            self._parsing = 0

    def _reset_cont_handler(self):
        # These two events are delivered straight to the content handler.
        self._parser.ProcessingInstructionHandler = \
                                    self._cont_handler.processingInstruction
        self._parser.CharacterDataHandler = self._cont_handler.characters

    def _reset_lex_handler_prop(self):
        # Point expat's lexical callbacks at the current lexical handler,
        # or clear them when the property has been set to None.
        lex = self._lex_handler_prop
        parser = self._parser
        if lex is None:
            parser.CommentHandler = None
            parser.StartCdataSectionHandler = None
            parser.EndCdataSectionHandler = None
        else:
            parser.CommentHandler = lex.comment
            parser.StartCdataSectionHandler = lex.startCDATA
            parser.EndCdataSectionHandler = lex.endCDATA

    def reset(self):
        """Create a new expat parser and install all callbacks on it."""
        if self._namespaces:
            # With namespaces on, expat joins URI and local name with " ".
            self._parser = expat.ParserCreate(None, " ")
            self._parser.StartElementHandler = self.start_element_ns
            self._parser.EndElementHandler = self.end_element_ns
        else:
            self._parser = expat.ParserCreate()
            self._parser.StartElementHandler = self.start_element
            self._parser.EndElementHandler = self.end_element

        self._reset_cont_handler()
        self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
        self._parser.NotationDeclHandler = self.notation_decl
        self._parser.StartNamespaceDeclHandler = self.start_namespace_decl
        self._parser.EndNamespaceDeclHandler = self.end_namespace_decl

        self._decl_handler_prop = None
        if self._lex_handler_prop:
            self._reset_lex_handler_prop()
#         self._parser.DefaultHandler =
#         self._parser.DefaultHandlerExpand =
#         self._parser.NotStandaloneHandler =
        self._parser.ExternalEntityRefHandler = self.external_entity_ref

        self._parsing = 0
        self._entity_stack = []

    # Locator methods

    def getColumnNumber(self):
        """Return expat's ErrorColumnNumber, or None without a parser."""
        if self._parser is None:
            return None
        return self._parser.ErrorColumnNumber

    def getLineNumber(self):
        """Return expat's ErrorLineNumber, or 1 without a parser."""
        if self._parser is None:
            return 1
        return self._parser.ErrorLineNumber

    def getPublicId(self):
        """Return the public id of the current input source."""
        return self._source.getPublicId()

    def getSystemId(self):
        """Return the system id of the current input source."""
        return self._source.getSystemId()

    # event handlers

    def _split_ns_name(self, name):
        # Turn a name reported by the namespace-aware expat parser into a
        # tuple: (None, name) when it has a single part, otherwise the
        # whitespace-separated parts as a tuple.
        parts = name.split()
        if len(parts) == 1:
            return (None, name)
        return tuple(parts)

    def start_element(self, name, attrs):
        self._cont_handler.startElement(name, AttributesImpl(attrs))

    def end_element(self, name):
        self._cont_handler.endElement(name)

    def start_element_ns(self, name, attrs):
        pair = self._split_ns_name(name)

        newattrs = {}
        for (aname, value) in attrs.items():
            newattrs[self._split_ns_name(aname)] = value

        # No qualified names are available here: the qname argument is
        # None and the qname mapping is empty.
        self._cont_handler.startElementNS(pair, None,
                                          AttributesNSImpl(newattrs, {}))

    def end_element_ns(self, name):
        self._cont_handler.endElementNS(self._split_ns_name(name), None)

    # this is not used (call directly to ContentHandler)
    def processing_instruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    # this is not used (call directly to ContentHandler)
    def character_data(self, data):
        self._cont_handler.characters(data)

    def start_namespace_decl(self, prefix, uri):
        self._cont_handler.startPrefixMapping(prefix, uri)

    def end_namespace_decl(self, prefix):
        self._cont_handler.endPrefixMapping(prefix)

    def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
        # The DTD handler takes pubid before sysid; base is not passed on.
        self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name)

    def notation_decl(self, name, base, sysid, pubid):
        # The DTD handler takes pubid before sysid; base is not passed on.
        self._dtd_handler.notationDecl(name, pubid, sysid)

    def external_entity_ref(self, context, base, sysid, pubid):
        """Resolve and parse an external entity with a sub-parser.

        Returns 1 on success and 0 when parsing the entity raised.
        """
        source = self._ent_handler.resolveEntity(pubid, sysid)
        source = saxutils.prepare_input_source(source,
                                               self._source.getSystemId() or
                                               "")

        # Save the current parser/source pair while the entity is parsed.
        self._entity_stack.append((self._parser, self._source))
        self._parser = self._parser.ExternalEntityParserCreate(context)
        self._source = source

        try:
            xmlreader.IncrementalParser.parse(self, source)
        except:
            # NOTE(review): every exception is swallowed here and the saved
            # parser/source pair stays on the entity stack; left unchanged
            # because restoring it would alter which error gets reported.
            return 0  # FIXME: save error info here?

        (self._parser, self._source) = self._entity_stack[-1]
        del self._entity_stack[-1]
        return 1
Fred Drake16f63292000-10-23 18:09:50 +0000289
Fred Drake45cd9de2000-06-29 19:34:54 +0000290# ---
Fred Drake16f63292000-10-23 18:09:50 +0000291
def create_parser(*args, **kwargs):
    """Return a new ExpatParser built from the given arguments.

    All positional and keyword arguments are forwarded unchanged to the
    ExpatParser constructor.
    """
    # Extended call syntax replaces the deprecated apply() builtin.
    return ExpatParser(*args, **kwargs)
Fred Drake16f63292000-10-23 18:09:50 +0000294
Fred Drake45cd9de2000-06-29 19:34:54 +0000295# ---
296
# Demo: parse a sample document and echo it through an XMLGenerator.
if __name__ == "__main__":
    import xml.sax
    # XMLGenerator is defined in xml.sax.saxutils, not in xml.sax itself.
    import xml.sax.saxutils
    p = create_parser()
    p.setContentHandler(xml.sax.saxutils.XMLGenerator())
    p.setErrorHandler(xml.sax.ErrorHandler())
    p.parse("../../../hamlet.xml")
302 p.parse("../../../hamlet.xml")