# blob: b8a31ffffa8ce5daf24fe599cf15b5c0c0bc7b5f
"""
SAX driver for the Pyexpat C module.  This driver works with
pyexpat.__version__ == '2.22'.
"""

version = "0.20"

# The star import supplies the SAX exception classes used throughout this
# module (SAXReaderNotAvailable, SAXParseException, ...).
from xml.sax._exceptions import *

# xml.parsers.expat does not raise ImportError in Jython, so the platform
# has to be checked explicitly before the import below is attempted.
import sys
if sys.platform[:4] == "java":
    raise SAXReaderNotAvailable("expat not available in Java", None)
del sys

try:
    from xml.parsers import expat
except ImportError:
    # Report a missing expat as a SAX-level "reader not available" error
    # rather than as a plain ImportError.
    raise SAXReaderNotAvailable("expat not supported", None)
from xml.sax import xmlreader, saxutils, handler

# Module-level shortcuts for the attribute containers handed to handlers.
AttributesImpl = xmlreader.AttributesImpl
AttributesNSImpl = xmlreader.AttributesNSImpl

import string

# --- ExpatParser

class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
    """SAX driver for the Pyexpat C module.

    The reader wraps an expat parser object and forwards its callbacks to
    the registered SAX handlers.  It also acts as its own Locator: parse()
    hands ``self`` to the content handler's setDocumentLocator().

    Arguments:
    namespaceHandling -- true to create a namespace-aware expat parser and
                         report elements via startElementNS/endElementNS.
    bufsize           -- passed through to xmlreader.IncrementalParser.
    """

    def __init__(self, namespaceHandling=0, bufsize=2**16-20):
        xmlreader.IncrementalParser.__init__(self, bufsize)
        self._source = xmlreader.InputSource()
        self._parser = None
        self._namespaces = namespaceHandling
        self._lex_handler_prop = None
        # Also assigned by reset(); created here so the attribute exists
        # on a freshly constructed reader.
        self._decl_handler_prop = None
        self._parsing = 0
        # (parser, source) pairs saved while an external entity is parsed.
        self._entity_stack = []

    # XMLReader methods

    def parse(self, source):
        "Parse an XML document from a URL or an InputSource."
        source = saxutils.prepare_input_source(source)

        self._source = source
        self.reset()
        self._cont_handler.setDocumentLocator(self)
        xmlreader.IncrementalParser.parse(self, source)

    def prepareParser(self, source):
        "Give the expat parser the source's system id as its base, if any."
        if source.getSystemId() is not None:
            self._parser.SetBase(source.getSystemId())

    # Redefined setContentHandler to allow changing handlers during parsing

    def setContentHandler(self, handler):
        "Register a content handler; rebinds expat callbacks if parsing."
        xmlreader.IncrementalParser.setContentHandler(self, handler)
        if self._parsing:
            self._reset_cont_handler()

    def getFeature(self, name):
        """Return the state of feature `name`.

        Only handler.feature_namespaces is recognized; any other name
        raises SAXNotRecognizedException.
        """
        if name == handler.feature_namespaces:
            return self._namespaces
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def setFeature(self, name, state):
        """Set feature `name` to `state`.

        Raises SAXNotSupportedException while a parse is in progress and
        SAXNotRecognizedException for anything but feature_namespaces.
        """
        if self._parsing:
            raise SAXNotSupportedException("Cannot set features while parsing")
        if name == handler.feature_namespaces:
            self._namespaces = state
        else:
            raise SAXNotRecognizedException("Feature '%s' not recognized" %
                                            name)

    def getProperty(self, name):
        """Return the value of property `name`.

        Only handler.property_lexical_handler is recognized; any other
        name raises SAXNotRecognizedException.
        """
        if name == handler.property_lexical_handler:
            return self._lex_handler_prop
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    def setProperty(self, name, value):
        """Set property `name` to `value`.

        Only handler.property_lexical_handler is recognized.  It may be
        changed (or cleared with None) during parsing; the expat callbacks
        are rebound immediately in that case.
        """
        if name == handler.property_lexical_handler:
            self._lex_handler_prop = value
            if self._parsing:
                self._reset_lex_handler_prop()
        else:
            raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    # IncrementalParser methods

    def feed(self, data, isFinal = 0):
        """Pass a chunk of `data` to expat.

        The first call after a reset creates the parser and emits
        startDocument().  expat errors are wrapped in a SAXParseException
        and passed to the error handler's fatalError().
        """
        if not self._parsing:
            self.reset()
            self._parsing = 1
            self._cont_handler.startDocument()

        try:
            # The isFinal parameter is internal to the expat reader.
            # If it is set to true, expat will check validity of the entire
            # document. When feeding chunks, they are not normally final -
            # except when invoked from close.
            self._parser.Parse(data, isFinal)
        except expat.error:
            error_code = self._parser.ErrorCode
            exc = SAXParseException(expat.ErrorString(error_code), None, self)
            # FIXME: when to invoke error()?
            self._err_handler.fatalError(exc)

    def close(self):
        "Finish the document: flush expat, emit endDocument(), drop the parser."
        if self._entity_stack:
            # If we are completing an external entity, do nothing here
            return
        self.feed("", isFinal = 1)
        self._cont_handler.endDocument()
        self._parsing = 0
        # break cycle created by expat handlers pointing to our methods
        self._parser = None

    def _reset_cont_handler(self):
        # These two callbacks go straight to the content handler, so they
        # must be rebound whenever the content handler changes.
        self._parser.ProcessingInstructionHandler = \
                                    self._cont_handler.processingInstruction
        self._parser.CharacterDataHandler = self._cont_handler.characters

    def _reset_lex_handler_prop(self):
        # Bind the lexical callbacks to the current lexical handler.  The
        # property can be set to None while parsing (see setProperty); in
        # that case the callbacks are cleared instead of dereferencing None.
        lex = self._lex_handler_prop
        parser = self._parser
        if lex is None:
            parser.CommentHandler = None
            parser.StartCdataSectionHandler = None
            parser.EndCdataSectionHandler = None
        else:
            parser.CommentHandler = lex.comment
            parser.StartCdataSectionHandler = lex.startCDATA
            parser.EndCdataSectionHandler = lex.endCDATA

    def reset(self):
        "Create a fresh expat parser and attach all callbacks to it."
        if self._namespaces:
            # " " is the separator expat puts between URI and local name.
            self._parser = expat.ParserCreate(None, " ")
            self._parser.StartElementHandler = self.start_element_ns
            self._parser.EndElementHandler = self.end_element_ns
        else:
            self._parser = expat.ParserCreate()
            self._parser.StartElementHandler = self.start_element
            self._parser.EndElementHandler = self.end_element

        self._reset_cont_handler()
        self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
        self._parser.NotationDeclHandler = self.notation_decl
        self._parser.StartNamespaceDeclHandler = self.start_namespace_decl
        self._parser.EndNamespaceDeclHandler = self.end_namespace_decl

        self._decl_handler_prop = None
        if self._lex_handler_prop:
            self._reset_lex_handler_prop()
        # Not hooked up: DefaultHandler, DefaultHandlerExpand,
        # NotStandaloneHandler.
        self._parser.ExternalEntityRefHandler = self.external_entity_ref

        self._parsing = 0
        self._entity_stack = []

    # Locator methods

    def getColumnNumber(self):
        "Return expat's ErrorColumnNumber, or None when there is no parser."
        if self._parser is None:
            return None
        return self._parser.ErrorColumnNumber

    def getLineNumber(self):
        "Return expat's ErrorLineNumber, or 1 when there is no parser."
        if self._parser is None:
            return 1
        return self._parser.ErrorLineNumber

    def getPublicId(self):
        "Return the public id of the source currently being parsed."
        return self._source.getPublicId()

    def getSystemId(self):
        "Return the system id of the source currently being parsed."
        return self._source.getSystemId()

    # event handlers

    def _split_name(self, name):
        # Turn an expat name into the tuple reported to SAX: a name made
        # of a single whitespace-free token becomes (None, name), anything
        # else becomes the tuple of its whitespace-separated parts.
        parts = name.split()
        if len(parts) == 1:
            return (None, name)
        return tuple(parts)

    def start_element(self, name, attrs):
        self._cont_handler.startElement(name, AttributesImpl(attrs))

    def end_element(self, name):
        self._cont_handler.endElement(name)

    def start_element_ns(self, name, attrs):
        pair = self._split_name(name)

        # Re-key the attributes by their split names as well.
        newattrs = {}
        for (aname, value) in attrs.items():
            newattrs[self._split_name(aname)] = value

        # No qname is reported for the element, and the qname mapping
        # handed to AttributesNSImpl is empty.
        self._cont_handler.startElementNS(pair, None,
                                          AttributesNSImpl(newattrs, {}))

    def end_element_ns(self, name):
        self._cont_handler.endElementNS(self._split_name(name), None)

    # this is not used (call directly to ContentHandler)
    def processing_instruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    # this is not used (call directly to ContentHandler)
    def character_data(self, data):
        self._cont_handler.characters(data)

    def start_namespace_decl(self, prefix, uri):
        self._cont_handler.startPrefixMapping(prefix, uri)

    def end_namespace_decl(self, prefix):
        self._cont_handler.endPrefixMapping(prefix)

    def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
        # Note the argument order: SAX takes the public id before the
        # system id, expat delivers them the other way round.
        self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name)

    def notation_decl(self, name, base, sysid, pubid):
        self._dtd_handler.notationDecl(name, pubid, sysid)

    def external_entity_ref(self, context, base, sysid, pubid):
        """Resolve and parse an external entity with a sub-parser.

        Returns 1 on success and 0 if parsing the entity raised anything.
        """
        source = self._ent_handler.resolveEntity(pubid, sysid)
        source = saxutils.prepare_input_source(source,
                                               self._source.getSystemId() or
                                               "")

        # Save the current parser/source and switch to the entity's own.
        self._entity_stack.append((self._parser, self._source))
        self._parser = self._parser.ExternalEntityParserCreate(context)
        self._source = source

        try:
            xmlreader.IncrementalParser.parse(self, source)
        except:
            # Deliberately broad and deliberately without restoring the
            # saved state: self._parser stays the entity parser, which is
            # the object feed() reads ErrorCode from afterwards.
            return 0  # FIXME: save error info here?

        (self._parser, self._source) = self._entity_stack.pop()
        return 1

# ---

def create_parser(*args, **kwargs):
    "Return a new ExpatParser built from the given constructor arguments."
    # Extended call syntax replaces the apply() built-in, which no longer
    # exists on Python 3.
    return ExpatParser(*args, **kwargs)

# ---

if __name__ == "__main__":
    # Manual smoke test: parse a sample document and echo it back as XML.
    import xml.sax
    # XMLGenerator is defined in xml.sax.saxutils, not in xml.sax itself.
    import xml.sax.saxutils
    p = create_parser()
    p.setContentHandler(xml.sax.saxutils.XMLGenerator())
    p.setErrorHandler(xml.sax.ErrorHandler())
    p.parse("../../../hamlet.xml")
262 p.parse("../../../hamlet.xml")