"""
SAX driver for the Pyexpat C module.  This driver works with
pyexpat.__version__ == '2.22'.
"""

# Version string exported by this driver module.
version = "0.20"

from xml.sax._exceptions import *

# expat is a C extension module; if it cannot be imported, report that to
# SAX clients as SAXReaderNotAvailable instead of a plain ImportError.
try:
    from xml.parsers import expat
except ImportError:
    raise SAXReaderNotAvailable("expat not supported", None)

from xml.sax import xmlreader, saxutils, handler
import string

# Module-level shorthands for the attribute container classes.
AttributesImpl = xmlreader.AttributesImpl
AttributesNSImpl = xmlreader.AttributesNSImpl

# --- ExpatParser

class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
    """SAX driver for the Pyexpat C module.

    Feeds document text to an expat parser and forwards expat's callbacks
    to the registered SAX content, DTD, entity and error handlers.  The
    instance is also the Locator it hands to the content handler in
    parse().
    """

    def __init__(self, namespaceHandling=0, bufsize=2**16-20):
        """Create a driver.

        namespaceHandling -- initial state of the namespaces feature; when
            true, elements are reported through startElementNS/endElementNS.
        bufsize -- passed through to IncrementalParser.__init__.
        """
        xmlreader.IncrementalParser.__init__(self, bufsize)
        self._source = xmlreader.InputSource()
        self._parser = None          # expat parser; created by reset()
        self._namespaces = namespaceHandling
        self._parsing = 0            # true between the first feed() and close()
        self._entity_stack = []      # (parser, source) pairs of suspended outer entities

    # XMLReader methods

    def parse(self, source):
        "Parse an XML document from a URL or an InputSource."
        source = saxutils.prepare_input_source(source)

        self._source = source
        self.reset()
        self._cont_handler.setDocumentLocator(self)
        xmlreader.IncrementalParser.parse(self, source)

    def prepareParser(self, source):
        "Pass the source's system id, if it has one, to expat as the base."
        if source.getSystemId() is not None:
            self._parser.SetBase(source.getSystemId())

    def getFeature(self, name):
        """Return the state of the namespaces feature.

        Any other feature name raises SAXNotRecognizedException.
        """
        if name == handler.feature_namespaces:
            return self._namespaces
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def setFeature(self, name, state):
        """Set the namespaces feature.

        Raises SAXNotSupportedException while a parse is in progress and
        SAXNotRecognizedException for any other feature name.
        """
        if self._parsing:
            raise SAXNotSupportedException("Cannot set features while parsing")
        if name == handler.feature_namespaces:
            self._namespaces = state
        else:
            raise SAXNotRecognizedException("Feature '%s' not recognized" %
                                            name)

    def getProperty(self, name):
        "No properties are supported; always raises SAXNotRecognizedException."
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    def setProperty(self, name, value):
        "No properties are supported; always raises SAXNotRecognizedException."
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    # IncrementalParser methods

    def feed(self, data, isFinal=0):
        """Hand a chunk of the document to expat.

        The first call of a parse creates a fresh expat parser via reset()
        and sends startDocument().  An expat error is wrapped in a
        SAXParseException and passed to the error handler's fatalError().
        """
        if not self._parsing:
            self._parsing = 1
            self.reset()
            self._cont_handler.startDocument()

        try:
            # The isFinal parameter is internal to the expat reader.
            # If it is set to true, expat will check validity of the entire
            # document. When feeding chunks, they are not normally final -
            # except when invoked from close.
            self._parser.Parse(data, isFinal)
        except expat.error:
            error_code = self._parser.ErrorCode
            exc = SAXParseException(expat.ErrorString(error_code), None, self)
            self._err_handler.fatalError(exc)

    def close(self):
        """Finish the document: final expat parse, then endDocument().

        Does nothing while an external entity is being parsed, because the
        enclosing document is not finished yet.
        """
        if self._entity_stack:
            # If we are completing an external entity, do nothing here
            return
        try:
            self.feed("", isFinal=1)
            self._cont_handler.endDocument()
        finally:
            # Clear the flag even when the final parse raised, so the
            # reader can be reconfigured and reused afterwards.
            self._parsing = 0

    def reset(self):
        """Create a new expat parser and attach this driver's callbacks.

        Also empties the external-entity stack.
        """
        if self._namespaces:
            # With a namespace separator, expat reports qualified names as
            # "uri local"; the *_ns callbacks split them apart again.
            self._parser = expat.ParserCreate(None, " ")
            self._parser.StartElementHandler = self.start_element_ns
            self._parser.EndElementHandler = self.end_element_ns
        else:
            self._parser = expat.ParserCreate()
            self._parser.StartElementHandler = self.start_element
            self._parser.EndElementHandler = self.end_element

        # These two go straight to the content handler, bypassing the
        # processing_instruction()/character_data() methods below.
        self._parser.ProcessingInstructionHandler = \
            self._cont_handler.processingInstruction
        self._parser.CharacterDataHandler = self._cont_handler.characters
        self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
        self._parser.NotationDeclHandler = self.notation_decl
        self._parser.StartNamespaceDeclHandler = self.start_namespace_decl
        self._parser.EndNamespaceDeclHandler = self.end_namespace_decl
        # Not wired up: CommentHandler, StartCdataSectionHandler,
        # EndCdataSectionHandler, DefaultHandler, DefaultHandlerExpand,
        # NotStandaloneHandler.
        # NOTE(review): external entities are always resolved and parsed;
        # callers handling untrusted XML may want an EntityResolver that
        # refuses them - confirm against deployment needs.
        self._parser.ExternalEntityRefHandler = self.external_entity_ref

        self._entity_stack = []

    # Locator methods

    def getColumnNumber(self):
        "Return the current expat parser's ErrorColumnNumber."
        return self._parser.ErrorColumnNumber

    def getLineNumber(self):
        "Return the current expat parser's ErrorLineNumber."
        return self._parser.ErrorLineNumber

    def getPublicId(self):
        "Return the public id of the input source being parsed."
        return self._source.getPublicId()

    def getSystemId(self):
        "Return the system id of the input source being parsed."
        return self._source.getSystemId()

    # event handlers

    def _ns_name(self, name):
        "Convert an expat name ('local' or 'uri local') to a (uri, local) tuple."
        parts = name.split()
        if len(parts) == 1:
            return (None, name)
        # Always a tuple, so element and attribute names are hashable and
        # compare equal to the tuples SAX clients use.
        return tuple(parts)

    def start_element(self, name, attrs):
        self._cont_handler.startElement(name, AttributesImpl(attrs))

    def end_element(self, name):
        self._cont_handler.endElement(name)

    def start_element_ns(self, name, attrs):
        "Report a start tag with namespace-qualified element and attribute names."
        newattrs = {}
        for (aname, value) in attrs.items():
            newattrs[self._ns_name(aname)] = value

        self._cont_handler.startElementNS(self._ns_name(name), None,
                                          AttributesNSImpl(newattrs, {}))

    def end_element_ns(self, name):
        "Report an end tag with a namespace-qualified element name."
        self._cont_handler.endElementNS(self._ns_name(name), None)

    # this is not used (call directly to ContentHandler)
    def processing_instruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    # this is not used (call directly to ContentHandler)
    def character_data(self, data):
        self._cont_handler.characters(data)

    def start_namespace_decl(self, prefix, uri):
        self._cont_handler.startPrefixMapping(prefix, uri)

    def end_namespace_decl(self, prefix):
        self._cont_handler.endPrefixMapping(prefix)

    def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
        # expat's argument order differs from the SAX DTDHandler's.
        self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name)

    def notation_decl(self, name, base, sysid, pubid):
        # expat's argument order differs from the SAX DTDHandler's.
        self._dtd_handler.notationDecl(name, pubid, sysid)

    def external_entity_ref(self, context, base, sysid, pubid):
        """Resolve and parse an external entity with a sub-parser.

        Returns 1 when the entity was parsed and 0 when parsing it raised;
        the enclosing parser and source are reinstated either way.
        """
        source = self._ent_handler.resolveEntity(pubid, sysid)
        source = saxutils.prepare_input_source(source,
                                               self._source.getSystemId() or
                                               "")

        # Suspend the enclosing entity and make the sub-parser current, so
        # that feed() and the locator methods operate on the entity.
        self._entity_stack.append((self._parser, self._source))
        self._parser = self._parser.ExternalEntityParserCreate(context)
        self._source = source

        try:
            xmlreader.IncrementalParser.parse(self, source)
            status = 1
        except Exception:
            status = 0  # FIXME: save error info here?

        # Reinstate the enclosing entity on failure as well; otherwise the
        # reader would stay attached to the sub-parser and close() would
        # keep returning early on the non-empty stack.
        (self._parser, self._source) = self._entity_stack.pop()
        return status

# ---

def create_parser(*args, **kwargs):
    "Return a new ExpatParser; all arguments are passed to its constructor."
    # Call directly with argument unpacking: the apply() builtin no longer
    # exists in Python 3.
    return ExpatParser(*args, **kwargs)

# ---

if __name__ == "__main__":
    # Manual smoke test: parse a document and pass its events to an
    # XMLGenerator.  The document may be named as the first command-line
    # argument; without one the historical default path is used.
    import sys
    import xml.sax.saxutils

    p = create_parser()
    # XMLGenerator is defined in xml.sax.saxutils, not in the xml.sax package.
    p.setContentHandler(xml.sax.saxutils.XMLGenerator())
    p.setErrorHandler(xml.sax.ErrorHandler())
    if len(sys.argv) > 1:
        p.parse(sys.argv[1])
    else:
        p.parse("../../../hamlet.xml")
219 p.parse("../../../hamlet.xml")