blob: 2fc2b7c145033cd03fcf8b73275557fdb5a30501 [file] [log] [blame]
"""
SAX driver for the Pyexpat C module.  This driver works with
pyexpat.__version__ == '2.22'.
"""
5
import string

from xml.parsers import expat
from xml.sax import xmlreader, saxutils, handler
# The wildcard import is kept on purpose: it also re-exports the SAX
# exception classes to code that imports them from this module.
from xml.sax._exceptions import *

# Version of this driver module.
version = "0.20"

# Module-level shortcuts for the attribute container classes.
AttributesImpl = xmlreader.AttributesImpl
AttributesNSImpl = xmlreader.AttributesNSImpl
16
Fred Drake45cd9de2000-06-29 19:34:54 +000017# --- ExpatParser
18
class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
    """SAX driver for the Pyexpat C module.

    The reader is both an incremental parser (feed()/close()/reset()) and
    its own Locator: the position methods read the state of the expat
    parser that is currently active.

    namespaceHandling -- initial value of the SAX namespaces feature; when
                         true, element and attribute names are reported
                         as (uri, localname) tuples via the *NS callbacks.
    bufsize           -- passed on to xmlreader.IncrementalParser.
    """

    def __init__(self, namespaceHandling=0, bufsize=2**16-20):
        xmlreader.IncrementalParser.__init__(self, bufsize)
        self._source = xmlreader.InputSource()
        self._parser = None              # created by reset()
        self._namespaces = namespaceHandling
        self._parsing = 0                # true between startDocument/endDocument
        self._entity_stack = []          # (parser, source) pairs of outer entities

    # XMLReader methods

    def parse(self, source):
        "Parse an XML document from a URL or an InputSource."
        source = saxutils.prepare_input_source(source)

        self._source = source
        self.reset()
        self._cont_handler.setDocumentLocator(self)
        # Open the document here rather than in the first feed() call:
        # feed() would reset() again and throw away the parser on which
        # prepareParser() has just set the base system id.
        self._parsing = 1
        self._cont_handler.startDocument()
        try:
            try:
                xmlreader.IncrementalParser.parse(self, source)
                # close() does nothing if IncrementalParser.parse()
                # already closed the document, so endDocument is
                # reported exactly once either way.
                self.close()
            except expat.error:
                raise self._expat_error()
        finally:
            # Leave the reader reusable even when parsing failed.
            self._parsing = 0

    def prepareParser(self, source):
        "Record the system id of source as the base of the current parser."
        system_id = source.getSystemId()
        if system_id is not None:
            self._parser.SetBase(system_id)

    def getFeature(self, name):
        "Return the state of feature name; only the namespaces feature is known."
        if name == handler.feature_namespaces:
            return self._namespaces
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def setFeature(self, name, state):
        "Set feature name to state; not allowed while a document is being parsed."
        if self._parsing:
            raise SAXNotSupportedException("Cannot set features while parsing")
        if name == handler.feature_namespaces:
            self._namespaces = state
        else:
            raise SAXNotRecognizedException("Feature '%s' not recognized" %
                                            name)

    def getProperty(self, name):
        "No properties are supported; always raises SAXNotRecognizedException."
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    def setProperty(self, name, value):
        "No properties are supported; always raises SAXNotRecognizedException."
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    # IncrementalParser methods

    def feed(self, data):
        """Hand a chunk of the document to expat.

        The first chunk of a document creates a fresh parser and reports
        startDocument.  Raises SAXParseException if expat rejects the data.
        """
        if not self._parsing:
            self._parsing = 1
            self.reset()
            self._cont_handler.startDocument()

        self._expat_parse(data, 0)

    def close(self):
        """Finish the current document and report endDocument.

        Does nothing when no document is open, so it is safe to call more
        than once, and does nothing while an external entity is being
        parsed (external_entity_ref() finishes the entity parser itself).
        Raises SAXParseException if expat reports an error at end of input.
        """
        if self._entity_stack or not self._parsing:
            return
        try:
            # Let expat see the end of input first, so that anything it
            # still has to report arrives before endDocument.
            self._expat_parse("", 1)
            self._cont_handler.endDocument()
        finally:
            self._parsing = 0

    def reset(self):
        "Create a new expat parser and connect its callbacks."
        if self._namespaces:
            # " " is the separator expat puts between URI and local name.
            self._parser = expat.ParserCreate(None, " ")
            self._parser.StartElementHandler = self.start_element_ns
            self._parser.EndElementHandler = self.end_element_ns
        else:
            self._parser = expat.ParserCreate()
            self._parser.StartElementHandler = self.start_element
            self._parser.EndElementHandler = self.end_element

        parser = self._parser
        # These two go straight to the content handler that is installed
        # right now; the other callbacks look the handler up on each call.
        parser.ProcessingInstructionHandler = \
            self._cont_handler.processingInstruction
        parser.CharacterDataHandler = self._cont_handler.characters
        parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
        parser.NotationDeclHandler = self.notation_decl
        parser.StartNamespaceDeclHandler = self.start_namespace_decl
        parser.EndNamespaceDeclHandler = self.end_namespace_decl
        # Not connected: CommentHandler, Start/EndCdataSectionHandler,
        # DefaultHandler, DefaultHandlerExpand, NotStandaloneHandler.
        parser.ExternalEntityRefHandler = self.external_entity_ref

        self._entity_stack = []

    # Locator methods

    def getColumnNumber(self):
        "Return expat's ErrorColumnNumber for the current parser."
        return self._parser.ErrorColumnNumber

    def getLineNumber(self):
        "Return expat's ErrorLineNumber for the current parser."
        return self._parser.ErrorLineNumber

    def getPublicId(self):
        "Return the public id of the input source being parsed."
        return self._source.getPublicId()

    def getSystemId(self):
        "Return the base set on the current parser."
        return self._parser.GetBase()

    # internal helpers

    def _expat_error(self):
        "Build a SAXParseException from the current parser's error code."
        message = expat.ErrorString(self._parser.ErrorCode)
        return SAXParseException(message, None, self)

    def _expat_parse(self, data, is_final):
        """Run expat on data and turn any failure into SAXParseException.

        Both ways of signalling failure are handled: an expat.error
        exception and a false return value from Parse().
        """
        try:
            succeeded = self._parser.Parse(data, is_final)
        except expat.error:
            raise self._expat_error()
        if not succeeded:
            raise self._expat_error()

    def _split_name(self, name):
        """Turn an expat namespace-mode name into a name tuple.

        A name without a namespace becomes (None, name); otherwise the
        whitespace-separated parts are returned as a tuple.
        """
        parts = name.split()
        if len(parts) == 1:
            return (None, name)
        return tuple(parts)

    # event handlers

    def start_element(self, name, attrs):
        self._cont_handler.startElement(name, xmlreader.AttributesImpl(attrs))

    def end_element(self, name):
        self._cont_handler.endElement(name)

    def start_element_ns(self, name, attrs):
        ns_attrs = {}
        for (attr_name, value) in attrs.items():
            ns_attrs[self._split_name(attr_name)] = value

        # No qualified names are available here, hence None and {}.
        self._cont_handler.startElementNS(
            self._split_name(name), None,
            xmlreader.AttributesNSImpl(ns_attrs, {}))

    def end_element_ns(self, name):
        self._cont_handler.endElementNS(self._split_name(name), None)

    # this is not used (call directly to ContentHandler)
    def processing_instruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    # this is not used (call directly to ContentHandler)
    def character_data(self, data):
        self._cont_handler.characters(data)

    def start_namespace_decl(self, prefix, uri):
        self._cont_handler.startPrefixMapping(prefix, uri)

    def end_namespace_decl(self, prefix):
        self._cont_handler.endPrefixMapping(prefix)

    def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
        # expat's base argument is dropped; the DTD handler is called with
        # the public id before the system id.
        self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name)

    def notation_decl(self, name, base, sysid, pubid):
        self._dtd_handler.notationDecl(name, pubid, sysid)

    def external_entity_ref(self, context, base, sysid, pubid):
        """Parse an external entity with a sub-parser.

        Returns 1 on success and 0 if parsing the entity failed, which is
        how the result is reported back to expat.
        """
        source = self._ent_handler.resolveEntity(pubid, sysid)
        source = saxutils.prepare_input_source(source,
                                               self._source.getSystemId() or
                                               "")

        self._entity_stack.append((self._parser, self._source))
        self._parser = self._parser.ExternalEntityParserCreate(context)
        self._source = source

        try:
            try:
                xmlreader.IncrementalParser.parse(self, source)
                # close() is a no-op while the entity stack is non-empty,
                # so tell the entity parser about end of input here.
                self._expat_parse("", 1)
            except Exception:
                return 0  # FIXME: save error info here?
        finally:
            # Always switch back to the enclosing parser and source, also
            # on failure, so that the locator describes the outer document.
            (self._parser, self._source) = self._entity_stack.pop()
        return 1
201
202# ---
203
def create_parser(*args, **kwargs):
    """Return a new ExpatParser.

    All positional and keyword arguments are passed on unchanged to the
    ExpatParser constructor.
    """
    # Extended call syntax replaces the apply() builtin, which no longer
    # exists in current Python versions.
    return ExpatParser(*args, **kwargs)
Fred Drake45cd9de2000-06-29 19:34:54 +0000206
207# ---
208
if __name__ == "__main__":
    # Manual smoke test: parse a document and write it back out as XML.
    import sys
    import xml.sax
    import xml.sax.saxutils

    # Use the file named on the command line if there is one; otherwise
    # fall back to the sample document this script has always used.
    if len(sys.argv) > 1:
        document = sys.argv[1]
    else:
        document = "../../../hamlet.xml"

    p = create_parser()
    # XMLGenerator is defined in xml.sax.saxutils.
    p.setContentHandler(xml.sax.saxutils.XMLGenerator())
    p.setErrorHandler(xml.sax.ErrorHandler())
    p.parse(document)
214 p.parse("../../../hamlet.xml")