blob: 1d93542af9a818b5f6be85c52a7b5b57decb3aaf [file] [log] [blame]
"""
SAX driver for the Pyexpat C module.  This driver works with
pyexpat.__version__ == '2.22'.
"""

# Version string exposed by this module (presumably the driver's own
# version, independent of the pyexpat version named above -- TODO confirm).
version = "0.20"
7
from xml.parsers import expat
from xml.sax import saxutils, xmlreader
from xml.sax._exceptions import *
from xml.sax.handler import feature_namespaces
Fred Drake45cd9de2000-06-29 19:34:54 +000011
# Module-level aliases for the attribute container classes defined in
# xmlreader, used by the element handlers below.
AttributesImpl = xmlreader.AttributesImpl
AttributesNSImpl = xmlreader.AttributesNSImpl
14
Fred Drake45cd9de2000-06-29 19:34:54 +000015# --- ExpatParser
16
Fred Drakeddb48672000-09-23 05:32:26 +000017class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
Fred Drake45cd9de2000-06-29 19:34:54 +000018 "SAX driver for the Pyexpat C module."
19
20 def __init__(self, namespaceHandling=0, bufsize=2**16-20):
21 xmlreader.IncrementalParser.__init__(self, bufsize)
Lars Gustäbele292a242000-09-24 20:19:45 +000022 self._source = xmlreader.InputSource()
Fred Drake45cd9de2000-06-29 19:34:54 +000023 self._parser = None
24 self._namespaces = namespaceHandling
25 self._parsing = 0
Lars Gustäbele292a242000-09-24 20:19:45 +000026 self._entity_stack = []
Fred Drake45cd9de2000-06-29 19:34:54 +000027
28 # XMLReader methods
29
Lars Gustäbel523b0a62000-09-24 18:54:49 +000030 def parse(self, source):
Lars Gustäbelbb757132000-09-24 20:38:18 +000031 "Parse an XML document from a URL or an InputSource."
Lars Gustäbel523b0a62000-09-24 18:54:49 +000032 source = saxutils.prepare_input_source(source)
33
34 self._source = source
Fred Drake45cd9de2000-06-29 19:34:54 +000035 self.reset()
36 self._cont_handler.setDocumentLocator(self)
37 try:
Lars Gustäbel523b0a62000-09-24 18:54:49 +000038 xmlreader.IncrementalParser.parse(self, source)
Fred Drake96ea1962000-09-23 04:49:30 +000039 except expat.error:
Fred Drake45cd9de2000-06-29 19:34:54 +000040 error_code = self._parser.ErrorCode
Lars Gustäbelf43cf312000-09-24 18:29:24 +000041 raise SAXParseException(expat.ErrorString(error_code), None, self)
Fred Drake45cd9de2000-06-29 19:34:54 +000042
Lars Gustäbelbb757132000-09-24 20:38:18 +000043 self._cont_handler.endDocument()
Fred Drake45cd9de2000-06-29 19:34:54 +000044
Lars Gustäbel523b0a62000-09-24 18:54:49 +000045 def prepareParser(self, source):
46 if source.getSystemId() != None:
47 self._parser.SetBase(source.getSystemId())
Fred Drake45cd9de2000-06-29 19:34:54 +000048
49 def getFeature(self, name):
Lars Gustäbelf43cf312000-09-24 18:29:24 +000050 if name == feature_namespaces:
51 return self._namespaces
Fred Drake45cd9de2000-06-29 19:34:54 +000052 raise SAXNotRecognizedException("Feature '%s' not recognized" % name)
53
54 def setFeature(self, name, state):
Lars Gustäbelf43cf312000-09-24 18:29:24 +000055 if self._parsing:
56 raise SAXNotSupportedException("Cannot set features while parsing")
57 if name == feature_namespaces:
58 self._namespaces = state
59 else:
60 raise SAXNotRecognizedException("Feature '%s' not recognized" %
61 name)
Fred Drake45cd9de2000-06-29 19:34:54 +000062
63 def getProperty(self, name):
Fred Drake45cd9de2000-06-29 19:34:54 +000064 raise SAXNotRecognizedException("Property '%s' not recognized" % name)
65
66 def setProperty(self, name, value):
Fred Drake45cd9de2000-06-29 19:34:54 +000067 raise SAXNotRecognizedException("Property '%s' not recognized" % name)
68
69 # IncrementalParser methods
70
71 def feed(self, data):
72 if not self._parsing:
Fred Drakeddb48672000-09-23 05:32:26 +000073 self._parsing = 1
Fred Drake45cd9de2000-06-29 19:34:54 +000074 self.reset()
75 self._cont_handler.startDocument()
Lars Gustäbelf43cf312000-09-24 18:29:24 +000076
77 if not self._parser.Parse(data, 0):
78 msg = pyexpat.ErrorString(self._parser.ErrorCode)
79 raise SAXParseException(msg, None, self)
Fred Drake45cd9de2000-06-29 19:34:54 +000080
81 def close(self):
82 if self._parsing:
83 self._cont_handler.endDocument()
Fred Drakeddb48672000-09-23 05:32:26 +000084 self._parsing = 0
Fred Drake45cd9de2000-06-29 19:34:54 +000085 self._parser.Parse("", 1)
86
87 def reset(self):
88 if self._namespaces:
Fred Drake96ea1962000-09-23 04:49:30 +000089 self._parser = expat.ParserCreate(None, " ")
Fred Drake45cd9de2000-06-29 19:34:54 +000090 self._parser.StartElementHandler = self.start_element_ns
91 self._parser.EndElementHandler = self.end_element_ns
92 else:
Fred Drake96ea1962000-09-23 04:49:30 +000093 self._parser = expat.ParserCreate()
Paul Prescod6c4753f2000-07-04 03:39:33 +000094 self._parser.StartElementHandler = self.start_element
95 self._parser.EndElementHandler = self.end_element
Fred Drake45cd9de2000-06-29 19:34:54 +000096
97 self._parser.ProcessingInstructionHandler = \
98 self._cont_handler.processingInstruction
99 self._parser.CharacterDataHandler = self._cont_handler.characters
100 self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
101 self._parser.NotationDeclHandler = self.notation_decl
102 self._parser.StartNamespaceDeclHandler = self.start_namespace_decl
103 self._parser.EndNamespaceDeclHandler = self.end_namespace_decl
104# self._parser.CommentHandler =
105# self._parser.StartCdataSectionHandler =
106# self._parser.EndCdataSectionHandler =
107# self._parser.DefaultHandler =
108# self._parser.DefaultHandlerExpand =
109# self._parser.NotStandaloneHandler =
110 self._parser.ExternalEntityRefHandler = self.external_entity_ref
Lars Gustäbelbb757132000-09-24 20:38:18 +0000111
112 self._parsing = 0
113 self._entity_stack = []
114
Fred Drake45cd9de2000-06-29 19:34:54 +0000115 # Locator methods
116
117 def getColumnNumber(self):
118 return self._parser.ErrorColumnNumber
119
120 def getLineNumber(self):
121 return self._parser.ErrorLineNumber
122
123 def getPublicId(self):
124 return self._source.getPublicId()
125
126 def getSystemId(self):
127 return self._parser.GetBase()
128
Fred Drake45cd9de2000-06-29 19:34:54 +0000129 # event handlers
Fred Drake45cd9de2000-06-29 19:34:54 +0000130 def start_element(self, name, attrs):
Lars Gustäbel32bf12e2000-09-24 18:39:23 +0000131 self._cont_handler.startElement(name, AttributesImpl(attrs))
Fred Drake45cd9de2000-06-29 19:34:54 +0000132
133 def end_element(self, name):
Lars Gustäbelf43cf312000-09-24 18:29:24 +0000134 self._cont_handler.endElement(name)
Fred Drake45cd9de2000-06-29 19:34:54 +0000135
136 def start_element_ns(self, name, attrs):
Fred Drake96ea1962000-09-23 04:49:30 +0000137 pair = name.split()
Fred Drake45cd9de2000-06-29 19:34:54 +0000138 if len(pair) == 1:
Lars Gustäbelf43cf312000-09-24 18:29:24 +0000139 pair = (None, name)
Fred Drake45cd9de2000-06-29 19:34:54 +0000140
Lars Gustäbel32bf12e2000-09-24 18:39:23 +0000141 newattrs = {}
142 for (aname, value) in attrs.items():
143 apair = aname.split()
144 if len(apair) == 1:
145 apair = (None, aname)
146 else:
147 apair = tuple(apair)
148
149 newattrs[apair] = value
150
151 self._cont_handler.startElementNS(pair, None,
152 AttributesNSImpl(newattrs, {}))
Fred Drake45cd9de2000-06-29 19:34:54 +0000153
154 def end_element_ns(self, name):
Fred Drake96ea1962000-09-23 04:49:30 +0000155 pair = name.split()
Fred Drake45cd9de2000-06-29 19:34:54 +0000156 if len(pair) == 1:
Lars Gustäbel32bf12e2000-09-24 18:39:23 +0000157 pair = (None, name)
Fred Drake45cd9de2000-06-29 19:34:54 +0000158
Lars Gustäbelf43cf312000-09-24 18:29:24 +0000159 self._cont_handler.endElementNS(pair, None)
Fred Drake45cd9de2000-06-29 19:34:54 +0000160
Lars Gustäbelf43cf312000-09-24 18:29:24 +0000161 # this is not used (call directly to ContentHandler)
Fred Drake45cd9de2000-06-29 19:34:54 +0000162 def processing_instruction(self, target, data):
163 self._cont_handler.processingInstruction(target, data)
164
Lars Gustäbelf43cf312000-09-24 18:29:24 +0000165 # this is not used (call directly to ContentHandler)
Fred Drake45cd9de2000-06-29 19:34:54 +0000166 def character_data(self, data):
167 self._cont_handler.characters(data)
168
169 def start_namespace_decl(self, prefix, uri):
170 self._cont_handler.startPrefixMapping(prefix, uri)
171
172 def end_namespace_decl(self, prefix):
173 self._cont_handler.endPrefixMapping(prefix)
174
175 def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
176 self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name)
177
178 def notation_decl(self, name, base, sysid, pubid):
179 self._dtd_handler.notationDecl(name, pubid, sysid)
180
181 def external_entity_ref(self, context, base, sysid, pubid):
Fred Drake45cd9de2000-06-29 19:34:54 +0000182 source = self._ent_handler.resolveEntity(pubid, sysid)
Lars Gustäbele292a242000-09-24 20:19:45 +0000183 source = saxutils.prepare_input_source(source,
184 self._source.getSystemId() or
185 "")
186
187 self._entity_stack.append((self._parser, self._source))
188 self._parser = self._parser.ExternalEntityParserCreate(context)
189 self._source = source
190
191 try:
192 xmlreader.IncrementalParser.parse(self, source)
193 self.close()
194 except:
195 return 0 # FIXME: save error info here?
196
197 (self._parser, self._source) = self._entity_stack[-1]
198 del self._entity_stack[-1]
Fred Drake45cd9de2000-06-29 19:34:54 +0000199 return 1
200
201# ---
202
def create_parser(*args, **kwargs):
    """Return a new ExpatParser.

    All positional and keyword arguments are forwarded unchanged to the
    ExpatParser constructor.
    """
    # Extended call syntax replaces the deprecated apply() builtin.
    return ExpatParser(*args, **kwargs)
Fred Drake45cd9de2000-06-29 19:34:54 +0000205
206# ---
207
if __name__ == "__main__":
    # Manual smoke test: parse a sample document and write it back out as
    # XML through XMLGenerator.
    import xml.sax
    p = create_parser()
    # XMLGenerator is defined in xml.sax.saxutils, not in the xml.sax
    # package namespace.
    p.setContentHandler(saxutils.XMLGenerator())
    p.setErrorHandler(xml.sax.ErrorHandler())
    p.parse("../../../hamlet.xml")