"""
SAX driver for the Pyexpat C module. This driver works with
pyexpat.__version__ == '1.5'.
"""

# Todo on driver:
# - make it support external entities (wait for pyexpat.c)
# - enable configuration between reset() and feed() calls
# - support lexical events?
# - proper inputsource handling
# - properties and features

# Todo on pyexpat.c:
# - support XML_ExternalEntityParserCreate
# - exceptions in callouts from pyexpat to python code lose position info

# Version string of this driver module.
version = "0.20"

from xml.parsers import expat
from xml.sax import xmlreader, saxutils
from xml.sax._exceptions import *
from xml.sax.handler import feature_namespaces

# Module-level shortcuts for the attribute containers that the element
# callbacks of ExpatParser wrap expat's attribute dictionaries in.
AttributesImpl = xmlreader.AttributesImpl
AttributesNSImpl = xmlreader.AttributesNSImpl

# --- ExpatParser

class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
    """SAX driver for the Pyexpat C module.

    The driver acts as its own document locator: it hands itself to
    ContentHandler.setDocumentLocator() and to every SAXParseException
    it raises.
    """

    def __init__(self, namespaceHandling=0, bufsize=2**16-20):
        """Create an idle driver.

        namespaceHandling -- true to report elements through
                             startElementNS()/endElementNS().
        bufsize           -- forwarded to xmlreader.IncrementalParser.
        """
        xmlreader.IncrementalParser.__init__(self, bufsize)
        self._source = None                  # InputSource of the current parse()
        self._parser = None                  # expat parser, created by reset()
        self._namespaces = namespaceHandling
        self._parsing = 0                    # true between first feed() and close()

    # XMLReader methods

    def parse(self, source):
        """Parse an XML document from a URL or an InputSource.

        `source` is normalised with saxutils.prepare_input_source().
        Parse errors are raised as SAXParseException by feed()/close(),
        which the inherited IncrementalParser.parse() drives.
        """
        source = saxutils.prepare_input_source(source)

        self._source = source
        self.reset()
        self._cont_handler.setDocumentLocator(self)
        # The base class feeds the stream chunk by chunk and finishes with
        # close(); close() reports endDocument(), so it must not be
        # reported a second time here.
        xmlreader.IncrementalParser.parse(self, source)

    def prepareParser(self, source):
        """Set the parser's base to the system id of `source`, if any."""
        system_id = source.getSystemId()
        if system_id is not None:
            self._parser.SetBase(system_id)

    def getFeature(self, name):
        """Return the state of feature `name`.

        Only the namespaces feature is known; anything else raises
        SAXNotRecognizedException.
        """
        if name == feature_namespaces:
            return self._namespaces
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def setFeature(self, name, state):
        """Set feature `name` to `state`.

        Raises SAXNotSupportedException while a parse is in progress and
        SAXNotRecognizedException for any feature but namespaces.
        """
        if self._parsing:
            raise SAXNotSupportedException("Cannot set features while parsing")
        if name == feature_namespaces:
            self._namespaces = state
        else:
            raise SAXNotRecognizedException("Feature '%s' not recognized" %
                                            name)

    def getProperty(self, name):
        """No properties are supported; always raises."""
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    def setProperty(self, name, value):
        """No properties are supported; always raises."""
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    # IncrementalParser methods

    def feed(self, data):
        """Parse the next chunk of the document.

        The first chunk after construction or close() creates a new
        parser and reports startDocument().  Raises SAXParseException if
        expat rejects the data.
        """
        if not self._parsing:
            self._parsing = 1
            # NOTE(review): reset() replaces the parser object, so a base
            # set by prepareParser() on the previous parser is not carried
            # over -- confirm whether that is intended.
            self.reset()
            self._cont_handler.startDocument()

        self._parse_chunk(data, 0)

    def close(self):
        """Finish the document.

        Runs expat's final parse step first so that every pending event
        is delivered, then reports endDocument() if a document was
        started.  Does nothing when no parser has been created yet.
        Raises SAXParseException if the document is incomplete or
        malformed.
        """
        if self._parser is None:
            return
        try:
            self._parse_chunk("", 1)
            if self._parsing:
                self._cont_handler.endDocument()
        finally:
            # Leave the parsing state even on error so that features can
            # be changed and a new document can be fed afterwards.
            self._parsing = 0

    def reset(self):
        """Create a fresh expat parser and install all callbacks."""
        if self._namespaces:
            # With a namespace separator expat reports qualified names as
            # "uri localname"; the *_ns callbacks split them again.
            self._parser = expat.ParserCreate(None, " ")
            self._parser.StartElementHandler = self.start_element_ns
            self._parser.EndElementHandler = self.end_element_ns
        else:
            self._parser = expat.ParserCreate()
            self._parser.StartElementHandler = self.start_element
            self._parser.EndElementHandler = self.end_element

        # These two are bound straight to the content handler that is
        # installed at the time of the reset.
        self._parser.ProcessingInstructionHandler = \
            self._cont_handler.processingInstruction
        self._parser.CharacterDataHandler = self._cont_handler.characters
        self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
        self._parser.NotationDeclHandler = self.notation_decl
        self._parser.StartNamespaceDeclHandler = self.start_namespace_decl
        self._parser.EndNamespaceDeclHandler = self.end_namespace_decl
        # Not hooked up: CommentHandler, StartCdataSectionHandler,
        # EndCdataSectionHandler, DefaultHandler, DefaultHandlerExpand,
        # NotStandaloneHandler.
        self._parser.ExternalEntityRefHandler = self.external_entity_ref

    # Locator methods

    def getColumnNumber(self):
        """Return the parser's ErrorColumnNumber attribute."""
        return self._parser.ErrorColumnNumber

    def getLineNumber(self):
        """Return the parser's ErrorLineNumber attribute."""
        return self._parser.ErrorLineNumber

    def getPublicId(self):
        """Return the public id of the current source, or None.

        None is also returned when the driver is fed incrementally and
        therefore has no input source.
        """
        if self._source is None:
            return None
        return self._source.getPublicId()

    def getSystemId(self):
        """Return the base currently set on the expat parser."""
        return self._parser.GetBase()

    # internal helpers

    def _parse_chunk(self, data, final):
        """Pass `data` to expat; turn any failure into SAXParseException."""
        try:
            status = self._parser.Parse(data, final)
        except expat.error:
            status = 0
        if not status:
            msg = expat.ErrorString(self._parser.ErrorCode)
            raise SAXParseException(msg, None, self)

    def _split_name(self, name):
        """Split an expat "uri localname" string into a (uri, localname)
        tuple; names without a namespace become (None, name)."""
        parts = name.split()
        if len(parts) == 1:
            return (None, name)
        return tuple(parts)

    # event handlers

    def start_element(self, name, attrs):
        self._cont_handler.startElement(name, AttributesImpl(attrs))

    def end_element(self, name):
        self._cont_handler.endElement(name)

    def start_element_ns(self, name, attrs):
        newattrs = {}
        for (aname, value) in attrs.items():
            newattrs[self._split_name(aname)] = value

        # No qualified names are available here, hence None / {}.
        self._cont_handler.startElementNS(self._split_name(name), None,
                                          AttributesNSImpl(newattrs, {}))

    def end_element_ns(self, name):
        self._cont_handler.endElementNS(self._split_name(name), None)

    # this is not used (call directly to ContentHandler)
    def processing_instruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    # this is not used (call directly to ContentHandler)
    def character_data(self, data):
        self._cont_handler.characters(data)

    def start_namespace_decl(self, prefix, uri):
        self._cont_handler.startPrefixMapping(prefix, uri)

    def end_namespace_decl(self, prefix):
        self._cont_handler.endPrefixMapping(prefix)

    def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
        # expat passes (base, sysid, pubid); the DTD handler is called
        # with pubid before sysid.
        self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name)

    def notation_decl(self, name, base, sysid, pubid):
        self._dtd_handler.notationDecl(name, pubid, sysid)

    def external_entity_ref(self, context, base, sysid, pubid):
        """expat callback for external entity references (not supported)."""
        # FIXME: resolve through self._ent_handler.resolveEntity(pubid, sysid)
        # FIXME: and saxutils.prepare_input_source(), then
        # FIXME: create new parser, stack self._source and self._parser
        # FIXME: reuse code from self.parse(...)
        raise NotImplementedError()

# ---

def create_parser(*args, **kwargs):
    """Return a new ExpatParser, forwarding all arguments to its constructor."""
    # Extended call syntax replaces the deprecated apply() builtin.
    return ExpatParser(*args, **kwargs)

# ---

if __name__ == "__main__":
    # Small manual smoke test: parse a sample document and echo it back
    # as XML through XMLGenerator.
    import xml.sax
    p = create_parser()
    # XMLGenerator is provided by xml.sax.saxutils (imported at the top of
    # this module), not by the xml.sax package namespace.
    p.setContentHandler(saxutils.XMLGenerator())
    p.setErrorHandler(xml.sax.ErrorHandler())
    p.parse("../../../hamlet.xml")
208 p.parse("../../../hamlet.xml")