blob: 1120f17354499b6c8daddb6146f627063cfa2ab0 [file] [log] [blame]
"""
SAX driver for the Pyexpat C module.  This driver works with
pyexpat.__version__ == '1.5'.
"""
5
# Todo on driver:
#  - make it support external entities (wait for pyexpat.c)
#  - enable configuration between reset() and feed() calls
#  - support lexical events?
#  - proper inputsource handling
#  - properties and features

# Todo on pyexpat.c:
#  - support XML_ExternalEntityParserCreate
#  - exceptions in callouts from pyexpat to python code lose position info
16
17version = "0.20"
18
Lars Gustäbelf43cf312000-09-24 18:29:24 +000019from xml.sax._exceptions import *
Fred Drake96ea1962000-09-23 04:49:30 +000020from xml.parsers import expat
Fred Drake45cd9de2000-06-29 19:34:54 +000021from xml.sax import xmlreader
Fred Drake45cd9de2000-06-29 19:34:54 +000022
23# --- ExpatParser
24
class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
    """SAX driver for the Pyexpat C module.

    The instance serves both as the incremental parser and as the
    locator object handed to the content handler in parse().
    """

    def __init__(self, namespaceHandling=0, bufsize=2**16-20):
        """Set up an idle driver; no expat parser exists until reset().

        namespaceHandling -- true to have elements reported through
            startElementNS()/endElementNS() instead of
            startElement()/endElement().
        bufsize -- passed through unchanged to
            IncrementalParser.__init__().
        """
        xmlreader.IncrementalParser.__init__(self, bufsize)
        self._source = None
        self._parser = None
        self._namespaces = namespaceHandling
        self._parsing = 0
        # Shared, empty attribute set handed to every start-element event.
        self._attrs = xmlreader.AttributesImpl({}, {})

    # XMLReader methods

    def parse(self, stream_or_string):
        """Parse an XML document from a file name or an open stream.

        A string argument is treated as a file name and opened with
        open(); any other object is used as the stream directly.  A
        file opened here is closed again before returning, a stream
        supplied by the caller is left open.

        Raises SAXParseException when expat reports an error.
        """
        opened_here = isinstance(stream_or_string, type(""))
        if opened_here:
            stream = open(stream_or_string)
        else:
            stream = stream_or_string

        try:
            self.reset()
            self._cont_handler.setDocumentLocator(self)
            try:
                xmlreader.IncrementalParser.parse(self, stream)
            except expat.error:
                error_code = self._parser.ErrorCode
                raise SAXParseException(expat.ErrorString(error_code),
                                        None, self)

            self._cont_handler.endDocument()
        finally:
            # Only release what this method acquired.
            if opened_here:
                stream.close()

    def prepareParser(self, filename=None):
        """Remember the source and, when given, set it as expat's base."""
        self._source = filename

        if self._source is not None:
            self._parser.SetBase(self._source)

    def getFeature(self, name):
        """Return the state of feature `name`.

        Only feature_namespaces is known; any other name raises
        SAXNotRecognizedException.
        """
        # Imported here because no module-level import visibly binds it.
        from xml.sax.handler import feature_namespaces

        if name == feature_namespaces:
            return self._namespaces
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def setFeature(self, name, state):
        """Set feature `name` to `state`.

        Raises SAXNotSupportedException while a parse is in progress and
        SAXNotRecognizedException for anything but feature_namespaces.
        """
        # Imported here because no module-level import visibly binds it.
        from xml.sax.handler import feature_namespaces

        if self._parsing:
            raise SAXNotSupportedException("Cannot set features while parsing")
        if name == feature_namespaces:
            self._namespaces = state
        else:
            raise SAXNotRecognizedException("Feature '%s' not recognized" %
                                            name)

    def getProperty(self, name):
        """No properties are supported; always raises."""
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    def setProperty(self, name, value):
        """No properties are supported; always raises."""
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    # IncrementalParser methods

    def feed(self, data):
        """Hand one chunk of document text to expat.

        The first call of a parse creates a fresh expat parser via
        reset() and reports startDocument().  Raises SAXParseException
        when expat's Parse() returns a false value.
        """
        if not self._parsing:
            self._parsing = 1
            self.reset()
            self._cont_handler.startDocument()

        if not self._parser.Parse(data, 0):
            # Use the imported `expat` module; `pyexpat` is not bound here.
            msg = expat.ErrorString(self._parser.ErrorCode)
            raise SAXParseException(msg, None, self)

    def close(self):
        """Finish the current parse and tell expat the input is complete."""
        # NOTE(review): endDocument() is reported before the final
        # Parse() call below -- confirm this ordering is intended.
        if self._parsing:
            self._cont_handler.endDocument()
            self._parsing = 0
        self._parser.Parse("", 1)

    def reset(self):
        """Create a new expat parser and attach this driver's callbacks."""
        if self._namespaces:
            # " " is the separator expat puts between URI and local name;
            # _split_name() relies on it.
            self._parser = expat.ParserCreate(None, " ")
            self._parser.StartElementHandler = self.start_element_ns
            self._parser.EndElementHandler = self.end_element_ns
        else:
            self._parser = expat.ParserCreate()
            self._parser.StartElementHandler = self.start_element
            self._parser.EndElementHandler = self.end_element

        # Processing instructions and character data go straight to the
        # content handler, bypassing the wrapper methods further down.
        self._parser.ProcessingInstructionHandler = \
                                    self._cont_handler.processingInstruction
        self._parser.CharacterDataHandler = self._cont_handler.characters
        self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
        self._parser.NotationDeclHandler = self.notation_decl
        self._parser.StartNamespaceDeclHandler = self.start_namespace_decl
        self._parser.EndNamespaceDeclHandler = self.end_namespace_decl
        # Not wired up yet: CommentHandler, StartCdataSectionHandler,
        # EndCdataSectionHandler, DefaultHandler, DefaultHandlerExpand,
        # NotStandaloneHandler.
        self._parser.ExternalEntityRefHandler = self.external_entity_ref

    # Locator methods

    def getColumnNumber(self):
        """Return expat's ErrorColumnNumber."""
        return self._parser.ErrorColumnNumber

    def getLineNumber(self):
        """Return expat's ErrorLineNumber."""
        return self._parser.ErrorLineNumber

    def getPublicId(self):
        """Return the source's public id, or None if it cannot supply one.

        The source recorded by prepareParser() may be None or a plain
        file name, neither of which has a getPublicId() method.
        """
        get_public_id = getattr(self._source, "getPublicId", None)
        if get_public_id is None:
            return None
        return get_public_id()

    def getSystemId(self):
        """Return the base set on the expat parser."""
        return self._parser.GetBase()

    # event handlers

    def _split_name(self, name):
        """Turn an expat element name into a tuple for the NS callbacks.

        A name without a separator becomes (None, name); otherwise the
        whitespace-separated parts are returned as a tuple.
        """
        parts = name.split()
        if len(parts) == 1:
            return (None, name)
        return tuple(parts)

    def start_element(self, name, attrs):
        """Report an element start to the content handler."""
        # NOTE(review): `attrs` from expat is not forwarded; the shared
        # empty self._attrs is passed instead -- confirm this is intended.
        self._cont_handler.startElement(name, self._attrs)

    def end_element(self, name):
        """Report an element end to the content handler."""
        self._cont_handler.endElement(name)

    def start_element_ns(self, name, attrs):
        """Report a namespace-aware element start to the content handler."""
        # NOTE(review): `attrs` from expat is not forwarded here either.
        self._cont_handler.startElementNS(self._split_name(name), None,
                                          self._attrs)

    def end_element_ns(self, name):
        """Report a namespace-aware element end to the content handler."""
        self._cont_handler.endElementNS(self._split_name(name), None)

    # this is not used (call directly to ContentHandler)
    def processing_instruction(self, target, data):
        """Forward a processing instruction to the content handler."""
        self._cont_handler.processingInstruction(target, data)

    # this is not used (call directly to ContentHandler)
    def character_data(self, data):
        """Forward character data to the content handler."""
        self._cont_handler.characters(data)

    def start_namespace_decl(self, prefix, uri):
        """Report the start of a prefix mapping."""
        self._cont_handler.startPrefixMapping(prefix, uri)

    def end_namespace_decl(self, prefix):
        """Report the end of a prefix mapping."""
        self._cont_handler.endPrefixMapping(prefix)

    def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
        """Forward an unparsed entity declaration to the DTD handler."""
        # expat's `base` argument is not part of the handler call.
        self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name)

    def notation_decl(self, name, base, sysid, pubid):
        """Forward a notation declaration to the DTD handler."""
        # expat's `base` argument is not part of the handler call.
        self._dtd_handler.notationDecl(name, pubid, sysid)

    def external_entity_ref(self, context, base, sysid, pubid):
        """External entities are not supported; always raises."""
        # FIXME: resolve the entity through the entity resolver, create a
        #        new parser, stack self._source and self._parser
        # FIXME: reuse code from self.parse(...)
        raise NotImplementedError()
186
187# ---
188
def create_parser(*args, **kwargs):
    """Return a new ExpatParser built from the given arguments."""
    # Extended call syntax replaces the deprecated apply() builtin.
    return ExpatParser(*args, **kwargs)
Fred Drake45cd9de2000-06-29 19:34:54 +0000191
192# ---
193
if __name__ == "__main__":
    # Manual smoke test: run a sample document through the driver with a
    # generator as content handler and a default error handler.
    import xml.sax
    parser = create_parser()
    parser.setContentHandler(xml.sax.XMLGenerator())
    parser.setErrorHandler(xml.sax.ErrorHandler())
    parser.parse("../../../hamlet.xml")
199 p.parse("../../../hamlet.xml")