blob: f0b6164c5562e36c7607f56808111c3966f0d660 [file] [log] [blame]
Fred Drake45cd9de2000-06-29 19:34:54 +00001"""
2SAX driver for the Pyexpat C module. This driver works with
3pyexpat.__version__ == '1.5'.
Fred Drake45cd9de2000-06-29 19:34:54 +00004"""
5
6# Todo on driver:
7# - make it support external entities (wait for pyexpat.c)
8# - enable configuration between reset() and feed() calls
9# - support lexical events?
10# - proper inputsource handling
11# - properties and features
12
13# Todo on pyexpat.c:
14# - support XML_ExternalEntityParserCreate
15# - exceptions in callouts from pyexpat to python code lose position info
16
# Version string exposed by this module.
# NOTE(review): presumably the driver's own version, separate from the
# pyexpat version named in the module docstring -- confirm.
version = "0.20"
18
Fred Drake96ea1962000-09-23 04:49:30 +000019from xml.parsers import expat
Fred Drake45cd9de2000-06-29 19:34:54 +000020from xml.sax import xmlreader
Fred Drake45cd9de2000-06-29 19:34:54 +000021import xml.sax
22
23# --- ExpatParser
24
class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
    """SAX driver for the Pyexpat C module.

    The driver owns one expat parser object (created by reset()) and
    forwards expat callbacks to the registered SAX handlers.  It also acts
    as its own document locator, answering position queries from the
    current expat parser.

    namespaceHandling -- when true, expat is created with " " as the
        namespace separator and element names are reported as tuples.
    bufsize -- passed unchanged to xmlreader.IncrementalParser.
    """

    def __init__(self, namespaceHandling=0, bufsize=2**16-20):
        xmlreader.IncrementalParser.__init__(self, bufsize)
        self._source = None     # whatever was last given to prepareParser()
        self._parser = None     # expat parser; created by reset()
        self._namespaces = namespaceHandling
        self._parsing = 0       # true between the first feed() and close()

    # XMLReader methods

    def parse(self, stream_or_string):
        """Parse an XML document from a file name or an open stream.

        A string argument is treated as a file name and opened here; any
        other object is handed to the inherited parse() as-is.  A file
        opened here is always closed again, also when parsing fails.

        expat errors are re-raised as xml.sax.SAXParseException carrying
        expat's error message, with this parser as the locator.
        """
        opened_here = isinstance(stream_or_string, str)
        if opened_here:
            stream = open(stream_or_string)
        else:
            stream = stream_or_string

        try:
            self.reset()
            self._cont_handler.setDocumentLocator(self)
            try:
                xmlreader.IncrementalParser.parse(self, stream)
                # The inherited parse() is documented to drive feed() and
                # close(), and close() already reports endDocument().  Only
                # finish the document here if that did not happen, so the
                # handler never sees endDocument() twice.
                if self._parsing:
                    self.close()
            except expat.error:
                error_code = self._parser.ErrorCode
                raise xml.sax.SAXParseException(
                    expat.ErrorString(error_code), None, self)
        finally:
            if opened_here:
                stream.close()

    def prepareParser(self, filename=None):
        """Remember the document source and set it as expat's base.

        `filename` may be a plain string, or an object exposing
        getSystemId() (getPublicId() expects the latter kind of object).
        Nothing is passed to expat when no base can be determined.
        """
        self._source = filename

        base = filename
        if hasattr(filename, "getSystemId"):
            base = filename.getSystemId()
        if base is not None:
            self._parser.SetBase(base)

    def getFeature(self, name):
        "Looks up and returns the state of a SAX2 feature."
        # No features are supported yet, so every name is rejected.
        raise xml.sax.SAXNotRecognizedException(
            "Feature '%s' not recognized" % name)

    def setFeature(self, name, state):
        "Sets the state of a SAX2 feature."
        raise xml.sax.SAXNotRecognizedException(
            "Feature '%s' not recognized" % name)

    def getProperty(self, name):
        "Looks up and returns the value of a SAX2 property."
        # No properties are supported yet, so every name is rejected.
        raise xml.sax.SAXNotRecognizedException(
            "Property '%s' not recognized" % name)

    def setProperty(self, name, value):
        "Sets the value of a SAX2 property."
        raise xml.sax.SAXNotRecognizedException(
            "Property '%s' not recognized" % name)

    # IncrementalParser methods

    def feed(self, data):
        """Pass a chunk of document text to expat.

        The first chunk of a document creates a fresh parser via reset()
        and reports startDocument() to the content handler.
        """
        if not self._parsing:
            self._parsing = 1
            self.reset()
            self._cont_handler.startDocument()
        # FIXME: error checking and endDocument()
        self._parser.Parse(data, 0)

    def close(self):
        """Tell expat the input is complete and finish the document.

        Does nothing when no parser has been created yet.  endDocument()
        is reported only if a document was started by feed(), and only
        after expat has accepted the end of input, so events and errors
        raised by the final expat call are not delivered after it.
        """
        if self._parser is None:
            return
        was_parsing = self._parsing
        self._parsing = 0
        self._parser.Parse("", 1)
        if was_parsing:
            self._cont_handler.endDocument()

    def reset(self):
        """Create a new expat parser and attach the callbacks to it."""
        if self._namespaces:
            # " " is the separator expat puts between namespace URI and
            # local name; the *_ns handlers split names on it.
            self._parser = expat.ParserCreate(None, " ")
            self._parser.StartElementHandler = self.start_element_ns
            self._parser.EndElementHandler = self.end_element_ns
        else:
            self._parser = expat.ParserCreate()
            self._parser.StartElementHandler = self.start_element
            self._parser.EndElementHandler = self.end_element

        # These two are bound straight to the content handler that is
        # current at reset() time, bypassing processing_instruction() and
        # character_data() below.
        self._parser.ProcessingInstructionHandler = \
            self._cont_handler.processingInstruction
        self._parser.CharacterDataHandler = self._cont_handler.characters
        self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
        self._parser.NotationDeclHandler = self.notation_decl
        self._parser.StartNamespaceDeclHandler = self.start_namespace_decl
        self._parser.EndNamespaceDeclHandler = self.end_namespace_decl
        # Not hooked up yet: CommentHandler, StartCdataSectionHandler,
        # EndCdataSectionHandler, DefaultHandler, DefaultHandlerExpand,
        # NotStandaloneHandler.
        self._parser.ExternalEntityRefHandler = self.external_entity_ref

    # Locator methods

    def getColumnNumber(self):
        "Return the column number reported by expat (ErrorColumnNumber)."
        return self._parser.ErrorColumnNumber

    def getLineNumber(self):
        "Return the line number reported by expat (ErrorLineNumber)."
        return self._parser.ErrorLineNumber

    def getPublicId(self):
        """Return the public identifier of the current source, or None.

        None is returned when no source has been prepared or when the
        source (for example a plain file name) has no getPublicId().
        """
        get_public_id = getattr(self._source, "getPublicId", None)
        if get_public_id is None:
            return None
        return get_public_id()

    def getSystemId(self):
        "Return the base set on the expat parser (see prepareParser())."
        return self._parser.GetBase()

    # event handlers

    def start_element(self, name, attrs):
        "Report a start tag when namespace handling is off."
        self._cont_handler.startElement(name, name,
                                        xmlreader.AttributesImpl(attrs, attrs))

    def end_element(self, name):
        "Report an end tag when namespace handling is off."
        self._cont_handler.endElement(name, name)

    def start_element_ns(self, name, attrs):
        """Report a start tag when namespace handling is on.

        The element name is reported as a tuple: (uri, localname), or
        (None, name) when the element is in no namespace.
        """
        # NOTE(review): names reported here have two items while
        # end_element_ns() reports three -- confirm which is intended.
        pair = name.split()
        if len(pair) == 1:
            tup = (None, name)
        else:
            # always a tuple, matching the no-namespace case
            tup = tuple(pair)

        self._cont_handler.startElement(tup, None,
                                        xmlreader.AttributesImpl(attrs, None))

    def end_element_ns(self, name):
        """Report an end tag when namespace handling is on.

        The element name is reported as a tuple whose last item is
        always None: (uri, localname, None) or (None, name, None).
        """
        pair = name.split()
        if len(pair) == 1:
            name = (None, name, None)
        else:
            name = tuple(pair) + (None,)  # prefix is not implemented yet!

        self._cont_handler.endElement(name, None)

    # this is not used
    def processing_instruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    # this is not used
    def character_data(self, data):
        self._cont_handler.characters(data)

    def start_namespace_decl(self, prefix, uri):
        "Forward the start of a namespace declaration scope."
        self._cont_handler.startPrefixMapping(prefix, uri)

    def end_namespace_decl(self, prefix):
        "Forward the end of a namespace declaration scope."
        self._cont_handler.endPrefixMapping(prefix)

    def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
        "Forward an unparsed entity declaration; `base` is dropped."
        self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name)

    def notation_decl(self, name, base, sysid, pubid):
        "Forward a notation declaration; `base` is dropped."
        self._dtd_handler.notationDecl(name, pubid, sysid)

    def external_entity_ref(self, context, base, sysid, pubid):
        """expat callback for external entity references (unsupported).

        Always raises NotImplementedError.  An explicit raise is used
        instead of an assert so the guard survives running with -O.
        """
        # FIXME: resolve the entity with
        #        self._ent_handler.resolveEntity(pubid, sysid) and turn the
        #        result into an input source (saxutils.prepare_input_source)
        # FIXME: create new parser, stack self._source and self._parser
        # FIXME: reuse code from self.parse(...), then return 1
        raise NotImplementedError("external entities are not supported")
186
187# ---
188
def create_parser(*args, **kwargs):
    """Return a new ExpatParser built from the given arguments.

    All positional and keyword arguments are passed through unchanged to
    the ExpatParser constructor.
    """
    # Extended call syntax replaces apply(), which no longer exists in
    # Python 3; the call is otherwise identical.
    return ExpatParser(*args, **kwargs)
191
192# ---
193
if __name__ == "__main__":
    # Manual smoke test: parse a sample document and let an XMLGenerator
    # handle the resulting SAX events.
    import xml.sax
    demo_parser = create_parser()
    demo_parser.setContentHandler(xml.sax.XMLGenerator())
    demo_parser.setErrorHandler(xml.sax.ErrorHandler())
    demo_parser.parse("../../../hamlet.xml")