# blob: b6816a43a757b234a07148dd883412f4c44ab07b
# Fred Drake 45cd9de 2000-06-29 19:34:54 +0000
"""
SAX driver for the Pyexpat C module.  This driver works with
pyexpat.__version__ == '1.5'.

$Id$
"""
7
# Todo on driver:
# - make it support external entities (wait for pyexpat.c)
# - enable configuration between reset() and feed() calls
# - support lexical events?
# - proper InputSource handling
# - properties and features

# Todo on pyexpat.c:
# - support XML_ExternalEntityParserCreate
# - exceptions in callouts from pyexpat to Python code lose position info
# Version string of this driver module.
version = "0.20"
20
21from string import split
22
23from xml.sax import xmlreader
24import pyexpat
25import xml.sax
26
27# --- ExpatParser
28
class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
    """SAX driver for the Pyexpat C module.

    The instance is both the reader and its own document locator: parse()
    passes ``self`` to the content handler's setDocumentLocator().

    namespaceHandling -- when true, reset() creates the expat parser with
        " " as namespace separator and element names are reported as
        (uri, localname, None) tuples; otherwise raw expat names are
        passed straight to the content handler.
    bufsize -- forwarded unchanged to IncrementalParser.__init__().
    """

    def __init__(self, namespaceHandling=0, bufsize=2**16-20):
        xmlreader.IncrementalParser.__init__(self, bufsize)
        self._source = None      # value last given to prepareParser()
        self._parser = None      # expat parser object, created by reset()
        self._namespaces = namespaceHandling
        self._parsing = 0        # true between the first feed() and close()

    # XMLReader methods

    def parse(self, stream_or_string):
        """Parse an XML document from a file name or an already open stream.

        A string argument is treated as a file name: it is opened with
        open() and closed again when parsing finishes or fails.  Any other
        object is handed to IncrementalParser.parse() as is and is left
        open for the caller.

        Raises xml.sax.SAXParseException (carrying expat's error text and
        this object as locator) when expat reports an error.
        """
        opened_here = isinstance(stream_or_string, str)
        if opened_here:
            stream = open(stream_or_string)
        else:
            stream = stream_or_string

        try:
            self.reset()
            self._cont_handler.setDocumentLocator(self)
            try:
                xmlreader.IncrementalParser.parse(self, stream)
                # endDocument() is emitted by close().  Only finish the
                # document here if the base class left it open, so that the
                # handler sees endDocument() exactly once.
                if self._parsing:
                    self.close()
            except pyexpat.error:
                error_code = self._parser.ErrorCode
                raise xml.sax.SAXParseException(
                    pyexpat.ErrorString(error_code), None, self)
        finally:
            # Never close a stream that the caller owns.
            if opened_here:
                stream.close()

    def prepareParser(self, filename=None):
        """Remember *filename* and, if given, set it as expat's base."""
        self._source = filename

        if self._source is not None:
            self._parser.SetBase(self._source)

    def getFeature(self, name):
        "Looks up and returns the state of a SAX2 feature."
        raise xml.sax.SAXNotRecognizedException(
            "Feature '%s' not recognized" % name)

    def setFeature(self, name, state):
        "Sets the state of a SAX2 feature."
        raise xml.sax.SAXNotRecognizedException(
            "Feature '%s' not recognized" % name)

    def getProperty(self, name):
        "Looks up and returns the value of a SAX2 property."
        raise xml.sax.SAXNotRecognizedException(
            "Property '%s' not recognized" % name)

    def setProperty(self, name, value):
        "Sets the value of a SAX2 property."
        raise xml.sax.SAXNotRecognizedException(
            "Property '%s' not recognized" % name)

    # IncrementalParser methods

    def feed(self, data):
        """Pass a chunk of document text to expat.

        The first call after construction or close() creates a fresh
        parser via reset() and emits startDocument().
        """
        if not self._parsing:
            self._parsing = 1
            self.reset()
            self._cont_handler.startDocument()
        # FIXME: error checking and endDocument()
        self._parser.Parse(data, 0)

    def close(self):
        """Finish the current document.

        Tells expat that the input is complete and then, if feed() had
        started a document, emits endDocument().  The final Parse() call
        comes first so that anything it reports reaches the handler before
        endDocument().  Does nothing when no parser has been created yet.
        """
        if self._parser is None:
            return

        # Clear the flag before the final Parse() so that the reader can be
        # fed again even if expat raises here.
        was_parsing = self._parsing
        self._parsing = 0
        self._parser.Parse("", 1)
        if was_parsing:
            self._cont_handler.endDocument()

    def reset(self):
        """Create a new expat parser and install the callbacks on it.

        Callbacks that point directly at content-handler methods are bound
        to the handler that is current when reset() runs.
        """
        if self._namespaces:
            self._parser = pyexpat.ParserCreate(None, " ")
            self._parser.StartElementHandler = self.start_element_ns
            self._parser.EndElementHandler = self.end_element_ns
        else:
            self._parser = pyexpat.ParserCreate()
            self._parser.StartElementHandler = self._cont_handler.startElement
            self._parser.EndElementHandler = self._cont_handler.endElement

        self._parser.ProcessingInstructionHandler = \
            self._cont_handler.processingInstruction
        self._parser.CharacterDataHandler = self._cont_handler.characters
        self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
        self._parser.NotationDeclHandler = self.notation_decl
        self._parser.StartNamespaceDeclHandler = self.start_namespace_decl
        self._parser.EndNamespaceDeclHandler = self.end_namespace_decl
        # Not wired up yet: CommentHandler, StartCdataSectionHandler,
        # EndCdataSectionHandler, DefaultHandler, DefaultHandlerExpand,
        # NotStandaloneHandler.
        self._parser.ExternalEntityRefHandler = self.external_entity_ref

    # Locator methods

    def getColumnNumber(self):
        "Return the expat parser's ErrorColumnNumber."
        return self._parser.ErrorColumnNumber

    def getLineNumber(self):
        "Return the expat parser's ErrorLineNumber."
        return self._parser.ErrorLineNumber

    def getPublicId(self):
        """Return the source's public id, or None when it has none.

        The stored source may be None or an object without a
        getPublicId() method; both cases yield None.
        """
        getter = getattr(self._source, "getPublicId", None)
        if getter is None:
            return None
        return getter()

    def getSystemId(self):
        "Return the base set on the expat parser."
        return self._parser.GetBase()

    # internal methods

    def _split_ns_name(self, name):
        """Turn an expat "uri local" name into a (uri, local, None) tuple.

        Names without a separator become (None, name, None).  The result
        is always a tuple so both shapes can be used the same way.
        """
        parts = name.split()
        if len(parts) == 1:
            return (None, name, None)
        return tuple(parts) + (None,)  # prefix is not implemented yet!

    # event handlers

    def start_element(self, name, attrs):
        self._cont_handler.startElement(name,
                                        xmlreader.AttributesImpl(attrs, attrs))

    def end_element(self, name):
        self._cont_handler.endElement(name)

    def start_element_ns(self, name, attrs):
        self._cont_handler.startElement(self._split_ns_name(name),
                                        xmlreader.AttributesImpl(attrs, None))

    def end_element_ns(self, name):
        self._cont_handler.endElement(self._split_ns_name(name))

    def processing_instruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    def character_data(self, data):
        self._cont_handler.characters(data)

    def start_namespace_decl(self, prefix, uri):
        self._cont_handler.startPrefixMapping(prefix, uri)

    def end_namespace_decl(self, prefix):
        self._cont_handler.endPrefixMapping(prefix)

    def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
        # expat supplies (base, sysid, pubid); the handler takes pubid first.
        self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name)

    def notation_decl(self, name, base, sysid, pubid):
        self._dtd_handler.notationDecl(name, pubid, sysid)

    def external_entity_ref(self, context, base, sysid, pubid):
        # External entities are not supported yet; the statements after the
        # assert are a sketch of the intended implementation.
        assert 0, "external entities are not implemented"
        source = self._ent_handler.resolveEntity(pubid, sysid)
        source = saxutils.prepare_input_source(source)
        # FIXME: create new parser, stack self._source and self._parser
        # FIXME: reuse code from self.parse(...)
        return 1
191
192# ---
193
def create_parser(*args, **kwargs):
    """Return a new ExpatParser built from the given arguments.

    All positional and keyword arguments are forwarded unchanged to the
    ExpatParser constructor.
    """
    # Extended call syntax replaces the deprecated apply() builtin.
    return ExpatParser(*args, **kwargs)
196
197# ---
198
if __name__ == "__main__":
    # Ad-hoc manual check: run the driver over a sample document with an
    # XMLGenerator as content handler and a default error handler.
    import xml.sax
    parser = create_parser()
    content_handler = xml.sax.XMLGenerator()
    parser.setContentHandler(content_handler)
    error_handler = xml.sax.ErrorHandler()
    parser.setErrorHandler(error_handler)
    parser.parse("../../../hamlet.xml")