blob: bd3a4675e3e869241755b520fc5f9a1e392ed76a [file] [log] [blame]
"""
SAX driver for the Pyexpat C module.  This driver works with
pyexpat.__version__ == '1.5'.

$Id$
"""
7
# Todo on driver:
# - make it support external entities (wait for pyexpat.c)
# - enable configuration between reset() and feed() calls
# - support lexical events?
# - proper inputsource handling
# - properties and features
14
# Todo on pyexpat.c:
# - support XML_ExternalEntityParserCreate
# - exceptions in callouts from pyexpat to python code lose position info
18
# Version string of this driver module.
version = "0.20"
20
21from string import split
22
23from xml.sax import xmlreader
24import pyexpat
25import xml.sax
26
27# --- ExpatParser
28
class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
    """SAX driver for the Pyexpat C module.

    Wraps a pyexpat parser object and forwards its callbacks to the
    handlers stored on this reader (self._cont_handler, self._dtd_handler).
    Those attributes are not assigned in this class; presumably they are
    provided by the xmlreader base classes -- confirm against xmlreader.
    The instance is also passed to the content handler as the document
    locator.
    """

    def __init__(self, namespaceHandling=0, bufsize=2**16-20):
        """Create a driver.

        namespaceHandling -- when true, reset() builds a namespace-aware
            expat parser and installs the *_ns element callbacks.
        bufsize -- forwarded unchanged to IncrementalParser.__init__.
        """
        xmlreader.IncrementalParser.__init__(self, bufsize)
        self._source = None
        self._parser = None
        self._namespaces = namespaceHandling
        self._parsing = 0

    # XMLReader methods

    def parse(self, stream_or_string):
        """Parse an XML document from a file name or an open stream.

        A plain string is treated as a file name and opened with open();
        any other object is handed on unchanged as the input stream.  A
        file opened here is closed again before returning, whether or not
        parsing succeeded.

        Raises xml.sax.SAXParseException (with this object as locator)
        when pyexpat reports an error.
        """
        opened_here = isinstance(stream_or_string, type(""))
        if opened_here:
            stream = open(stream_or_string)
        else:
            stream = stream_or_string

        try:
            self.reset()
            self._cont_handler.setDocumentLocator(self)
            try:
                xmlreader.IncrementalParser.parse(self, stream)
            except pyexpat.error:
                # Translate the expat error code into a SAX exception.
                error_code = self._parser.ErrorCode
                raise xml.sax.SAXParseException(
                    pyexpat.ErrorString(error_code), None, self)

            # NOTE(review): close() also reports endDocument while
            # _parsing is set; if IncrementalParser.parse() calls close(),
            # the handler sees endDocument twice -- confirm.
            self._cont_handler.endDocument()
        finally:
            # Only close what this method opened; caller-supplied streams
            # stay under the caller's control.
            if opened_here:
                stream.close()

    def prepareParser(self, filename=None):
        """Remember the source and, if given, set it as the expat base."""
        self._source = filename

        if self._source is not None:
            self._parser.SetBase(self._source)

    def getFeature(self, name):
        "Looks up and returns the state of a SAX2 feature."
        raise xml.sax.SAXNotRecognizedException(
            "Feature '%s' not recognized" % name)

    def setFeature(self, name, state):
        "Sets the state of a SAX2 feature."
        raise xml.sax.SAXNotRecognizedException(
            "Feature '%s' not recognized" % name)

    def getProperty(self, name):
        "Looks up and returns the value of a SAX2 property."
        raise xml.sax.SAXNotRecognizedException(
            "Property '%s' not recognized" % name)

    def setProperty(self, name, value):
        "Sets the value of a SAX2 property."
        raise xml.sax.SAXNotRecognizedException(
            "Property '%s' not recognized" % name)

    # IncrementalParser methods

    def feed(self, data):
        """Pass a chunk of document text to expat.

        The first call of a parse run creates a fresh parser via reset()
        and reports startDocument to the content handler.
        """
        if not self._parsing:
            self._parsing = 1
            # NOTE(review): reset() replaces self._parser, so a base set by
            # an earlier prepareParser() call would be lost -- confirm the
            # call order used by IncrementalParser.parse().
            self.reset()
            self._cont_handler.startDocument()
        # FIXME: error checking and endDocument()
        self._parser.Parse(data, 0)

    def close(self):
        """Finish the current parse run.

        Reports endDocument if a run is in progress, then tells expat that
        the input is complete.
        """
        # NOTE(review): endDocument is reported before the final Parse()
        # call, so anything expat raises or reports on finalisation comes
        # after it -- confirm this ordering is intended.
        if self._parsing:
            self._cont_handler.endDocument()
            self._parsing = 0
        self._parser.Parse("", 1)

    def reset(self):
        """Create a new pyexpat parser and install the callbacks on it."""
        if self._namespaces:
            # " " is the separator expat puts between namespace URI and
            # local name; the *_ns callbacks split on whitespace.
            self._parser = pyexpat.ParserCreate(None, " ")
            self._parser.StartElementHandler = self.start_element_ns
            self._parser.EndElementHandler = self.end_element_ns
        else:
            self._parser = pyexpat.ParserCreate()
            self._parser.StartElementHandler = self.start_element
            self._parser.EndElementHandler = self.end_element

        # Processing instructions and character data go straight to the
        # content handler, bypassing the wrapper methods further down.
        self._parser.ProcessingInstructionHandler = \
            self._cont_handler.processingInstruction
        self._parser.CharacterDataHandler = self._cont_handler.characters
        self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
        self._parser.NotationDeclHandler = self.notation_decl
        self._parser.StartNamespaceDeclHandler = self.start_namespace_decl
        self._parser.EndNamespaceDeclHandler = self.end_namespace_decl
        # Not wired up yet: CommentHandler, StartCdataSectionHandler,
        # EndCdataSectionHandler, DefaultHandler, DefaultHandlerExpand,
        # NotStandaloneHandler.
        self._parser.ExternalEntityRefHandler = self.external_entity_ref

    # Locator methods

    def getColumnNumber(self):
        "Return the parser's ErrorColumnNumber."
        return self._parser.ErrorColumnNumber

    def getLineNumber(self):
        "Return the parser's ErrorLineNumber."
        return self._parser.ErrorLineNumber

    def getPublicId(self):
        "Return getPublicId() of the stored source."
        # NOTE(review): _source is whatever prepareParser() received; this
        # call only works if that object provides getPublicId().
        return self._source.getPublicId()

    def getSystemId(self):
        "Return the base set on the expat parser."
        return self._parser.GetBase()

    # event handlers

    def start_element(self, name, attrs):
        "Report a start tag, using the raw name as both name arguments."
        self._cont_handler.startElement(name, name,
                                        xmlreader.AttributesImpl(attrs, attrs))

    def end_element(self, name):
        "Report an end tag, using the raw name as both name arguments."
        self._cont_handler.endElement(name, name)

    def start_element_ns(self, name, attrs):
        """Report a start tag in namespace mode.

        The expat name is split on whitespace; a name with a single part
        is reported as (None, name), otherwise the split parts are passed
        on as they are.
        """
        # NOTE(review): this passes a 2-item name while end_element_ns
        # passes a 3-item one -- confirm which shape handlers expect.
        pair = name.split()
        if len(pair) == 1:
            tup = (None, name)
        else:
            tup = pair

        self._cont_handler.startElement(tup, None,
                                        xmlreader.AttributesImpl(attrs, None))

    def end_element_ns(self, name):
        """Report an end tag in namespace mode.

        The expat name is split on whitespace and extended with a trailing
        None standing in for the prefix.
        """
        pair = name.split()
        if len(pair) == 1:
            name = (None, name, None)
        else:
            name = pair + [None]  # prefix is not implemented yet!

        self._cont_handler.endElement(name, None)

    # this is not used
    def processing_instruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    # this is not used
    def character_data(self, data):
        self._cont_handler.characters(data)

    def start_namespace_decl(self, prefix, uri):
        "Forward a namespace declaration as startPrefixMapping."
        self._cont_handler.startPrefixMapping(prefix, uri)

    def end_namespace_decl(self, prefix):
        "Forward the end of a namespace scope as endPrefixMapping."
        self._cont_handler.endPrefixMapping(prefix)

    def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
        "Forward to the DTD handler; expat's base argument is dropped."
        self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name)

    def notation_decl(self, name, base, sysid, pubid):
        "Forward to the DTD handler; expat's base argument is dropped."
        self._dtd_handler.notationDecl(name, pubid, sysid)

    def external_entity_ref(self, context, base, sysid, pubid):
        """Callback for external entity references; not implemented.

        Always raises NotImplementedError.  An explicit raise is used
        instead of an assert so the guard survives running under -O.
        """
        # FIXME: resolve the entity through self._ent_handler.resolveEntity
        #        and saxutils.prepare_input_source
        # FIXME: create new parser, stack self._source and self._parser
        # FIXME: reuse code from self.parse(...)
        raise NotImplementedError(
            "external entities are not supported by this driver")
190
191# ---
192
def create_parser(*args, **kwargs):
    """Return a new ExpatParser.

    All positional and keyword arguments are forwarded unchanged to the
    ExpatParser constructor.
    """
    # Extended call syntax replaces the deprecated apply() builtin.
    return ExpatParser(*args, **kwargs)
195
196# ---
197
if __name__ == "__main__":
    # Demo run: parse a sample file with an XMLGenerator content handler
    # and a default ErrorHandler installed.
    import xml.sax
    sample_path = "../../../hamlet.xml"
    parser = create_parser()
    content_handler = xml.sax.XMLGenerator()
    error_handler = xml.sax.ErrorHandler()
    parser.setContentHandler(content_handler)
    parser.setErrorHandler(error_handler)
    parser.parse(sample_path)