blob: 7f5ef79739ad22b5baeae824821178348728d0da [file] [log] [blame]
Fred Drakec16adce2000-12-14 18:00:18 +00001import xml.sax
2import xml.sax.handler
Fred Drake55c38192000-06-29 19:39:57 +00003
# Event tokens.  Every queued event is a (token, node) pair whose first
# item is one of these strings.
START_ELEMENT = "START_ELEMENT"
END_ELEMENT = "END_ELEMENT"
COMMENT = "COMMENT"
START_DOCUMENT = "START_DOCUMENT"
END_DOCUMENT = "END_DOCUMENT"
PROCESSING_INSTRUCTION = "PROCESSING_INSTRUCTION"
IGNORABLE_WHITESPACE = "IGNORABLE_WHITESPACE"
CHARACTERS = "CHARACTERS"


class PullDOM(xml.sax.ContentHandler):
    """SAX content handler that builds DOM nodes and queues them as events.

    Pending events are kept in a singly linked list of two-item lists
    ``[(token, node), next_cell]``.  ``firstEvent`` is a sentinel head cell
    whose second slot refers to the oldest pending event; ``lastEvent`` is
    the cell that was appended most recently.
    """

    _locator = None
    document = None

    def __init__(self, documentFactory=None):
        """Create a handler with an empty event queue.

        documentFactory -- object providing ``createDocument``; when None,
        startDocument() falls back to the minidom implementation.
        """
        self.documentFactory = documentFactory
        self.firstEvent = [None, None]
        self.lastEvent = self.firstEvent
        # Stack of uri -> prefix dicts.  The "xml" prefix is bound by
        # definition and is normally not announced via startPrefixMapping,
        # so it is seeded here; without it an attribute such as xml:lang
        # would raise KeyError in startElementNS.
        self._ns_contexts = [{"http://www.w3.org/XML/1998/namespace": "xml"}]
        self._current_context = self._ns_contexts[-1]

    def _queue_event(self, token, node):
        # Append a (token, node) cell to the list and make it the new tail.
        self.lastEvent[1] = [(token, node), None]
        self.lastEvent = self.lastEvent[1]

    def _qualified_name(self, uri, localname):
        # Build a valid qualified name for localname from the prefix that is
        # currently bound to uri.  The default namespace is stored with an
        # empty prefix, in which case the bare local name is the right
        # answer (":name" would not be a legal XML name).
        prefix = self._current_context[uri]
        if prefix:
            return prefix + ":" + localname
        return localname

    def setDocumentLocator(self, locator):
        """Remember the locator; startDocument() reads the ids from it."""
        self._locator = locator

    def startPrefixMapping(self, prefix, uri):
        """Record that uri is bound to prefix ('' when prefix is false)."""
        # Save a snapshot of the bindings as they were before this mapping;
        # endPrefixMapping() restores it.
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix or ''

    def endPrefixMapping(self, prefix):
        """Restore the bindings saved by the matching startPrefixMapping."""
        self._current_context = self._ns_contexts.pop()

    def startElementNS(self, name, tagName, attrs):
        """Create an element for the (uri, localname) pair and queue it.

        name    -- (uri, localname) tuple
        tagName -- qualified name, or None if the reader did not supply it
        attrs   -- mapping of (uri, localname) tuples to attribute values
        """
        uri, localname = name
        if uri:
            # When using namespaces, the reader may or may not provide us
            # with the original name.  If not, create *a* valid tagName
            # from the current context.
            if tagName is None:
                tagName = self._qualified_name(uri, localname)
            node = self.document.createElementNS(uri, tagName)
        else:
            # When the tagname is not prefixed, it just appears as
            # localname.
            node = self.document.createElement(localname)

        for aname, value in attrs.items():
            a_uri, a_localname = aname
            if a_uri:
                qname = self._qualified_name(a_uri, a_localname)
                attr = self.document.createAttributeNS(a_uri, qname)
            else:
                attr = self.document.createAttribute(a_localname)
            attr.value = value
            node.setAttributeNode(attr)

        self.curNode.appendChild(node)
        self.curNode = node
        self._queue_event(START_ELEMENT, node)

    def endElementNS(self, name, tagName):
        """Queue END_ELEMENT for the current node and step to its parent."""
        node = self.curNode
        self._queue_event(END_ELEMENT, node)
        self.curNode = node.parentNode

    def startElement(self, name, attrs):
        """Create an element called name, copy attrs onto it and queue it."""
        node = self.document.createElement(name)

        for aname, value in attrs.items():
            attr = self.document.createAttribute(aname)
            attr.value = value
            node.setAttributeNode(attr)

        self.curNode.appendChild(node)
        self.curNode = node
        self._queue_event(START_ELEMENT, node)

    def endElement(self, name):
        """Queue END_ELEMENT for the current node and step to its parent."""
        node = self.curNode
        self._queue_event(END_ELEMENT, node)
        self.curNode = node.parentNode

    def comment(self, s):
        """Append a comment node to the current node and queue COMMENT."""
        node = self.document.createComment(s)
        self.curNode.appendChild(node)
        self._queue_event(COMMENT, node)

    def processingInstruction(self, target, data):
        """Append a processing-instruction node and queue it."""
        node = self.document.createProcessingInstruction(target, data)
        self.curNode.appendChild(node)
        self._queue_event(PROCESSING_INSTRUCTION, node)

    def ignorableWhitespace(self, chars):
        """Append chars as a text node and queue IGNORABLE_WHITESPACE."""
        node = self.document.createTextNode(chars)
        self.curNode.appendChild(node)
        self._queue_event(IGNORABLE_WHITESPACE, node)

    def characters(self, chars):
        """Append chars as a text node and queue CHARACTERS."""
        node = self.document.createTextNode(chars)
        self.curNode.appendChild(node)
        self._queue_event(CHARACTERS, node)

    def startDocument(self):
        """Create the document node and queue START_DOCUMENT."""
        publicId = systemId = None
        if self._locator:
            publicId = self._locator.getPublicId()
            systemId = self._locator.getSystemId()
        if self.documentFactory is None:
            import xml.dom.minidom
            self.documentFactory = xml.dom.minidom.Document.implementation
        # NOTE(review): the ids are passed in the positions DOM Level 2
        # names qualifiedName and doctype -- confirm this is what the
        # document factory in use expects.
        node = self.documentFactory.createDocument(None, publicId, systemId)
        self.curNode = self.document = node
        self._queue_event(START_DOCUMENT, node)

    def endDocument(self):
        """Queue END_DOCUMENT, carrying the document element as its node."""
        assert self.curNode.parentNode is None, \
               "not all elements have been properly closed"
        assert self.curNode.documentElement is not None, \
               "document does not contain a root element"
        node = self.curNode.documentElement
        # Unlike the other callbacks, lastEvent is left where it was; this
        # keeps the behaviour of the original implementation.
        self.lastEvent[1] = [(END_DOCUMENT, node), None]
Fred Drake55c38192000-06-29 19:39:57 +0000155
class ErrorHandler:
    """Error handler that prints warnings and raises everything else."""

    def warning(self, exception):
        # The parenthesised single-argument form prints the same text under
        # the Python 2 print statement and the Python 3 print function.
        print(exception)

    def error(self, exception):
        raise exception

    def fatalError(self, exception):
        raise exception
Fred Drake55c38192000-06-29 19:39:57 +0000163
class DOMEventStream:
    """Pulls (token, node) events out of a stream, one chunk at a time.

    The stream is read in pieces of bufsize and fed to the parser, whose
    content handler (a PullDOM) queues the resulting events.
    """

    def __init__(self, stream, parser, bufsize):
        self.stream, self.parser, self.bufsize = stream, parser, bufsize
        self.reset()

    def reset(self):
        """Install a fresh PullDOM as the parser's content handler."""
        handler = PullDOM()
        self.pulldom = handler
        # This content handler relies on namespace support
        self.parser.setFeature(xml.sax.handler.feature_namespaces, 1)
        self.parser.setContentHandler(handler)

    def __getitem__(self, pos):
        """Return the next event regardless of pos; IndexError when done."""
        event = self.getEvent()
        if not event:
            raise IndexError
        return event

    def expandNode(self, node):
        """Consume events until the one whose node is the given node.

        Every node seen on the way, except those of END_ELEMENT events, is
        appended to its parentNode.
        """
        while True:
            event = self.getEvent()
            if not event:
                return
            token, cur_node = event
            if cur_node is node:
                return
            if token != END_ELEMENT:
                cur_node.parentNode.appendChild(cur_node)

    def getEvent(self):
        """Return the oldest pending event, or None at end of stream."""
        if not self.pulldom.firstEvent[1]:
            # Queue is empty: point the tail back at the sentinel head and
            # keep feeding the parser until it queues something.
            self.pulldom.lastEvent = self.pulldom.firstEvent
            while True:
                chunk = self.stream.read(self.bufsize)
                if not chunk:
                    # FIXME: the parser is not closed here; the original
                    # author noted that Expat's close did not work.
                    return None
                self.parser.feed(chunk)
                if self.pulldom.firstEvent[1]:
                    break
        # Unlink the first cell from the list and hand out its event.
        head = self.pulldom.firstEvent
        cell = head[1]
        head[1] = cell[1]
        return cell[0]
206
class SAX2DOM(PullDOM):
    """PullDOM variant that re-appends each new node to its parentNode.

    Elements, processing instructions and text nodes are handled by the
    PullDOM callback first; the node that callback produced is then
    appended to its own parentNode.
    """

    def startElementNS(self, name, tagName , attrs):
        PullDOM.startElementNS(self, name, tagName, attrs)
        # The base class leaves the new element in curNode.
        element = self.curNode
        element.parentNode.appendChild(element)

    def startElement(self, name, attrs):
        PullDOM.startElement(self, name, attrs)
        element = self.curNode
        element.parentNode.appendChild(element)

    def processingInstruction(self, target, data):
        PullDOM.processingInstruction(self, target, data)
        # lastEvent[0] is the (token, node) pair that was just queued.
        _token, pi = self.lastEvent[0]
        pi.parentNode.appendChild(pi)

    def ignorableWhitespace(self, chars):
        PullDOM.ignorableWhitespace(self, chars)
        _token, text = self.lastEvent[0]
        text.parentNode.appendChild(text)

    def characters(self, chars):
        PullDOM.characters(self, chars)
        _token, text = self.lastEvent[0]
        text.parentNode.appendChild(text)
231
Fred Drakec16adce2000-12-14 18:00:18 +0000232
# Chunk size, in units of stream.read(), used by parse() when the caller
# does not pass a bufsize: 16 KiB less 20.
default_bufsize = 2 ** 14 - 20
234
def parse(stream_or_string, parser=None, bufsize=None):
    """Return a DOMEventStream for a file name or an open stream.

    stream_or_string -- a file name (byte or unicode string, or a subclass
                        of either) or an object with a read() method
    parser           -- SAX parser to use; xml.sax.make_parser() if false
    bufsize          -- chunk size for reading; default_bufsize if None

    A file opened here is not closed by this function.
    """
    if bufsize is None:
        bufsize = default_bufsize
    # isinstance also accepts string subclasses, which the exact type
    # comparison used to mistake for streams.
    if isinstance(stream_or_string, (type(""), type(u""))):
        # Binary mode hands the parser the raw bytes, so the encoding named
        # in the XML declaration is honoured and no newline translation or
        # locale decoding happens first.
        stream = open(stream_or_string, "rb")
    else:
        stream = stream_or_string
    if not parser:
        parser = xml.sax.make_parser()
    return DOMEventStream(stream, parser, bufsize)
Fred Drake55c38192000-06-29 19:39:57 +0000245
def parseString(string, parser=None):
    """Return a DOMEventStream that reads the XML text in string.

    string -- the document text; its length is used as the buffer size, so
              the whole text is read in one chunk
    parser -- SAX parser to use; xml.sax.make_parser() if false
    """
    try:
        from cStringIO import StringIO
    except ImportError:
        try:
            from StringIO import StringIO
        except ImportError:
            # Neither legacy module exists (Python 3); use the io module
            # instead of failing with ImportError.
            from io import StringIO

    bufsize = len(string)
    buf = StringIO(string)
    if not parser:
        parser = xml.sax.make_parser()
    return DOMEventStream(buf, parser, bufsize)
Fred Drake1f549022000-09-24 05:21:58 +0000256 return DOMEventStream(buf, parser, bufsize)