blob: 7374069fbc81e81e3f26f90de55ac4442485e8b6 [file] [log] [blame]
Fred Drake55c38192000-06-29 19:39:57 +00001import minidom
Martin v. Löwisa13a9dc2000-09-24 21:54:14 +00002import xml.sax,xml.sax.handler
Fred Drake55c38192000-06-29 19:39:57 +00003
# Event tokens.  Each queued event is a (token, node) pair whose first item
# is one of these strings; every constant's value equals its own name.

# Document boundaries.
START_DOCUMENT = "START_DOCUMENT"
END_DOCUMENT = "END_DOCUMENT"

# Element boundaries.
START_ELEMENT = "START_ELEMENT"
END_ELEMENT = "END_ELEMENT"

# Leaf content.
CHARACTERS = "CHARACTERS"
IGNORABLE_WHITESPACE = "IGNORABLE_WHITESPACE"
COMMENT = "COMMENT"
PROCESSING_INSTRUCTION = "PROCESSING_INSTRUCTION"
Fred Drake55c38192000-06-29 19:39:57 +000012
class PullDOM(xml.sax.ContentHandler):
    """SAX content handler that queues DOM nodes as pull-style events.

    Every callback creates the matching minidom node, stores the node's
    intended parent in ``node.parentNode`` *without* attaching the node to
    that parent, and appends a ``(token, node)`` pair to a singly linked
    list of ``[event, next_cell]`` cells.

    Attributes:
        firstEvent: dummy head cell; ``firstEvent[1]`` is the oldest queued
            cell, or None when the queue is empty.
        lastEvent: the cell new events are linked after.
        document: the minidom Document, created by ``startDocument``.
        curNode: the node whose content is currently being reported.
    """

    def __init__(self):
        self.firstEvent = [None, None]
        self.lastEvent = self.firstEvent
        # Stack of saved uri -> prefix dicts.  The "xml" prefix is bound by
        # definition and never announced through startPrefixMapping, so it
        # is seeded here; otherwise an xml:lang attribute raises KeyError.
        self._ns_contexts = [{"http://www.w3.org/XML/1998/namespace": "xml"}]
        self._current_context = self._ns_contexts[-1]

    def _append_event(self, token, node):
        """Link a new ``(token, node)`` cell onto the tail of the queue."""
        cell = [(token, node), None]
        self.lastEvent[1] = cell
        self.lastEvent = cell

    def _qualified_name(self, uri, localname):
        """Return *a* valid qualified name for ``localname`` in ``uri``.

        The default namespace is stored with a None prefix, in which case
        the bare local name is returned.
        """
        prefix = self._current_context[uri]
        if prefix:
            return prefix + ":" + localname
        return localname

    def setDocumentLocator(self, locator):
        # Location information is not used by this handler.
        pass

    def startPrefixMapping(self, prefix, uri):
        """Save the current uri -> prefix context, then bind ``uri``."""
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix

    def endPrefixMapping(self, prefix):
        """Restore the context saved by the matching startPrefixMapping."""
        # Popping (rather than just deleting) the saved copy is what makes
        # the mapping go out of scope again.
        self._current_context = self._ns_contexts.pop()

    def startElementNS(self, name, tagName, attrs):
        """Create an element for ``name`` and queue a START_ELEMENT event.

        ``name`` is a ``(uri, localname)`` pair; ``attrs`` maps
        ``(uri, localname)`` pairs to attribute values.
        """
        uri, localname = name
        if uri:
            # When using namespaces, the reader may or may not
            # provide us with the original name. If not, create
            # *a* valid tagName from the current context.
            if tagName is None:
                tagName = self._qualified_name(uri, localname)
            node = self.document.createElementNS(uri, tagName)
        else:
            # When the tagname is not prefixed, it just appears as
            # localname
            node = self.document.createElement(localname)

        for aname, value in attrs.items():
            a_uri, a_localname = aname
            if a_uri:
                qname = self._qualified_name(a_uri, a_localname)
                attr = self.document.createAttributeNS(a_uri, qname)
            else:
                attr = self.document.createAttribute(a_localname)
            attr.value = value
            node.setAttributeNode(attr)

        node.parentNode = self.curNode
        self.curNode = node
        self._append_event(START_ELEMENT, node)

    def endElementNS(self, name, tagName):
        """Queue END_ELEMENT for the current element and step to its parent."""
        node = self.curNode
        self._append_event(END_ELEMENT, node)
        self.curNode = node.parentNode

    def startElement(self, name, attrs):
        """Create an element named ``name`` and queue a START_ELEMENT event."""
        node = self.document.createElement(name)

        for aname, value in attrs.items():
            attr = self.document.createAttribute(aname)
            attr.value = value
            node.setAttributeNode(attr)

        node.parentNode = self.curNode
        self.curNode = node
        self._append_event(START_ELEMENT, node)

    def endElement(self, name):
        """Queue END_ELEMENT for the current element and step to its parent."""
        node = self.curNode
        self._append_event(END_ELEMENT, node)
        self.curNode = node.parentNode

    def comment(self, s):
        """Create a comment node for ``s`` and queue a COMMENT event."""
        node = self.document.createComment(s)
        node.parentNode = self.curNode
        self._append_event(COMMENT, node)

    def processingInstruction(self, target, data):
        """Create a PI node and queue a PROCESSING_INSTRUCTION event."""
        node = self.document.createProcessingInstruction(target, data)
        node.parentNode = self.curNode
        self._append_event(PROCESSING_INSTRUCTION, node)

    def ignorableWhitespace(self, chars):
        """Create a text node for ``chars``; queue IGNORABLE_WHITESPACE."""
        # The callback receives the whitespace itself; there is no
        # start/length pair to slice with.
        node = self.document.createTextNode(chars)
        node.parentNode = self.curNode
        self._append_event(IGNORABLE_WHITESPACE, node)

    def characters(self, chars):
        """Create a text node for ``chars`` and queue a CHARACTERS event."""
        node = self.document.createTextNode(chars)
        node.parentNode = self.curNode
        self._append_event(CHARACTERS, node)

    def startDocument(self):
        """Create the Document, make it current, queue START_DOCUMENT."""
        node = self.curNode = self.document = minidom.Document()
        node.parentNode = None
        self._append_event(START_DOCUMENT, node)

    def endDocument(self):
        """Record the document element, if any, and queue END_DOCUMENT."""
        assert not self.curNode.parentNode
        for child in self.curNode.childNodes:
            if child.nodeType == child.ELEMENT_NODE:
                self.document.documentElement = child
        #if not self.document.documentElement:
        #    raise Error, "No document element"

        # The event carries the document itself, not whichever child the
        # loop above happened to visit last (there may be none at all).
        self._append_event(END_DOCUMENT, self.document)
Fred Drake55c38192000-06-29 19:39:57 +0000140
class ErrorHandler:
    """Error handler that prints warnings and re-raises everything else."""

    def warning(self, exception):
        # Warnings are only reported; nothing is raised.
        print(exception)

    def error(self, exception):
        # Errors are handed straight back to the caller.
        raise exception

    def fatalError(self, exception):
        # Fatal errors are treated exactly like ordinary errors.
        raise exception
148
class DOMEventStream:
    """Pull events out of a PullDOM handler, feeding the parser on demand.

    ``stream`` must provide ``read(size)``; ``parser`` must provide
    ``setFeature``, ``setContentHandler`` and ``feed``.  Input is read in
    chunks of ``bufsize``.
    """

    def __init__(self, stream, parser, bufsize):
        self.stream, self.parser, self.bufsize = stream, parser, bufsize
        self.reset()

    def reset(self):
        """Install a fresh PullDOM handler on the parser."""
        self.pulldom = PullDOM()
        # This content handler relies on namespace support
        self.parser.setFeature(xml.sax.handler.feature_namespaces, 1)
        self.parser.setContentHandler(self.pulldom)

    def __getitem__(self, pos):
        """Return the next event; ``pos`` is ignored.

        Raises IndexError once no more events are available.
        """
        event = self.getEvent()
        if not event:
            raise IndexError
        return event

    def expandNode(self, node):
        """Consume events up to the one carrying ``node`` itself.

        Each node seen on the way is appended to the parent recorded in
        its ``parentNode``, except on END_ELEMENT events.
        """
        while True:
            event = self.getEvent()
            if not event:
                return
            token, cur_node = event
            if cur_node is node:
                return
            if token != END_ELEMENT:
                cur_node.parentNode.appendChild(cur_node)

    def getEvent(self):
        """Return the oldest queued ``(token, node)`` pair, or None.

        While the queue is empty, another chunk is read from the stream and
        fed to the parser; None is returned when the stream is exhausted.
        """
        handler = self.pulldom
        head = handler.firstEvent
        if not head[1]:
            # Queue drained: new events must be linked after the head again.
            handler.lastEvent = head
        while not head[1]:
            chunk = self.stream.read(self.bufsize)
            if not chunk:
                #FIXME: why doesn't Expat close work?
                #self.parser.close()
                return None
            self.parser.feed(chunk)
        # Unlink the oldest cell and hand back its event.
        cell = head[1]
        head[1] = cell[1]
        return cell[0]
191
class SAX2DOM(PullDOM):
    """PullDOM variant that also attaches each new node to its parent.

    Elements, processing instructions, ignorable whitespace and character
    data are appended to the node recorded in their ``parentNode`` as soon
    as the base class has created them.
    """

    def _attach_last_node(self):
        # The base class has just queued (token, node); link that node in.
        created = self.lastEvent[0][1]
        created.parentNode.appendChild(created)

    def startElementNS(self, name, tagName, attrs):
        PullDOM.startElementNS(self, name, tagName, attrs)
        element = self.curNode
        element.parentNode.appendChild(element)

    def startElement(self, name, attrs):
        PullDOM.startElement(self, name, attrs)
        element = self.curNode
        element.parentNode.appendChild(element)

    def processingInstruction(self, target, data):
        PullDOM.processingInstruction(self, target, data)
        self._attach_last_node()

    def ignorableWhitespace(self, chars):
        PullDOM.ignorableWhitespace(self, chars)
        self._attach_last_node()

    def characters(self, chars):
        PullDOM.characters(self, chars)
        self._attach_last_node()
216
# Default number of characters read from the stream per chunk.
default_bufsize = (2 ** 14) - 20


def parse(stream_or_string, parser=None, bufsize=default_bufsize):
    """Return a DOMEventStream over a file name or an open stream.

    Args:
        stream_or_string: a string is treated as a file name and opened;
            anything else is used as the stream itself.
        parser: parser to drive; when false, ``xml.sax.make_parser()``
            supplies one.
        bufsize: chunk size handed to the DOMEventStream.
    """
    try:
        string_types = basestring  # Python 2: covers str and unicode
    except NameError:
        string_types = str
    # isinstance (rather than an exact type comparison) also accepts
    # string subclasses and, on Python 2, unicode file names.
    if isinstance(stream_or_string, string_types):
        stream = open(stream_or_string)
    else:
        stream = stream_or_string
    if not parser:
        parser = xml.sax.make_parser()
    return DOMEventStream(stream, parser, bufsize)
Fred Drake55c38192000-06-29 19:39:57 +0000227
def parseString(string, parser=None):
    """Return a DOMEventStream over the XML text in ``string``.

    The buffer size is the length of ``string``.  When ``parser`` is
    false, ``xml.sax.make_parser()`` supplies one.
    """
    # Prefer the C implementation when it is available.
    try:
        from cStringIO import StringIO
    except ImportError:
        from StringIO import StringIO

    size = len(string)
    source = StringIO(string)
    if not parser:
        parser = xml.sax.make_parser()
    return DOMEventStream(source, parser, size)