blob: cedbe4f4a373e9636d66f441207c9d9c46f470ea [file] [log] [blame]
Fred Drake55c38192000-06-29 19:39:57 +00001import minidom
Martin v. Löwisa13a9dc2000-09-24 21:54:14 +00002import xml.sax,xml.sax.handler
Fred Drake55c38192000-06-29 19:39:57 +00003
# Event-type tokens.  Every event queued by the content handler in this
# module is an (event_type, node) tuple whose first item is one of the
# strings below.
START_ELEMENT = "START_ELEMENT"
END_ELEMENT = "END_ELEMENT"
COMMENT = "COMMENT"
START_DOCUMENT = "START_DOCUMENT"
END_DOCUMENT = "END_DOCUMENT"
PROCESSING_INSTRUCTION = "PROCESSING_INSTRUCTION"
IGNORABLE_WHITESPACE = "IGNORABLE_WHITESPACE"
CHARACTERS = "CHARACTERS"
Fred Drake55c38192000-06-29 19:39:57 +000012
class PullDOM(xml.sax.ContentHandler):
    """SAX content handler that builds a minidom tree and queues events.

    Each callback creates the matching DOM node, attaches it to the node
    currently being built (``curNode``) and appends an ``(event, node)``
    tuple to a singly linked list made of ``[payload, next]`` cells.
    ``firstEvent`` is a dummy head cell and ``lastEvent`` is the tail
    cell; a consumer takes events by advancing ``firstEvent[1]``.
    """

    def __init__(self):
        self.firstEvent = [None, None]
        self.lastEvent = self.firstEvent
        # Stack of saved uri -> prefix dicts, one entry per open mapping.
        self._ns_contexts = [{}]
        self._current_context = self._ns_contexts[-1]

    def setDocumentLocator(self, locator):
        """Ignore the locator; this handler does not use it."""
        pass

    def startPrefixMapping(self, prefix, uri):
        """Bring the *uri* -> *prefix* binding into scope."""
        # Save the outer context untouched and work on a copy, so that
        # endPrefixMapping() can restore exactly what was in scope before.
        self._ns_contexts.append(self._current_context)
        self._current_context = self._current_context.copy()
        self._current_context[uri] = prefix

    def endPrefixMapping(self, prefix):
        """Drop the innermost binding and restore the enclosing context."""
        self._current_context = self._ns_contexts.pop()

    def _qualified_name(self, uri, localname):
        """Return *a* valid qualified name for *localname* in *uri*.

        Falls back to the bare local name when *uri* is bound to the
        default namespace (prefix ``None``) or has no binding in the
        current context.
        """
        prefix = self._current_context.get(uri)
        if prefix:
            return prefix + ":" + localname
        return localname

    def _attach(self, node):
        """Append *node* to the node under construction and link it back."""
        parent = self.curNode
        parent.appendChild(node)
        node.parentNode = parent

    def _push_event(self, token, node):
        """Append ``(token, node)`` to the tail of the event list."""
        cell = [(token, node), None]
        self.lastEvent[1] = cell
        self.lastEvent = cell

    def startElementNS(self, name, tagName, attrs):
        """Create a namespace-aware element and queue START_ELEMENT.

        *name* is a ``(uri, localname)`` pair, *tagName* is the qualified
        name or ``None``, and the keys of *attrs* are ``(uri, localname)``
        pairs as well.
        """
        uri, localname = name
        if uri:
            # When using namespaces, the reader may or may not provide us
            # with the original name.  If not, create *a* valid tagName
            # from the current context.
            if tagName is None:
                tagName = self._qualified_name(uri, localname)
            node = self.document.createElementNS(uri, tagName)
        else:
            # When the tagname is not prefixed, it just appears as
            # localname.
            node = self.document.createElement(localname)

        for aname, value in attrs.items():
            a_uri, a_localname = aname
            if a_uri:
                qname = self._qualified_name(a_uri, a_localname)
                attr = self.document.createAttributeNS(a_uri, qname)
            else:
                attr = self.document.createAttribute(a_localname)
            attr.value = value
            node.setAttributeNode(attr)

        self._attach(node)
        self.curNode = node
        self._push_event(START_ELEMENT, node)

    def endElementNS(self, name, tagName):
        """Queue END_ELEMENT for the open element and step to its parent."""
        node = self.curNode
        self._push_event(END_ELEMENT, node)
        self.curNode = node.parentNode

    def startElement(self, name, attrs):
        """Create an element without namespace handling; queue START_ELEMENT."""
        node = self.document.createElement(name)

        for aname, value in attrs.items():
            attr = self.document.createAttribute(aname)
            attr.value = value
            node.setAttributeNode(attr)

        self._attach(node)
        self.curNode = node
        self._push_event(START_ELEMENT, node)

    def endElement(self, name):
        """Queue END_ELEMENT for the open element and step to its parent."""
        node = self.curNode
        self._push_event(END_ELEMENT, node)
        self.curNode = node.parentNode

    def comment(self, s):
        """Attach a comment node holding *s* and queue COMMENT."""
        node = self.document.createComment(s)
        self._attach(node)
        self._push_event(COMMENT, node)

    def processingInstruction(self, target, data):
        """Attach a processing-instruction node; queue PROCESSING_INSTRUCTION."""
        node = self.document.createProcessingInstruction(target, data)
        self._attach(node)
        self._push_event(PROCESSING_INSTRUCTION, node)

    def ignorableWhitespace(self, chars):
        """Attach a text node holding *chars*; queue IGNORABLE_WHITESPACE."""
        # The callback receives only the text itself, so use it as-is;
        # there is no start/length pair to slice with.
        node = self.document.createTextNode(chars)
        self._attach(node)
        self._push_event(IGNORABLE_WHITESPACE, node)

    def characters(self, chars):
        """Attach a text node holding *chars* and queue CHARACTERS."""
        node = self.document.createTextNode(chars)
        self._attach(node)
        self._push_event(CHARACTERS, node)

    def startDocument(self):
        """Create the document, make it the current node, queue START_DOCUMENT."""
        node = self.curNode = self.document = minidom.Document()
        node.parentNode = None
        self._push_event(START_DOCUMENT, node)

    def endDocument(self):
        """Record the document element and queue END_DOCUMENT."""
        # Every element must have been closed, leaving the document itself
        # as the current node.
        assert not self.curNode.parentNode
        for node in self.curNode.childNodes:
            if node.nodeType == node.ELEMENT_NODE:
                self.document.documentElement = node

        # Report the document itself, not whichever child the loop above
        # happened to visit last (and which is unbound for an empty
        # document).
        self._push_event(END_DOCUMENT, self.document)
Fred Drake55c38192000-06-29 19:39:57 +0000146
class ErrorHandler:
    """Error handler that prints warnings and re-raises errors.

    Offers the ``warning``/``error``/``fatalError`` callbacks, each taking
    the exception object describing the problem.
    """

    def warning(self, exception):
        """Print *exception* to standard output and return normally."""
        # The parenthesised form is a valid Python 2 print statement and a
        # valid Python 3 function call, and prints the same text in both.
        print(exception)

    def error(self, exception):
        """Raise *exception*."""
        raise exception

    def fatalError(self, exception):
        """Raise *exception*."""
        raise exception
154
class DOMEventStream:
    """Pull-style source of ``(event, node)`` tuples read from a stream.

    Data is read from *stream* in chunks of *bufsize* and fed to *parser*
    only when the queue of pending events has run dry, so parsing
    advances no further than the consumer asks for.
    """

    def __init__(self, stream, parser, bufsize):
        self.stream, self.parser, self.bufsize = stream, parser, bufsize
        self.reset()

    def reset(self):
        """Install a fresh PullDOM handler on the parser."""
        self.pulldom = PullDOM()
        # This content handler relies on namespace support
        self.parser.setFeature(xml.sax.handler.feature_namespaces, 1)
        self.parser.setContentHandler(self.pulldom)

    def __getitem__(self, pos):
        """Return the next event, ignoring *pos*; IndexError when exhausted.

        Lets the stream be used directly in a ``for`` loop.
        """
        event = self.getEvent()
        if not event:
            raise IndexError
        return event

    def expandNode(self, node):
        """Consume events up to and including the next one for *node*.

        Stops early if the stream runs out of events first.
        """
        event = self.getEvent()
        while event and event[1] is not node:
            event = self.getEvent()

    def getEvent(self):
        """Return the next ``(event, node)`` tuple, or None at end of input."""
        head = self.pulldom.firstEvent
        if not head[1]:
            # Nothing pending: make the handler append right after the
            # dummy head cell again.
            self.pulldom.lastEvent = head
        while not head[1]:
            chunk = self.stream.read(self.bufsize)
            if not chunk:
                #FIXME: why doesn't Expat close work?
                #self.parser.close()
                return None
            self.parser.feed(chunk)
        pending = head[1]
        # Unlink the cell being handed out.
        head[1] = pending[1]
        return pending[0]
195
default_bufsize = (2 ** 14) - 20

# FIXME: move into sax package for common usage
def parse(stream_or_string, parser=None, bufsize=default_bufsize):
    """Return a DOMEventStream over a file name or an open stream.

    stream_or_string -- a path to open, or an object with a ``read``
        method that is used as-is.
    parser -- SAX parser to drive; one is created with
        ``xml.sax.make_parser()`` when this is omitted or false.
    bufsize -- number of bytes/characters requested from the stream per
        read.
    """
    # isinstance() also accepts unicode paths and str subclasses, which an
    # exact type comparison against type("") rejects.
    if isinstance(stream_or_string, (type(""), type(u""))):
        # Binary mode hands the parser the raw bytes, so the encoding
        # declared by the document itself is what gets honoured.
        stream = open(stream_or_string, "rb")
    else:
        stream = stream_or_string
    if not parser:
        parser = xml.sax.make_parser()
    return DOMEventStream(stream, parser, bufsize)
Fred Drake55c38192000-06-29 19:39:57 +0000207
def parseString(string, parser=None):
    """Return a DOMEventStream over the XML text held in *string*.

    The text is wrapped in an in-memory file object and the buffer size
    is set to the length of the text.  A parser is created with
    ``xml.sax.make_parser()`` when *parser* is omitted or false.
    """
    # Prefer the C implementation and fall back to the pure-Python one.
    try:
        from cStringIO import StringIO
    except ImportError:
        from StringIO import StringIO

    source = StringIO(string)
    if not parser:
        parser = xml.sax.make_parser()
    return DOMEventStream(source, parser, len(string))
Fred Drake1f549022000-09-24 05:21:58 +0000218 return DOMEventStream(buf, parser, bufsize)