blob: f1e51269c8a4905a81de3fd070a6ba396b901357 [file] [log] [blame]
Fred Drake55c38192000-06-29 19:39:57 +00001import minidom
Martin v. Löwisa13a9dc2000-09-24 21:54:14 +00002import xml.sax,xml.sax.handler
Fred Drake55c38192000-06-29 19:39:57 +00003
# Event tokens: the first item of every (token, node) event pair.

# Document boundaries.
START_DOCUMENT = "START_DOCUMENT"
END_DOCUMENT = "END_DOCUMENT"

# Element boundaries.
START_ELEMENT = "START_ELEMENT"
END_ELEMENT = "END_ELEMENT"

# Character data.
CHARACTERS = "CHARACTERS"
IGNORABLE_WHITESPACE = "IGNORABLE_WHITESPACE"

# Other markup.
COMMENT = "COMMENT"
PROCESSING_INSTRUCTION = "PROCESSING_INSTRUCTION"
Fred Drake55c38192000-06-29 19:39:57 +000012
class PullDOM(xml.sax.ContentHandler):
    """SAX content handler that queues parse events as (token, node) pairs.

    Events are kept in a singly linked list of two-item cells
    ``[(token, node), next_cell]``.  ``firstEvent`` is a dummy head cell
    and ``lastEvent`` is the cell new events are chained onto.

    Nodes are created through ``self.document`` and get their
    ``parentNode`` set, but this class never adds them to the parent's
    ``childNodes``.
    """

    def __init__(self):
        self.firstEvent = [None, None]
        self.lastEvent = self.firstEvent
        # Stack of saved uri -> prefix dicts.  The "xml" prefix is bound
        # implicitly and is never announced through startPrefixMapping,
        # so it is seeded here to keep xml:* attributes resolvable.
        self._ns_contexts = [{"http://www.w3.org/XML/1998/namespace": "xml"}]
        self._current_context = self._ns_contexts[-1]

    def setDocumentLocator(self, locator):
        """Ignore the locator; position information is not used."""
        pass

    def startPrefixMapping(self, prefix, uri):
        """Save the active uri -> prefix context, then bind uri to prefix."""
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix

    def endPrefixMapping(self, prefix):
        """Restore the context saved by the matching startPrefixMapping."""
        # Popping without restoring would let the mapping leak out of the
        # scope it was declared in.
        self._current_context = self._ns_contexts.pop()

    def _qualified_name(self, uri, localname):
        """Return a qualified name for localname using the active prefix."""
        prefix = self._current_context[uri]
        if prefix:
            return prefix + ":" + localname
        # SAX reports the default namespace with a prefix of None; such
        # names carry no prefix.
        return localname

    def _link_to_parent(self, node):
        """Set node's parent to curNode and chain it after the last child."""
        parent = self.curNode
        node.parentNode = parent
        if parent.childNodes:
            previous = parent.childNodes[-1]
            node.previousSibling = previous
            previous.nextSibling = node

    def _push_event(self, token, node):
        """Append a (token, node) cell to the event list and advance the tail."""
        cell = [(token, node), None]
        self.lastEvent[1] = cell
        self.lastEvent = cell

    def startElementNS(self, name, tagName, attrs):
        """Create an element with its attributes and queue START_ELEMENT.

        name is a (uri, localname) pair; attrs maps (uri, localname)
        pairs to values.  The new element becomes curNode.
        """
        uri, localname = name
        if uri:
            # When using namespaces, the reader may or may not provide
            # us with the original name.  If not, create *a* valid
            # tagName from the current context.
            if tagName is None:
                tagName = self._qualified_name(uri, localname)
            node = self.document.createElementNS(uri, tagName)
        else:
            # When the tagname is not prefixed, it just appears as
            # localname.
            node = self.document.createElement(localname)

        for aname, value in attrs.items():
            a_uri, a_localname = aname
            if a_uri:
                qname = self._qualified_name(a_uri, a_localname)
                attr = self.document.createAttributeNS(a_uri, qname)
            else:
                attr = self.document.createAttribute(a_localname)
            attr.value = value
            node.setAttributeNode(attr)

        self._link_to_parent(node)
        self.curNode = node
        self._push_event(START_ELEMENT, node)

    def endElementNS(self, name, tagName):
        """Queue END_ELEMENT for curNode and step back to its parent."""
        node = self.curNode
        self._push_event(END_ELEMENT, node)
        self.curNode = node.parentNode

    def comment(self, s):
        """Create a comment node under curNode and queue COMMENT."""
        node = self.document.createComment(s)
        self._link_to_parent(node)
        self._push_event(COMMENT, node)

    def processingInstruction(self, target, data):
        """Create a PI node under curNode and queue PROCESSING_INSTRUCTION."""
        node = self.document.createProcessingInstruction(target, data)
        self._link_to_parent(node)
        self._push_event(PROCESSING_INSTRUCTION, node)

    def ignorableWhitespace(self, chars):
        """Create a text node for chars and queue IGNORABLE_WHITESPACE."""
        # The handler receives the whitespace itself; there is no
        # start/length pair to slice with.
        node = self.document.createTextNode(chars)
        self._link_to_parent(node)
        self._push_event(IGNORABLE_WHITESPACE, node)

    def characters(self, chars):
        """Create a text node for chars and queue CHARACTERS.

        Only parentNode is set; sibling links are not touched here.
        """
        node = self.document.createTextNode(chars)
        node.parentNode = self.curNode
        self._push_event(CHARACTERS, node)

    def startDocument(self):
        """Create the Document, make it curNode and queue START_DOCUMENT."""
        node = self.curNode = self.document = minidom.Document()
        node.parentNode = None
        self._push_event(START_DOCUMENT, node)

    def endDocument(self):
        """Record the document element, if linked, and queue END_DOCUMENT."""
        assert not self.curNode.parentNode
        for node in self.curNode.childNodes:
            if node.nodeType == node.ELEMENT_NODE:
                self.document.documentElement = node

        # The event carries the document itself.  The loop variable is
        # unbound when the document has no linked children, and would
        # otherwise be a child rather than the document.  As before, the
        # tail pointer is not advanced past this final event.
        self.lastEvent[1] = [(END_DOCUMENT, self.document), None]
Fred Drake55c38192000-06-29 19:39:57 +0000129
class ErrorHandler:
    """Error handler that prints warnings and raises everything else."""

    def warning(self, exception):
        """Print the warning and carry on."""
        # Parenthesised form: same output under the Python 2 print
        # statement, and still valid where print is a function.
        print(exception)

    def error(self, exception):
        """Raise the exception passed in."""
        raise exception

    def fatalError(self, exception):
        """Raise the exception passed in."""
        raise exception
137
class DOMEventStream:
    """Pull (token, node) events from a stream through a SAX parser.

    Data is read from ``stream`` in pieces of ``bufsize`` and fed to
    ``parser``; the events produced by the attached PullDOM handler are
    handed out one at a time.
    """

    def __init__(self, stream, parser, bufsize):
        self.stream, self.parser, self.bufsize = stream, parser, bufsize
        self.reset()

    def reset(self):
        """Attach a fresh PullDOM handler to the parser."""
        handler = PullDOM()
        self.pulldom = handler
        # This content handler relies on namespace support
        self.parser.setFeature(xml.sax.handler.feature_namespaces, 1)
        self.parser.setContentHandler(handler)

    def __getitem__(self, pos):
        """Return the next event; pos is ignored.

        Raises IndexError when no event is left.
        """
        event = self.getEvent()
        if not event:
            raise IndexError
        return event

    def expandNode(self, node):
        """Consume events until one refers to node, or none are left.

        Every node seen on the way, except on END_ELEMENT events, is
        appended to its parent.
        """
        while 1:
            event = self.getEvent()
            if not event:
                return
            token, current = event
            if current is node:
                return
            if token != END_ELEMENT:
                current.parentNode.appendChild(current)

    def getEvent(self):
        """Return the next (token, node) pair, or None at end of input."""
        handler = self.pulldom
        head = handler.firstEvent
        if not head[1]:
            # List is empty: new events must chain from the head cell.
            handler.lastEvent = head
        while not head[1]:
            chunk = self.stream.read(self.bufsize)
            if not chunk:
                #FIXME: why doesn't Expat close work?
                #self.parser.close()
                return None
            self.parser.feed(chunk)
        cell = head[1]
        head[1] = cell[1]
        return cell[0]
180
default_bufsize = (2 ** 14) - 20

# FIXME: move into sax package for common usage
def parse(stream_or_string, parser=None, bufsize=default_bufsize):
    """Return a DOMEventStream over a file name or an open stream.

    stream_or_string -- a file name (string), which is opened here, or
                        any object with a read(size) method, used as is.
    parser           -- SAX parser to drive; one is created with
                        xml.sax.make_parser() when omitted.
    bufsize          -- amount of data requested from the stream per read.

    A file opened here is not closed by this function.
    """
    if isinstance(stream_or_string, (type(""), type(u""))):
        # Binary mode: hand the parser the bytes exactly as stored, so
        # no newline translation or text decoding is applied before the
        # parser sees the data.
        stream = open(stream_or_string, "rb")
    else:
        stream = stream_or_string
    if not parser:
        parser = xml.sax.make_parser()
    return DOMEventStream(stream, parser, bufsize)
Fred Drake55c38192000-06-29 19:39:57 +0000192
def parseString(string, parser=None):
    """Return a DOMEventStream over an in-memory document.

    The whole string is wrapped in a StringIO buffer and the read size
    is set to its length.  A parser is created with
    xml.sax.make_parser() when none is given.
    """
    try:
        from cStringIO import StringIO
    except ImportError:
        from StringIO import StringIO

    size = len(string)
    source = StringIO(string)
    return DOMEventStream(source, parser or xml.sax.make_parser(), size)
Fred Drake1f549022000-09-24 05:21:58 +0000203 return DOMEventStream(buf, parser, bufsize)