blob: 9f93d6aa9de7a43fab811d72cb019a5f633ffd3d [file] [log] [blame]
Fred Drake55c38192000-06-29 19:39:57 +00001import minidom
Martin v. Löwisa13a9dc2000-09-24 21:54:14 +00002import xml.sax,xml.sax.handler
Fred Drake55c38192000-06-29 19:39:57 +00003
# Event type tags.  The handler below queues (tag, node) pairs whose
# first item is one of these strings.

# Document boundaries.
START_DOCUMENT = "START_DOCUMENT"
END_DOCUMENT = "END_DOCUMENT"

# Element boundaries.
START_ELEMENT = "START_ELEMENT"
END_ELEMENT = "END_ELEMENT"

# Everything else that can show up inside a document.
CHARACTERS = "CHARACTERS"
IGNORABLE_WHITESPACE = "IGNORABLE_WHITESPACE"
COMMENT = "COMMENT"
PROCESSING_INSTRUCTION = "PROCESSING_INSTRUCTION"
Fred Drake55c38192000-06-29 19:39:57 +000012
class PullDOM(xml.sax.ContentHandler):
    """SAX content handler that queues (event, node) pairs.

    The queue is a singly linked list of two-item lists
    ``[(event, node), next]``.  ``firstEvent`` is a dummy head whose
    second slot points at the oldest queued entry; ``lastEvent`` is the
    entry that the next event will be linked onto.

    Nodes are created through ``self.document`` and get their
    ``parentNode`` (and, for most node types, sibling links) set here,
    but they are not added to the parent's ``childNodes`` by this class.
    """

    def __init__(self):
        xml.sax.ContentHandler.__init__(self)
        self.firstEvent = [None, None]
        self.lastEvent = self.firstEvent
        # Stack of saved uri -> prefix dicts, one per open prefix mapping.
        # The "xml" prefix is bound by definition and is never announced
        # through startPrefixMapping, so it is seeded here.
        self._ns_contexts = [{"http://www.w3.org/XML/1998/namespace": "xml"}]
        self._current_context = self._ns_contexts[-1]

    def setDocumentLocator(self, locator):
        """Ignore the locator; this handler does not use it."""
        pass

    # -- private helpers ---------------------------------------------------

    def _enqueue(self, event, node):
        """Link a new (event, node) entry onto the end of the queue."""
        entry = [(event, node), None]
        self.lastEvent[1] = entry
        self.lastEvent = entry
        #self.events.append((event, node))

    def _attach(self, node):
        """Point node at the current node as parent and wire sibling links."""
        parent = self.curNode
        node.parentNode = parent
        if parent.childNodes:
            node.previousSibling = parent.childNodes[-1]
            node.previousSibling.nextSibling = node

    def _qualified_name(self, uri, localname):
        """Build *a* valid qualified name for (uri, localname).

        The prefix comes from the mappings currently in scope.  A false
        prefix (None or "") means the default namespace, in which case
        the local name is used on its own.
        Raises KeyError if no mapping for uri is in scope.
        """
        prefix = self._current_context[uri]
        if prefix:
            return prefix + ":" + localname
        return localname

    # -- namespace scoping -------------------------------------------------

    def startPrefixMapping(self, prefix, uri):
        """Open a namespace scope in which uri maps to prefix."""
        # Save the enclosing scope untouched and continue on a copy, so
        # that endPrefixMapping can restore it exactly.
        self._ns_contexts.append(self._current_context)
        self._current_context = self._current_context.copy()
        self._current_context[uri] = prefix

    def endPrefixMapping(self, prefix):
        """Close the innermost namespace scope, restoring the enclosing one."""
        self._current_context = self._ns_contexts.pop()

    # -- element events ----------------------------------------------------

    def startElementNS(self, name, tagName, attrs):
        """Create an element node, make it current and queue START_ELEMENT.

        name is a (uri, localname) pair; tagName is the qualified name
        and may be None; attrs maps (uri, localname) pairs to values.
        """
        uri, localname = name
        if uri:
            # When using namespaces, the reader may or may not
            # provide us with the original name. If not, create
            # *a* valid tagName from the current context.
            if tagName is None:
                tagName = self._qualified_name(uri, localname)
            node = self.document.createElementNS(uri, tagName)
        else:
            # When the tagname is not prefixed, it just appears as
            # the local name.
            node = self.document.createElement(localname)

        for aname, value in attrs.items():
            a_uri, a_localname = aname
            if a_uri:
                qname = self._qualified_name(a_uri, a_localname)
                attr = self.document.createAttributeNS(a_uri, qname)
            else:
                attr = self.document.createAttribute(a_localname)
            attr.value = value
            node.setAttributeNode(attr)

        self._attach(node)
        self.curNode = node
        self._enqueue(START_ELEMENT, node)

    def endElementNS(self, name, tagName):
        """Queue END_ELEMENT for the current node and step up to its parent."""
        node = self.curNode
        self._enqueue(END_ELEMENT, node)
        self.curNode = node.parentNode

    # -- other content events ----------------------------------------------

    def comment(self, s):
        """Create a comment node under the current node and queue COMMENT."""
        node = self.document.createComment(s)
        self._attach(node)
        self._enqueue(COMMENT, node)

    def processingInstruction(self, target, data):
        """Create a processing-instruction node and queue it."""
        node = self.document.createProcessingInstruction(target, data)
        #self.appendChild(node)
        self._attach(node)
        self._enqueue(PROCESSING_INSTRUCTION, node)

    def ignorableWhitespace(self, chars):
        """Create a text node for chars and queue IGNORABLE_WHITESPACE."""
        # chars is already the complete run of whitespace; there is no
        # separate start/length pair to slice with.
        node = self.document.createTextNode(chars)
        self._attach(node)
        self._enqueue(IGNORABLE_WHITESPACE, node)

    def characters(self, chars):
        """Create a text node for chars and queue CHARACTERS.

        Only parentNode is set; sibling links are left alone.
        """
        node = self.document.createTextNode(chars)
        node.parentNode = self.curNode
        self._enqueue(CHARACTERS, node)

    # -- document events ---------------------------------------------------

    def startDocument(self):
        """Create a fresh document, make it current and queue START_DOCUMENT."""
        node = self.curNode = self.document = minidom.Document()
        node.parentNode = None
        self._enqueue(START_DOCUMENT, node)

    def endDocument(self):
        """Record the document element, if any, and queue END_DOCUMENT."""
        assert not self.curNode.parentNode
        for child in self.curNode.childNodes:
            if child.nodeType == child.ELEMENT_NODE:
                self.document.documentElement = child
        #if not self.document.documentElement:
        #    raise Error, "No document element"

        # The event carries the document itself.  The loop variable above
        # is unbound when the document has no children.
        self._enqueue(END_DOCUMENT, self.document)
Fred Drake55c38192000-06-29 19:39:57 +0000127
class ErrorHandler:
    """Error handler that prints warnings and raises everything else."""

    def warning(self, exception):
        """Print the exception and carry on."""
        print(exception)

    def error(self, exception):
        """Raise the exception that was passed in."""
        raise exception

    def fatalError(self, exception):
        """Raise the exception that was passed in."""
        raise exception
135
class DOMEventStream:
    """Pull (event, node) pairs out of a stream on demand.

    The stream is read bufsize units at a time and fed to the parser
    only when the handler's event queue has run dry.
    """

    def __init__(self, stream, parser, bufsize):
        self.stream = stream
        self.parser = parser
        self.bufsize = bufsize
        self.reset()

    def reset(self):
        """Install a fresh PullDOM handler on the parser."""
        self.pulldom = PullDOM()
        # This content handler relies on namespace support
        self.parser.setFeature(xml.sax.handler.feature_namespaces, 1)
        self.parser.setContentHandler(self.pulldom)

    def __getitem__(self, pos):
        """Return the next event regardless of pos; IndexError when done."""
        event = self.getEvent()
        if not event:
            raise IndexError
        return event

    def expandNode(self, node):
        """Consume events until one carrying node itself turns up.

        Along the way, every node delivered with an event other than
        END_ELEMENT is appended to its parent.
        """
        while 1:
            event = self.getEvent()
            if not event:
                return
            token, cur_node = event
            if cur_node is node:
                return
            if token != END_ELEMENT:
                cur_node.parentNode.appendChild(cur_node)

    def getEvent(self):
        """Return the oldest queued event, or None once the stream is empty."""
        handler = self.pulldom
        if not handler.firstEvent[1]:
            # Queue is empty: new events must be linked onto the head.
            handler.lastEvent = handler.firstEvent
        while not handler.firstEvent[1]:
            chunk = self.stream.read(self.bufsize)
            if not chunk:
                #FIXME: why doesn't Expat close work?
                #self.parser.close()
                return None
            self.parser.feed(chunk)
        event, remainder = handler.firstEvent[1]
        handler.firstEvent[1] = remainder
        return event
178
# Default read size used by parse(): 20 short of 16 KiB.
default_bufsize = 2 ** 14 - 20
180
# FIXME: move into sax package for common usage
def parse(stream_or_string, parser=None, bufsize=default_bufsize):
    """Return a DOMEventStream over a file name or an open stream.

    stream_or_string -- a file name (a str, or an instance of a str
        subclass), which is opened here, or any object with a
        read(size) method, which is used as is.
    parser -- SAX parser to drive; one is created with
        xml.sax.make_parser() when this is omitted or false.
    bufsize -- amount requested from the stream on each read.
    """
    if isinstance(stream_or_string, str):
        # Binary mode: the XML parser should see the raw bytes so that
        # it can apply the document's own encoding declaration.
        stream = open(stream_or_string, "rb")
    else:
        stream = stream_or_string
    if not parser:
        parser = xml.sax.make_parser()
    return DOMEventStream(stream, parser, bufsize)
Fred Drake55c38192000-06-29 19:39:57 +0000190
def parseString(string, parser=None):
    """Return a DOMEventStream over a document held in a string.

    The read size is the length of the string, so the whole document
    is requested from the in-memory stream in one read.
    """
    try:
        from cStringIO import StringIO
    except ImportError:
        from StringIO import StringIO

    whole = len(string)
    source = StringIO(string)
    parser = parser or xml.sax.make_parser()
    return DOMEventStream(source, parser, whole)