blob: 011d46ae1e490acc334cda42230eeaeb3d2a28a5 [file] [log] [blame]
Fred Drake55c38192000-06-29 19:39:57 +00001import minidom
Lars Gustäbelb798c012000-09-21 08:38:46 +00002import xml.sax
Fred Drake55c38192000-06-29 19:39:57 +00003
# TODO: SAX2/namespace handling
5
# Event type tags.  Each queued event is a pair whose first item is one of
# these strings and whose second item is the DOM node the event refers to.
START_DOCUMENT = "START_DOCUMENT"
END_DOCUMENT = "END_DOCUMENT"
START_ELEMENT = "START_ELEMENT"
END_ELEMENT = "END_ELEMENT"
CHARACTERS = "CHARACTERS"
IGNORABLE_WHITESPACE = "IGNORABLE_WHITESPACE"
COMMENT = "COMMENT"
PROCESSING_INSTRUCTION = "PROCESSING_INSTRUCTION"
Fred Drake55c38192000-06-29 19:39:57 +000014
class PullDOM:
    """SAX content handler that turns parser callbacks into (event, node) pairs.

    Nodes are created through a minidom Document.  Events are queued in a
    singly linked list of two-item cells ``[(event, node), next_cell]``:
    ``firstEvent`` is a dummy head cell and ``lastEvent`` is the cell that
    was appended most recently.
    """

    def __init__(self):
        self.firstEvent = [None, None]
        self.lastEvent = self.firstEvent

    def _appendEvent(self, event, node):
        """Queue ``(event, node)`` after the current tail and advance the tail."""
        cell = [(event, node), None]
        self.lastEvent[1] = cell
        self.lastEvent = cell

    def _linkToParent(self, node):
        """Point ``node`` at the current node and at its last existing child.

        Only ``parentNode`` and the sibling links are set; the node is not
        added to the parent's ``childNodes`` here.
        """
        parent = self.curNode
        node.parentNode = parent
        if parent.childNodes:
            node.previousSibling = parent.childNodes[-1]
            node.previousSibling.nextSibling = node

    def setDocumentLocator(self, locator):
        """Accept and ignore the locator."""
        pass

    def startElement(self, name, tagName, attrs):
        """Create an element named ``tagName`` and queue START_ELEMENT.

        ``name`` is not used.  Every key of ``attrs`` is copied onto the new
        element, which then becomes the current node.
        """
        if not hasattr(self, "curNode"):
            # FIXME: hack!
            self.startDocument()

        node = self.document.createElement(tagName)  # FIXME namespaces!
        for attr in attrs.keys():
            node.setAttribute(attr, attrs[attr])

        self._linkToParent(node)
        self.curNode = node
        # FIXME: do I have to screen namespace attributes
        self._appendEvent(START_ELEMENT, node)

    def endElement(self, name, tagName):
        """Queue END_ELEMENT for the current node and step back to its parent."""
        node = self.curNode
        self._appendEvent(END_ELEMENT, node)
        self.curNode = node.parentNode

    def comment(self, s):
        """Create a comment node for ``s`` and queue COMMENT."""
        node = self.document.createComment(s)
        self._linkToParent(node)
        self._appendEvent(COMMENT, node)

    def processingInstruction(self, target, data):
        """Create a processing-instruction node and queue PROCESSING_INSTRUCTION."""
        node = self.document.createProcessingInstruction(target, data)
        self._linkToParent(node)
        self._appendEvent(PROCESSING_INSTRUCTION, node)

    def ignorableWhitespace(self, chars):
        """Create a text node for ``chars`` and queue IGNORABLE_WHITESPACE."""
        # The whole string is the whitespace run: this callback receives no
        # start/length arguments, so there is nothing to slice by.
        node = self.document.createTextNode(chars)
        self._linkToParent(node)
        self._appendEvent(IGNORABLE_WHITESPACE, node)

    def characters(self, chars):
        """Create a text node for ``chars`` and queue CHARACTERS.

        Only ``parentNode`` is set on the new node; sibling links are not.
        """
        node = self.document.createTextNode(chars)
        node.parentNode = self.curNode
        self._appendEvent(CHARACTERS, node)

    def startDocument(self):
        """Create the Document, make it the current node, queue START_DOCUMENT."""
        node = self.curNode = self.document = minidom.Document()
        node.parentNode = None
        self._appendEvent(START_DOCUMENT, node)

    def endDocument(self):
        """Record the document element, if any, and queue END_DOCUMENT."""
        assert not self.curNode.parentNode
        for node in self.curNode.childNodes:
            if node.nodeType == node.ELEMENT_NODE:
                self.document.documentElement = node
        #if not self.document.documentElement:
        #    raise Error, "No document element"

        # The event carries the document itself, not the loop variable above
        # (which is the last child, or unbound when there are no children).
        # NOTE: unlike the other handlers this does not advance lastEvent.
        self.lastEvent[1] = [(END_DOCUMENT, self.document), None]
Fred Drake55c38192000-06-29 19:39:57 +0000107
class ErrorHandler:
    """Error handler that prints warnings and raises everything else."""

    def warning(self, exception):
        """Print ``exception`` and carry on."""
        # Parenthesized form: prints the same text as the print statement
        # and is also valid where print is a function.
        print(exception)

    def error(self, exception):
        """Raise ``exception``."""
        raise exception

    def fatalError(self, exception):
        """Raise ``exception``."""
        raise exception
115
class DOMEventStream:
    """Hand out (event, node) pairs from a stream that is parsed on demand.

    The stream is read ``bufsize`` items at a time and fed to ``parser``,
    whose content handler is a PullDOM instance that queues the events.
    """

    def __init__(self, stream, parser, bufsize):
        self.stream, self.parser, self.bufsize = stream, parser, bufsize
        self.reset()

    def reset(self):
        """Install a fresh PullDOM as the parser's content handler."""
        handler = PullDOM()
        self.pulldom = handler
        self.parser.setContentHandler(handler)

    def __getitem__(self, pos):
        """Return the next event; ``pos`` is ignored.

        Raises IndexError once getEvent() has nothing more to return.
        """
        event = self.getEvent()
        if not event:
            raise IndexError
        return event

    def expandNode(self, node):
        """Consume events until one refers to ``node`` or none are left.

        The node of every consumed event other than END_ELEMENT is appended
        to its parent's children.
        """
        event = self.getEvent()
        while event and event[1] is not node:
            token, cur_node = event
            if token != END_ELEMENT:
                cur_node.parentNode.appendChild(cur_node)
            event = self.getEvent()

    def getEvent(self):
        """Return the next queued event, or None when the stream is exhausted."""
        queue = self.pulldom
        if not queue.firstEvent[1]:
            # Queue is empty: move the tail back onto the dummy head cell.
            queue.lastEvent = queue.firstEvent
        while not queue.firstEvent[1]:
            chunk = self.stream.read(self.bufsize)
            if not chunk:
                #FIXME: why doesn't Expat close work?
                #self.parser.close()
                return None
            self.parser.feed(chunk)
        # Unlink the first real cell and hand back the event it holds.
        event, rest = queue.firstEvent[1]
        queue.firstEvent[1] = rest
        return event
156
157# FIXME: sax2
158#def _getParser( ):
159 # from xml.sax.saxexts import make_parser
160 # expat doesn't report errors properly! Figure it out
161 # return make_parser()
162 # return make_parser("xml.sax.drivers.drv_xmllib")
163
164
165
166def _getParser():
Lars Gustäbelb798c012000-09-21 08:38:46 +0000167 return xml.sax.make_parser()
Fred Drake55c38192000-06-29 19:39:57 +0000168
# Default size passed to stream.read() for each chunk handed to the parser.
default_bufsize = 2 ** 14 - 20
170
Fred Drake55c38192000-06-29 19:39:57 +0000171# FIXME: move into sax package for common usage
def parse(stream_or_string, parser=None, bufsize=default_bufsize):
    """Return a DOMEventStream over a file name or an already open stream.

    A plain string argument is treated as a file name and opened; anything
    else is used as the stream directly.  When no parser is given, one is
    obtained from _getParser().
    """
    source = stream_or_string
    if type(source) is type(""):
        source = open(source)
    parser = parser or _getParser()
    return DOMEventStream(source, parser, bufsize)
Fred Drake55c38192000-06-29 19:39:57 +0000180
def parseString(string, parser=None):
    """Return a DOMEventStream over the XML text in ``string``.

    The text is wrapped in a StringIO buffer and the buffer size is set to
    the length of the text.  When no parser is given, one is obtained from
    _getParser().
    """
    try:
        from cStringIO import StringIO
    except ImportError:
        from StringIO import StringIO

    bufsize = len(string)
    buf = StringIO(string)
    # Honor a caller-supplied parser, the same way parse() does.
    if not parser:
        parser = _getParser()
    return DOMEventStream(buf, parser, bufsize)
190 return DOMEventStream(buf, parser, bufsize)