blob: 417152670c54d923536ab3117cccbed74cfd5328 [file] [log] [blame]
Benjamin Peterson863a0c32011-03-02 23:40:36 +00001import io
2import unittest
3import sys
4import xml.sax
5
6from xml.sax.xmlreader import AttributesImpl
7from xml.dom import pulldom
8
9from test.support import run_unittest, findfile
10
11
# Path of the XML document used by the stream-parsing test, looked up by
# name in the "xmltestdata" subdirectory through findfile():
tstfile = findfile("test.xml", subdir="xmltestdata")

# A handy XML snippet, containing attributes, a namespace prefix, a comment
# and a self-closing tag.  The tests below walk its events one by one, so
# any change to the text (including its line breaks) changes the expected
# event sequence:
SMALL_SAMPLE = """<?xml version="1.0"?>
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:xdc="http://www.xml.com/books">
<!-- A comment -->
<title>Introduction to XSL</title>
<hr/>
<p><xdc:author xdc:attrib="prefixed attribute" attrib="other attrib">A. Namespace</xdc:author></p>
</html>"""
23
24
class PullDOMTestCase(unittest.TestCase):
    """Exercise pulldom event streams built with the default SAX parser."""

    def test_parse(self):
        """Minimal test of pulldom.parse()"""

        # This just tests that parsing from a stream works. Actual parser
        # semantics are tested using parseString with a more focused XML
        # fragment.

        # Test with a filename.  pulldom.parse() opens the file itself and
        # does not close it, so register the close explicitly; otherwise the
        # handle leaks and a ResourceWarning is emitted:
        handler = pulldom.parse(tstfile)
        self.addCleanup(handler.stream.close)
        list(handler)

        # Test with a file object:
        with open(tstfile, "rb") as fin:
            list(pulldom.parse(fin))

    def test_parse_semantics(self):
        """Test DOMEventStream parsing semantics."""

        items = pulldom.parseString(SMALL_SAMPLE)
        evt, node = next(items)
        # Just check the node is a Document:
        self.assertTrue(hasattr(node, "createElement"))
        self.assertEqual(pulldom.START_DOCUMENT, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("html", node.tagName)
        self.assertEqual(2, len(node.attributes))
        self.assertEqual(node.attributes.getNamedItem("xmlns:xdc").value,
                         "http://www.xml.com/books")
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)  # Line break
        evt, node = next(items)
        # XXX - A comment should be reported here!
        # self.assertEqual(pulldom.COMMENT, evt)
        # Line break after swallowed comment:
        self.assertEqual(pulldom.CHARACTERS, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("title", node.tagName)
        title_node = node
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)
        self.assertEqual("Introduction to XSL", node.data)
        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual("title", node.tagName)
        # The end event must hand back the very node the start event did:
        self.assertIs(title_node, node)
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("hr", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual("hr", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("p", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("xdc:author", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual("xdc:author", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual("p", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual("html", node.tagName)
        # XXX No END_DOCUMENT item is ever obtained:
        #evt, node = next(items)
        #self.assertEqual(pulldom.END_DOCUMENT, evt)

    def test_expandItem(self):
        """Ensure expandNode works as expected."""
        items = pulldom.parseString(SMALL_SAMPLE)
        # Loop through the nodes until we get to a "title" start tag:
        for evt, item in items:
            if evt == pulldom.START_ELEMENT and item.tagName == "title":
                items.expandNode(item)
                self.assertEqual(1, len(item.childNodes))
                break
        else:
            self.fail("No \"title\" element detected in SMALL_SAMPLE!")
        # Loop until we get to the next start-element:
        for evt, node in items:
            if evt == pulldom.START_ELEMENT:
                break
        self.assertEqual("hr", node.tagName,
            "expandNode did not leave DOMEventStream in the correct state.")
        # Attempt to expand a standalone element:
        items.expandNode(node)
        self.assertEqual(next(items)[0], pulldom.CHARACTERS)
        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual(node.tagName, "p")
        items.expandNode(node)
        next(items)  # Skip character data
        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual(node.tagName, "html")
        with self.assertRaises(StopIteration):
            next(items)
        # clear() must drop the stream's references to parser and input:
        items.clear()
        self.assertIsNone(items.parser)
        self.assertIsNone(items.stream)

    @unittest.expectedFailure
    def test_comment(self):
        """PullDOM does not receive "comment" events."""
        items = pulldom.parseString(SMALL_SAMPLE)
        for evt, _ in items:
            if evt == pulldom.COMMENT:
                break
        else:
            self.fail("No comment was encountered")

    @unittest.expectedFailure
    def test_end_document(self):
        """PullDOM does not receive "end-document" events."""
        items = pulldom.parseString(SMALL_SAMPLE)
        # Read all of the nodes up to and including </html>:
        for evt, node in items:
            if evt == pulldom.END_ELEMENT and node.tagName == "html":
                break
        try:
            # Assert that the next node is END_DOCUMENT:
            evt, node = next(items)
            self.assertEqual(pulldom.END_DOCUMENT, evt)
        except StopIteration:
            self.fail(
                "Ran out of events, but should have received END_DOCUMENT")
160
161
class ThoroughTestCase(unittest.TestCase):
    """Cover pulldom code paths that need a hand-driven fake parser."""

    def test_thorough_parse(self):
        """Check the events PullDOM produces for the fake parser's calls."""
        stream = pulldom.parse(None, parser=SAXExerciser())
        self._test_thorough(stream)

    @unittest.expectedFailure
    def test_sax2dom_fail(self):
        """SAX2DOM can't handle a PI before the root element."""
        stream = SAX2DOMTestHelper(None, SAXExerciser(), 12)
        self._test_thorough(stream)

    def test_thorough_sax2dom(self):
        """Check SAX2DOM's events when nothing precedes the root element."""
        stream = SAX2DOMTestHelper(None, SAX2DOMExerciser(), 12)
        self._test_thorough(stream, before_root=False)

    def _test_thorough(self, pd, before_root=True):
        """Pull events from *pd* one at a time and verify each of them.

        *pd* is an event stream fed by one of the fake parsers.  When
        *before_root* is true, a comment and a processing instruction are
        expected ahead of the root element's start event.
        """

        def check_comment_then_pi():
            # The comment / processing-instruction pair can occur twice.
            event, item = next(pd)
            self.assertEqual(pulldom.COMMENT, event)
            self.assertEqual("a comment", item.data)
            event, item = next(pd)
            self.assertEqual(pulldom.PROCESSING_INSTRUCTION, event)
            self.assertEqual("target", item.target)
            self.assertEqual("data", item.data)

        def check_element(expected_event, expected_tag):
            event, item = next(pd)
            self.assertEqual(expected_event, event)
            self.assertEqual(expected_tag, item.tagName)

        event, item = next(pd)
        self.assertEqual(pulldom.START_DOCUMENT, event)
        # Only a Document is expected to offer createElement():
        self.assertTrue(hasattr(item, "createElement"))

        if before_root:
            check_comment_then_pi()

        check_element(pulldom.START_ELEMENT, "html")
        check_comment_then_pi()
        check_element(pulldom.START_ELEMENT, "p")

        event, item = next(pd)
        self.assertEqual(pulldom.CHARACTERS, event)
        self.assertEqual("text", item.data)

        check_element(pulldom.END_ELEMENT, "p")
        check_element(pulldom.END_ELEMENT, "html")

        event, item = next(pd)
        self.assertEqual(pulldom.END_DOCUMENT, event)
225
226
class SAXExerciser:
    """Stand-in SAX parser that calls the content handler directly.

    Its parse() issues a fixed series of handler calls, including a comment
    and a processing instruction ahead of the root element, so that the
    events produced for them can be checked.
    """

    def stub(self, *args, **kwargs):
        """Accept any arguments and do nothing."""

    # Configuration calls made on the parser are accepted and ignored.
    setProperty = stub
    setFeature = stub

    def setContentHandler(self, handler):
        """Remember *handler*; parse() sends every call to it."""
        self._handler = handler

    def parse(self, _):
        """Replay the canned document on the stored handler.

        The argument is ignored.
        """
        sink = self._handler

        def comment_then_pi():
            sink.comment("a comment")
            sink.processingInstruction("target", "data")

        sink.startDocument()

        # Issued before any element has been started, so the handler has
        # to cope with items that precede the root element:
        comment_then_pi()

        sink.startElement("html", AttributesImpl({}))

        comment_then_pi()

        sink.startElement("p", AttributesImpl({"class": "paraclass"}))
        sink.characters("text")
        for tag in ("p", "html"):
            sink.endElement(tag)
        sink.endDocument()
259
260
class SAX2DOMExerciser(SAXExerciser):
    """Variant of SAXExerciser whose document starts at the root element.

    The comment and processing instruction before the root are left out,
    because SAX2DOM can't handle them.
    """

    def parse(self, _):
        """Replay the canned document on the stored handler.

        The argument is ignored.
        """
        sink = self._handler
        sink.startDocument()
        sink.startElement("html", AttributesImpl({}))

        sink.comment("a comment")
        sink.processingInstruction("target", "data")

        sink.startElement("p", AttributesImpl({"class": "paraclass"}))
        sink.characters("text")
        for tag in ("p", "html"):
            sink.endElement(tag)
        sink.endDocument()
276
277
class SAX2DOMTestHelper(pulldom.DOMEventStream):
    """A DOMEventStream whose content handler is a SAX2DOM instance."""

    def reset(self):
        """Create a fresh SAX2DOM handler and attach it to the parser."""
        builder = pulldom.SAX2DOM()
        self.pulldom = builder
        parser = self.parser
        # The handler depends on namespace support, so enable it first:
        parser.setFeature(xml.sax.handler.feature_namespaces, 1)
        parser.setContentHandler(builder)
286
287
class SAX2DOMTestCase(unittest.TestCase):
    """Tests for the tree that pulldom.SAX2DOM builds."""

    def confirm(self, test, testname="Test"):
        """Assert that *test* is true, using *testname* as the message."""
        self.assertTrue(test, testname)

    def test_basic(self):
        """Ensure SAX2DOM can parse from a stream."""
        with io.StringIO(SMALL_SAMPLE) as source:
            events = SAX2DOMTestHelper(source, xml.sax.make_parser(),
                                       len(SMALL_SAMPLE))
            for evt, node in events:
                if evt != pulldom.START_ELEMENT or node.tagName != "html":
                    continue
                break
            # The buffer size equals the length of the XML, so the whole
            # document has been read by now and the node already has its
            # children attached:
            self.assertGreater(len(node.childNodes), 0)

    def testSAX2DOM(self):
        """Ensure SAX2DOM expands nodes as expected."""
        builder = pulldom.SAX2DOM()
        builder.startDocument()
        builder.startElement("doc", {})
        builder.characters("text")
        builder.startElement("subelm", {})
        builder.characters("text")
        builder.endElement("subelm")
        builder.characters("text")
        builder.endElement("doc")
        builder.endDocument()

        document = builder.document
        root = document.documentElement
        leading, child, trailing = root.childNodes
        inner = child.childNodes[0]

        # Sibling links among the root's three children:
        self.assertIsNone(leading.previousSibling)
        self.assertIs(leading.nextSibling, child)
        self.assertIs(child.previousSibling, leading)
        self.assertIs(child.nextSibling, trailing)
        self.assertIs(trailing.previousSibling, child)
        self.assertIsNone(trailing.nextSibling)
        # The nested text node has no siblings:
        self.assertIsNone(inner.previousSibling)
        self.assertIsNone(inner.nextSibling)

        # Parent links, from the root element downwards:
        expected_parents = (
            (root, document),
            (leading, root),
            (child, root),
            (trailing, root),
            (inner, child),
        )
        for kid, parent in expected_parents:
            self.assertIs(kid.parentNode, parent)
        document.unlink()
338
339
def test_main():
    """Run all of this module's test case classes via run_unittest()."""
    cases = (PullDOMTestCase, ThoroughTestCase, SAX2DOMTestCase)
    run_unittest(*cases)


if __name__ == "__main__":
    test_main()