blob: 1932c6bb99f0e0d727251867f62662f1119b19a3 [file] [log] [blame]
Benjamin Peterson863a0c32011-03-02 23:40:36 +00001import io
2import unittest
3import sys
4import xml.sax
5
6from xml.sax.xmlreader import AttributesImpl
7from xml.dom import pulldom
8
Zachary Ware38c707e2015-04-13 15:00:43 -05009from test.support import findfile
Benjamin Peterson863a0c32011-03-02 23:40:36 +000010
11
# Location of the sample XML document that test_parse() reads, looked up
# through test.support.findfile:
tstfile = findfile("test.xml", subdir="xmltestdata")

# A handy XML snippet, containing attributes, a namespace prefix, and a
# self-closing tag.  Its exact layout matters: the tests in this file step
# through the event stream one item at a time and expect a CHARACTERS event
# for each line break between the tags.
SMALL_SAMPLE = """<?xml version="1.0"?>
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:xdc="http://www.xml.com/books">
<!-- A comment -->
<title>Introduction to XSL</title>
<hr/>
<p><xdc:author xdc:attrib="prefixed attribute" attrib="other attrib">A. Namespace</xdc:author></p>
</html>"""
23
24
class PullDOMTestCase(unittest.TestCase):
    """Tests for the event stream returned by pulldom.parse()/parseString()."""

    def test_parse(self):
        """Minimal test of DOMEventStream.parse()"""

        # This just tests that parsing from a stream works. Actual parser
        # semantics are tested using parseString with a more focused XML
        # fragment.

        # Test with a filename:
        handler = pulldom.parse(tstfile)
        # Close the stream attached to the handler once the test is over.
        self.addCleanup(handler.stream.close)
        list(handler)

        # Test with a file object:
        with open(tstfile, "rb") as fin:
            list(pulldom.parse(fin))

    def test_parse_semantics(self):
        """Test DOMEventStream parsing semantics."""

        items = pulldom.parseString(SMALL_SAMPLE)
        evt, node = next(items)
        # Just check the node is a Document:
        self.assertTrue(hasattr(node, "createElement"))
        self.assertEqual(pulldom.START_DOCUMENT, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("html", node.tagName)
        self.assertEqual(2, len(node.attributes))
        self.assertEqual(node.attributes.getNamedItem("xmlns:xdc").value,
                         "http://www.xml.com/books")
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)  # Line break
        evt, node = next(items)
        # XXX - A comment should be reported here!
        # self.assertEqual(pulldom.COMMENT, evt)
        # Line break after swallowed comment:
        self.assertEqual(pulldom.CHARACTERS, evt)
        evt, node = next(items)
        self.assertEqual("title", node.tagName)
        title_node = node
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)
        self.assertEqual("Introduction to XSL", node.data)
        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual("title", node.tagName)
        # The END_ELEMENT event must hand back the very same node object
        # that START_ELEMENT produced, not merely an equal one.
        self.assertIs(title_node, node)
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("hr", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual("hr", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("p", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("xdc:author", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual("xdc:author", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        # XXX No END_DOCUMENT item is ever obtained:
        #evt, node = next(items)
        #self.assertEqual(pulldom.END_DOCUMENT, evt)

    def test_expandItem(self):
        """Ensure expandNode works as expected."""
        items = pulldom.parseString(SMALL_SAMPLE)
        # Loop through the nodes until we get to a "title" start tag:
        for evt, item in items:
            if evt == pulldom.START_ELEMENT and item.tagName == "title":
                items.expandNode(item)
                self.assertEqual(1, len(item.childNodes))
                break
        else:
            self.fail("No \"title\" element detected in SMALL_SAMPLE!")
        # Loop until we get to the next start-element:
        for evt, node in items:
            if evt == pulldom.START_ELEMENT:
                break
        self.assertEqual("hr", node.tagName,
            "expandNode did not leave DOMEventStream in the correct state.")
        # Attempt to expand a standalone element:
        items.expandNode(node)
        self.assertEqual(next(items)[0], pulldom.CHARACTERS)
        evt, node = next(items)
        self.assertEqual(node.tagName, "p")
        items.expandNode(node)
        next(items)  # Skip character data
        evt, node = next(items)
        self.assertEqual(node.tagName, "html")
        with self.assertRaises(StopIteration):
            next(items)
        # clear() is expected to drop the stream's parser and input stream.
        items.clear()
        self.assertIsNone(items.parser)
        self.assertIsNone(items.stream)

    @unittest.expectedFailure
    def test_comment(self):
        """PullDOM does not receive "comment" events."""
        items = pulldom.parseString(SMALL_SAMPLE)
        for evt, _ in items:
            if evt == pulldom.COMMENT:
                break
        else:
            self.fail("No comment was encountered")

    @unittest.expectedFailure
    def test_end_document(self):
        """PullDOM does not receive "end-document" events."""
        items = pulldom.parseString(SMALL_SAMPLE)
        # Read all of the nodes up to and including </html>:
        for evt, node in items:
            if evt == pulldom.END_ELEMENT and node.tagName == "html":
                break
        # Only next() can raise StopIteration, so keep the try body to it.
        try:
            evt, node = next(items)
        except StopIteration:
            self.fail(
                "Ran out of events, but should have received END_DOCUMENT")
        # Assert that the next node is END_DOCUMENT:
        self.assertEqual(pulldom.END_DOCUMENT, evt)
162
163
class ThoroughTestCase(unittest.TestCase):
    """Cover the hard-to-reach parts of pulldom with scripted fake parsers."""

    def test_thorough_parse(self):
        """Test some of the hard-to-reach parts of PullDOM."""
        stream = pulldom.parse(None, parser=SAXExerciser())
        self._test_thorough(stream)

    @unittest.expectedFailure
    def test_sax2dom_fail(self):
        """SAX2DOM can't handle a PI before the root element."""
        stream = SAX2DOMTestHelper(None, SAXExerciser(), 12)
        self._test_thorough(stream)

    def test_thorough_sax2dom(self):
        """Test some of the hard-to-reach parts of SAX2DOM."""
        stream = SAX2DOMTestHelper(None, SAX2DOMExerciser(), 12)
        self._test_thorough(stream, before_root=False)

    def _expect_comment_then_pi(self, pd):
        """Consume a comment event followed by a processing instruction."""
        event, node = next(pd)
        self.assertEqual(pulldom.COMMENT, event)
        self.assertEqual("a comment", node.data)
        event, node = next(pd)
        self.assertEqual(pulldom.PROCESSING_INSTRUCTION, event)
        self.assertEqual("target", node.target)
        self.assertEqual("data", node.data)

    def _expect_element(self, pd, expected_event, expected_tag):
        """Consume one element event and check its kind and tag name."""
        event, node = next(pd)
        self.assertEqual(expected_event, event)
        self.assertEqual(expected_tag, node.tagName)

    def _test_thorough(self, pd, before_root=True):
        """Walk the event stream *pd* and check every event the mock parser
        is scripted to emit.

        *before_root* says whether a comment and a processing instruction
        are expected ahead of the root element.
        """
        event, node = next(pd)
        self.assertEqual(pulldom.START_DOCUMENT, event)
        # A Document is recognised by its createElement attribute:
        self.assertTrue(hasattr(node, "createElement"))

        if before_root:
            self._expect_comment_then_pi(pd)

        self._expect_element(pd, pulldom.START_ELEMENT, "html")
        self._expect_comment_then_pi(pd)
        self._expect_element(pd, pulldom.START_ELEMENT, "p")

        event, node = next(pd)
        self.assertEqual(pulldom.CHARACTERS, event)
        self.assertEqual("text", node.data)

        self._expect_element(pd, pulldom.END_ELEMENT, "p")
        self._expect_element(pd, pulldom.END_ELEMENT, "html")

        event, node = next(pd)
        self.assertEqual(pulldom.END_DOCUMENT, event)
227
228
class SAXExerciser:
    """Stand-in for a SAX parser.

    parse() ignores its argument and replays a fixed script of callbacks on
    the registered content handler, including some of the harder-to-reach
    ones (comments and processing instructions), so the events the handler
    emits can be checked.
    """

    def setContentHandler(self, handler):
        """Remember *handler*; parse() sends its callbacks to it."""
        self._handler = handler

    def parse(self, _):
        """Replay the scripted document on the stored content handler."""
        sink = self._handler
        sink.startDocument()

        # A comment and a PI ahead of the root element: these ensure that
        # items preceding the first start_element are properly stored and
        # emitted.
        sink.comment("a comment")
        sink.processingInstruction("target", "data")

        sink.startElement("html", AttributesImpl({}))

        sink.comment("a comment")
        sink.processingInstruction("target", "data")

        sink.startElement("p", AttributesImpl({"class": "paraclass"}))
        sink.characters("text")
        sink.endElement("p")
        sink.endElement("html")
        sink.endDocument()

    def stub(self, *args, **kwargs):
        """Stub method. Does nothing."""

    # Both configuration hooks accept anything and do nothing.
    setProperty = setFeature = stub
261
262
class SAX2DOMExerciser(SAXExerciser):
    """Variant of SAXExerciser whose script has no comment or processing
    instruction before the root element, because SAX2DOM can't handle
    them there."""

    def parse(self, _):
        """Replay the shortened script on the stored content handler."""
        sink = self._handler
        sink.startDocument()
        sink.startElement("html", AttributesImpl({}))
        # The comment and PI only appear inside the root element here.
        sink.comment("a comment")
        sink.processingInstruction("target", "data")
        sink.startElement("p", AttributesImpl({"class": "paraclass"}))
        sink.characters("text")
        sink.endElement("p")
        sink.endElement("html")
        sink.endDocument()
278
279
class SAX2DOMTestHelper(pulldom.DOMEventStream):
    """DOMEventStream whose content handler is a pulldom.SAX2DOM, so that
    iterating the stream drives SAX2DOM."""

    def reset(self):
        """Install a fresh SAX2DOM handler on the parser."""
        builder = pulldom.SAX2DOM()
        self.pulldom = builder
        sax_parser = self.parser
        # This content handler relies on namespace support
        sax_parser.setFeature(xml.sax.handler.feature_namespaces, 1)
        sax_parser.setContentHandler(builder)
288
289
class SAX2DOMTestCase(unittest.TestCase):
    """Tests for pulldom.SAX2DOM."""

    def confirm(self, test, testname="Test"):
        """Assert that *test* is true, using *testname* as the message."""
        self.assertTrue(test, testname)

    def test_basic(self):
        """Ensure SAX2DOM can parse from a stream."""
        sample_length = len(SMALL_SAMPLE)
        with io.StringIO(SMALL_SAMPLE) as source:
            stream = SAX2DOMTestHelper(source, xml.sax.make_parser(),
                                       sample_length)
            for event, node in stream:
                if event == pulldom.START_ELEMENT and node.tagName == "html":
                    break
            # Because the buffer is the same length as the XML, all the
            # nodes should have been parsed and added:
            self.assertGreater(len(node.childNodes), 0)

    def testSAX2DOM(self):
        """Ensure SAX2DOM expands nodes as expected."""
        builder = pulldom.SAX2DOM()
        # Feed <doc>text<subelm>text</subelm>text</doc> in by hand.
        builder.startDocument()
        builder.startElement("doc", {})
        builder.characters("text")
        builder.startElement("subelm", {})
        builder.characters("text")
        builder.endElement("subelm")
        builder.characters("text")
        builder.endElement("doc")
        builder.endDocument()

        document = builder.document
        root = document.documentElement
        leading_text, sub_elem, trailing_text = root.childNodes
        inner_text = sub_elem.childNodes[0]

        # Sibling links among the root's children and the nested text:
        self.assertIsNone(leading_text.previousSibling)
        self.assertIs(leading_text.nextSibling, sub_elem)
        self.assertIs(sub_elem.previousSibling, leading_text)
        self.assertIs(sub_elem.nextSibling, trailing_text)
        self.assertIs(trailing_text.previousSibling, sub_elem)
        self.assertIsNone(trailing_text.nextSibling)
        self.assertIsNone(inner_text.previousSibling)
        self.assertIsNone(inner_text.nextSibling)

        # Parent links:
        self.assertIs(root.parentNode, document)
        self.assertIs(leading_text.parentNode, root)
        self.assertIs(sub_elem.parentNode, root)
        self.assertIs(trailing_text.parentNode, root)
        self.assertIs(inner_text.parentNode, sub_elem)
        document.unlink()
340
341
# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()