blob: f454098c65b1e98ccabea814fa988a1d4387c71c [file] [log] [blame]
Benjamin Peterson863a0c32011-03-02 23:40:36 +00001import io
2import unittest
Benjamin Peterson863a0c32011-03-02 23:40:36 +00003import xml.sax
4
5from xml.sax.xmlreader import AttributesImpl
6from xml.dom import pulldom
7
Zachary Ware38c707e2015-04-13 15:00:43 -05008from test.support import findfile
Benjamin Peterson863a0c32011-03-02 23:40:36 +00009
10
# Path of the "test.xml" fixture, resolved by findfile() with the
# "xmltestdata" subdirectory:
tstfile = findfile("test.xml", subdir="xmltestdata")

# A handy XML snippet, containing a comment, attributes, a namespace prefix,
# and a self-closing tag.  The tests below assert the exact event sequence
# it produces, so every character (including the line breaks) matters:
SMALL_SAMPLE = """<?xml version="1.0"?>
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:xdc="http://www.xml.com/books">
<!-- A comment -->
<title>Introduction to XSL</title>
<hr/>
<p><xdc:author xdc:attrib="prefixed attribute" attrib="other attrib">A. Namespace</xdc:author></p>
</html>"""
22
23
class PullDOMTestCase(unittest.TestCase):
    """Tests for the event stream returned by pulldom.parse()/parseString()."""

    def test_parse(self):
        """Minimal test of DOMEventStream.parse()"""

        # This just tests that parsing from a stream works. Actual parser
        # semantics are tested using parseString with a more focused XML
        # fragment.

        # Test with a filename; the stream is opened by pulldom, so register
        # its close() to run even if draining the events raises:
        handler = pulldom.parse(tstfile)
        self.addCleanup(handler.stream.close)
        list(handler)

        # Test with a file object:
        with open(tstfile, "rb") as fin:
            list(pulldom.parse(fin))

    def test_parse_semantics(self):
        """Test DOMEventStream parsing semantics."""

        items = pulldom.parseString(SMALL_SAMPLE)
        evt, node = next(items)
        # Just check the node is a Document:
        self.assertTrue(hasattr(node, "createElement"))
        self.assertEqual(pulldom.START_DOCUMENT, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("html", node.tagName)
        self.assertEqual(2, len(node.attributes))
        self.assertEqual(node.attributes.getNamedItem("xmlns:xdc").value,
                         "http://www.xml.com/books")
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)  # Line break
        evt, node = next(items)
        # XXX - A COMMENT event should be reported here, but the comment is
        # swallowed (see test_comment).  What arrives instead is the line
        # break that follows the comment:
        self.assertEqual(pulldom.CHARACTERS, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("title", node.tagName)
        title_node = node
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)
        self.assertEqual("Introduction to XSL", node.data)
        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual("title", node.tagName)
        # The end event must hand back the very node the start event gave us:
        self.assertIs(title_node, node)
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("hr", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual("hr", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("p", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("xdc:author", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual("xdc:author", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual("p", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual("html", node.tagName)
        # XXX No END_DOCUMENT item is ever obtained (see test_end_document).

    def test_expandItem(self):
        """Ensure expandNode works as expected."""
        items = pulldom.parseString(SMALL_SAMPLE)
        # Loop through the nodes until we get to a "title" start tag:
        for evt, item in items:
            if evt == pulldom.START_ELEMENT and item.tagName == "title":
                items.expandNode(item)
                self.assertEqual(1, len(item.childNodes))
                break
        else:
            self.fail("No \"title\" element detected in SMALL_SAMPLE!")
        # Loop until we get to the next start-element:
        for evt, node in items:
            if evt == pulldom.START_ELEMENT:
                break
        self.assertEqual("hr", node.tagName,
            "expandNode did not leave DOMEventStream in the correct state.")
        # Attempt to expand a standalone element:
        items.expandNode(node)
        self.assertEqual(next(items)[0], pulldom.CHARACTERS)
        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual(node.tagName, "p")
        items.expandNode(node)
        next(items)  # Skip character data
        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual(node.tagName, "html")
        with self.assertRaises(StopIteration):
            next(items)
        # clear() must drop the references to the parser and the stream:
        items.clear()
        self.assertIsNone(items.parser)
        self.assertIsNone(items.stream)

    @unittest.expectedFailure
    def test_comment(self):
        """PullDOM does not receive "comment" events."""
        items = pulldom.parseString(SMALL_SAMPLE)
        for evt, _ in items:
            if evt == pulldom.COMMENT:
                break
        else:
            self.fail("No comment was encountered")

    @unittest.expectedFailure
    def test_end_document(self):
        """PullDOM does not receive "end-document" events."""
        items = pulldom.parseString(SMALL_SAMPLE)
        # Read all of the nodes up to and including </html>:
        for evt, node in items:
            if evt == pulldom.END_ELEMENT and node.tagName == "html":
                break
        try:
            # Assert that the next node is END_DOCUMENT:
            evt, node = next(items)
            self.assertEqual(pulldom.END_DOCUMENT, evt)
        except StopIteration:
            self.fail(
                "Ran out of events, but should have received END_DOCUMENT")

    def test_getitem_deprecation(self):
        """Indexing a DOMEventStream must emit a DeprecationWarning."""
        parser = pulldom.parseString(SMALL_SAMPLE)
        with self.assertWarnsRegex(DeprecationWarning,
                                   r'Use iterator protocol instead'):
            # This should have returned 'END_ELEMENT'.
            self.assertEqual(parser[-1][0], pulldom.START_DOCUMENT)
168
Benjamin Peterson863a0c32011-03-02 23:40:36 +0000169
class ThoroughTestCase(unittest.TestCase):
    """Test the hard-to-reach parts of pulldom."""

    def test_thorough_parse(self):
        """Test some of the hard-to-reach parts of PullDOM."""
        stream = pulldom.parse(None, parser=SAXExerciser())
        self._test_thorough(stream)

    @unittest.expectedFailure
    def test_sax2dom_fail(self):
        """SAX2DOM can't handle a PI before the root element."""
        stream = SAX2DOMTestHelper(None, SAXExerciser(), 12)
        self._test_thorough(stream)

    def test_thorough_sax2dom(self):
        """Test some of the hard-to-reach parts of SAX2DOM."""
        stream = SAX2DOMTestHelper(None, SAX2DOMExerciser(), 12)
        self._test_thorough(stream, False)

    def _next_node(self, pd, expected_event):
        """Pull one item from *pd*, check its event type, return its node."""
        event, node = next(pd)
        self.assertEqual(expected_event, event)
        return node

    def _check_element(self, pd, expected_event, tag):
        """Expect an element event of the given type for element *tag*."""
        element = self._next_node(pd, expected_event)
        self.assertEqual(tag, element.tagName)

    def _check_comment_and_pi(self, pd):
        """Expect the scripted comment followed by the scripted PI."""
        comment = self._next_node(pd, pulldom.COMMENT)
        self.assertEqual("a comment", comment.data)
        instruction = self._next_node(pd, pulldom.PROCESSING_INSTRUCTION)
        self.assertEqual("target", instruction.target)
        self.assertEqual("data", instruction.data)

    def _test_thorough(self, pd, before_root=True):
        """Check the event sequence produced by one of the mock parsers.

        *pd* is the event stream under test.  *before_root* says whether a
        comment and a processing instruction are expected ahead of the
        root element.
        """
        document = self._next_node(pd, pulldom.START_DOCUMENT)
        # Just check the node is a Document:
        self.assertTrue(hasattr(document, "createElement"))

        if before_root:
            self._check_comment_and_pi(pd)

        self._check_element(pd, pulldom.START_ELEMENT, "html")
        self._check_comment_and_pi(pd)
        self._check_element(pd, pulldom.START_ELEMENT, "p")

        text = self._next_node(pd, pulldom.CHARACTERS)
        self.assertEqual("text", text.data)

        self._check_element(pd, pulldom.END_ELEMENT, "p")
        self._check_element(pd, pulldom.END_ELEMENT, "html")
        self._next_node(pd, pulldom.END_DOCUMENT)
233
234
class SAXExerciser:
    """A fake SAX parser with a fixed script of handler calls.

    It calls some of the harder-to-reach SAX methods (comments and
    processing instructions, both before and inside the root element) so
    the tests can ensure the correct events are emitted.
    """

    def setContentHandler(self, handler):
        """Remember *handler*; parse() directs every call at it."""
        self._handler = handler

    def parse(self, _):
        """Replay the scripted calls on the handler; the argument is ignored."""
        sink = self._handler
        sink.startDocument()

        # The next two items ensure that items preceding the first
        # start_element are properly stored and emitted:
        sink.comment("a comment")
        sink.processingInstruction("target", "data")

        sink.startElement("html", AttributesImpl({}))

        sink.comment("a comment")
        sink.processingInstruction("target", "data")

        sink.startElement("p", AttributesImpl({"class": "paraclass"}))
        sink.characters("text")
        sink.endElement("p")
        sink.endElement("html")
        sink.endDocument()

    def stub(self, *args, **kwargs):
        """Stub method. Does nothing."""

    # Parser configuration calls are accepted and ignored:
    setProperty = setFeature = stub
267
268
class SAX2DOMExerciser(SAXExerciser):
    """The same as SAXExerciser, but without the processing instruction and
    comment before the root element, because S2D can't handle it"""

    def parse(self, _):
        """Replay the reduced script on the handler; the argument is ignored."""
        sink = self._handler
        sink.startDocument()

        # Unlike the parent class, nothing is sent ahead of the root element.
        sink.startElement("html", AttributesImpl({}))

        sink.comment("a comment")
        sink.processingInstruction("target", "data")

        sink.startElement("p", AttributesImpl({"class": "paraclass"}))
        sink.characters("text")
        sink.endElement("p")
        sink.endElement("html")
        sink.endDocument()
284
285
class SAX2DOMTestHelper(pulldom.DOMEventStream):
    """Allows us to drive SAX2DOM from a DOMEventStream."""

    def reset(self):
        """Attach a fresh SAX2DOM content handler to the parser."""
        builder = pulldom.SAX2DOM()
        self.pulldom = builder
        parser = self.parser
        # This content handler relies on namespace support
        parser.setFeature(xml.sax.handler.feature_namespaces, 1)
        parser.setContentHandler(builder)
294
295
class SAX2DOMTestCase(unittest.TestCase):
    """Tests for pulldom.SAX2DOM, which links nodes into a tree as it parses."""

    def confirm(self, test, testname="Test"):
        """Assert that *test* is truthy, with *testname* as the message."""
        self.assertTrue(test, testname)

    def test_basic(self):
        """Ensure SAX2DOM can parse from a stream."""
        with io.StringIO(SMALL_SAMPLE) as fin:
            sd = SAX2DOMTestHelper(fin, xml.sax.make_parser(),
                                   len(SMALL_SAMPLE))
            for evt, node in sd:
                if evt == pulldom.START_ELEMENT and node.tagName == "html":
                    break
            else:
                # Without this guard the assertion below would inspect
                # whichever node happened to come last.
                self.fail("No \"html\" element detected in SMALL_SAMPLE!")
            # Because the buffer is the same length as the XML, all the
            # nodes should have been parsed and added:
            self.assertGreater(len(node.childNodes), 0)

    def testSAX2DOM(self):
        """Ensure SAX2DOM expands nodes as expected."""
        sax2dom = pulldom.SAX2DOM()
        sax2dom.startDocument()
        sax2dom.startElement("doc", {})
        sax2dom.characters("text")
        sax2dom.startElement("subelm", {})
        sax2dom.characters("text")
        sax2dom.endElement("subelm")
        sax2dom.characters("text")
        sax2dom.endElement("doc")
        sax2dom.endDocument()

        doc = sax2dom.document
        # Unlink the document even when one of the assertions below fails:
        self.addCleanup(doc.unlink)
        root = doc.documentElement
        # The unpacking doubles as a check that the root has three children:
        (text1, elm1, text2) = root.childNodes
        text3 = elm1.childNodes[0]

        # Sibling links:
        self.assertIsNone(text1.previousSibling)
        self.assertIs(text1.nextSibling, elm1)
        self.assertIs(elm1.previousSibling, text1)
        self.assertIs(elm1.nextSibling, text2)
        self.assertIs(text2.previousSibling, elm1)
        self.assertIsNone(text2.nextSibling)
        self.assertIsNone(text3.previousSibling)
        self.assertIsNone(text3.nextSibling)

        # Parent links:
        self.assertIs(root.parentNode, doc)
        self.assertIs(text1.parentNode, root)
        self.assertIs(elm1.parentNode, root)
        self.assertIs(text2.parentNode, root)
        self.assertIs(text3.parentNode, elm1)
346
347
# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()