blob: 873dd69a8c2522af9435e6ed4f93e6d0a705d3d7 [file] [log] [blame]
Andrew M. Kuchlingb17664d2000-03-31 15:44:52 +00001# Very simple test - Parse a file and print what happens
2
3# XXX TypeErrors on calling handlers, or on bad return values from a
4# handler, are obscure and unhelpful.
Fred Drake004d5e62000-10-23 17:22:08 +00005
Fred Drake7fbc85c2000-09-23 04:47:56 +00006from xml.parsers import expat
Fred Drake004d5e62000-10-23 17:22:08 +00007
Tim Peters2f228e72001-05-13 00:19:31 +00008from test_support import sortdict
9
class Outputter:
    """Set of expat callbacks that print a description of each event.

    Every method is named after the parser attribute it is meant to be
    assigned to.  Each message is built as one string so that the text
    written matches what the equivalent multi-item print statement emits
    (no blank is inserted after an item that ends in a tab).
    """

    def StartElementHandler(self, name, attrs):
        """Print the element name followed by sortdict()'s view of attrs."""
        print('Start element:\n\t' + repr(name) + ' ' + str(sortdict(attrs)))

    def EndElementHandler(self, name):
        """Print the name of the element being closed."""
        print('End element:\n\t' + repr(name))

    def CharacterDataHandler(self, data):
        """Print the stripped text; whitespace-only data prints nothing."""
        text = data.strip()
        if not text:
            return
        print('Character data:')
        print('\t' + repr(text))

    def ProcessingInstructionHandler(self, target, data):
        """Print the target and data of a processing instruction."""
        print('PI:\n\t' + repr(target) + ' ' + repr(data))

    def StartNamespaceDeclHandler(self, prefix, uri):
        """Print the prefix and URI of a namespace declaration."""
        print('NS decl:\n\t' + repr(prefix) + ' ' + repr(uri))

    def EndNamespaceDeclHandler(self, prefix):
        """Print the prefix whose namespace declaration ends."""
        print('End of NS decl:\n\t' + repr(prefix))

    def StartCdataSectionHandler(self):
        """Announce the start of a CDATA section."""
        print('Start of CDATA section')

    def EndCdataSectionHandler(self):
        """Announce the end of a CDATA section."""
        print('End of CDATA section')

    def CommentHandler(self, text):
        """Print the text of a comment."""
        print('Comment:\n\t' + repr(text))

    def NotationDeclHandler(self, *args):
        """Print the argument tuple of a notation declaration."""
        # The unpack is kept only as an arity check: anything other than
        # four arguments raises ValueError here.
        _name, _base, _sysid, _pubid = args
        print('Notation declared:' + ' ' + str(args))

    def UnparsedEntityDeclHandler(self, *args):
        """Print the argument tuple of an unparsed entity declaration."""
        # Arity check: exactly five arguments are required.
        _entity, _base, _system_id, _public_id, _notation = args
        print('Unparsed entity decl:\n\t' + str(args))

    def NotStandaloneHandler(self, userData):
        """Print a notice and return 1."""
        print('Not standalone')
        return 1

    def ExternalEntityRefHandler(self, *args):
        """Print every argument except the first (context) and return 1."""
        # Arity check: exactly four arguments are required.
        _context, _base, _sys_id, _pub_id = args
        print('External entity ref:' + ' ' + str(args[1:]))
        return 1

    def DefaultHandler(self, userData):
        """Accept the data and do nothing with it."""
        pass

    def DefaultHandlerExpand(self, userData):
        """Accept the data and do nothing with it."""
        pass
63
64
def confirm(ok):
    """Print "OK." when *ok* is true, otherwise print "Not OK."."""
    verdict = "Not OK."
    if ok:
        verdict = "OK."
    print(verdict)
70
# Handler object whose bound methods are installed on the parsers below.
out = Outputter()
parser = expat.ParserCreate(namespace_separator='!')

# Test getting/setting returns_unicode, ordered_attributes and
# specified_attributes.  Each flag is assigned 0, 1, 2 and 0 in turn, and
# the value read back is expected to be 0, 1, 1 and 0 respectively.
for flag_name in ('returns_unicode', 'ordered_attributes',
                  'specified_attributes'):
    for assigned, expected in ((0, 0), (1, 1), (2, 1), (0, 0)):
        setattr(parser, flag_name, assigned)
        confirm(getattr(parser, flag_name) == expected)
91
# Parser attributes to hook up.  Each entry is also the name of the
# method on `out` that gets bound to that attribute.
HANDLER_NAMES = [
    'StartElementHandler',
    'EndElementHandler',
    'CharacterDataHandler',
    'ProcessingInstructionHandler',
    'UnparsedEntityDeclHandler',
    'NotationDeclHandler',
    'StartNamespaceDeclHandler',
    'EndNamespaceDeclHandler',
    'CommentHandler',
    'StartCdataSectionHandler',
    'EndCdataSectionHandler',
    'DefaultHandler',
    'DefaultHandlerExpand',
    #'NotStandaloneHandler',
    'ExternalEntityRefHandler',
]
for handler_name in HANDLER_NAMES:
    callback = getattr(out, handler_name)
    setattr(parser, handler_name, callback)
Andrew M. Kuchlingb17664d2000-03-31 15:44:52 +0000105
Fred Drake265a8042000-09-21 20:32:13 +0000106data = '''\
107<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
Andrew M. Kuchlingb17664d2000-03-31 15:44:52 +0000108<?xml-stylesheet href="stylesheet.css"?>
109<!-- comment data -->
110<!DOCTYPE quotations SYSTEM "quotations.dtd" [
111<!ELEMENT root ANY>
112<!NOTATION notation SYSTEM "notation.jpeg">
113<!ENTITY acirc "&#226;">
114<!ENTITY external_entity SYSTEM "entity.file">
115<!ENTITY unparsed_entity SYSTEM "entity.file" NDATA notation>
116%unparsed_entity;
117]>
118
Andrew M. Kuchling7fd7e362000-06-27 00:37:25 +0000119<root attr1="value1" attr2="value2&#8000;">
Andrew M. Kuchlingb17664d2000-03-31 15:44:52 +0000120<myns:subelement xmlns:myns="http://www.python.org/namespace">
121 Contents of subelements
122</myns:subelement>
123<sub2><![CDATA[contents of CDATA section]]></sub2>
124&external_entity;
125</root>
Fred Drake265a8042000-09-21 20:32:13 +0000126'''
Andrew M. Kuchlingb17664d2000-03-31 15:44:52 +0000127
def _report_parse_error(failed_parser):
    """Print the error details recorded on *failed_parser*.

    Writes four lines: the error code with its expat.ErrorString() text,
    then the ErrorLineNumber, ErrorColumnNumber and ErrorByteIndex
    attributes of the parser.
    """
    code = failed_parser.ErrorCode
    print(' '.join(['** Error', str(code), str(expat.ErrorString(code))]))
    print(' '.join(['** Line', str(failed_parser.ErrorLineNumber)]))
    print(' '.join(['** Column', str(failed_parser.ErrorColumnNumber)]))
    print(' '.join(['** Byte', str(failed_parser.ErrorByteIndex)]))


def _new_unicode_parser():
    """Return a new parser that uses '!' as namespace separator, has
    returns_unicode set to 1, and has every attribute listed in
    HANDLER_NAMES bound to the same-named method of `out`.
    """
    fresh = expat.ParserCreate(namespace_separator='!')
    fresh.returns_unicode = 1
    for handler_name in HANDLER_NAMES:
        setattr(fresh, handler_name, getattr(out, handler_name))
    return fresh


# Produce UTF-8 output
parser.returns_unicode = 0
try:
    parser.Parse(data, 1)
except expat.error:
    _report_parse_error(parser)

# Try the parse again, this time producing Unicode output
parser = _new_unicode_parser()
try:
    parser.Parse(data, 1)
except expat.error:
    _report_parse_error(parser)

# Try parsing a file
parser = _new_unicode_parser()
import StringIO
# Named data_stream rather than `file` so the builtin is not shadowed.
data_stream = StringIO.StringIO(data)
try:
    parser.ParseFile(data_stream)
except expat.error:
    _report_parse_error(parser)
Fred Drake1e0611b2000-12-23 22:12:07 +0000167
168
169# Tests that make sure we get errors when the namespace_separator value
170# is illegal, and that we don't for good values:
171print
172print "Testing constructor for proper handling of namespace_separator values:"
173expat.ParserCreate()
174expat.ParserCreate(namespace_separator=None)
175expat.ParserCreate(namespace_separator=' ')
176print "Legal values tested o.k."
177try:
178 expat.ParserCreate(namespace_separator=42)
179except TypeError, e:
180 print "Caught expected TypeError:"
181 print e
182else:
183 print "Failed to catch expected TypeError."
Fred Drake8f42e2b2001-04-25 16:03:54 +0000184
Fred Drake1e0611b2000-12-23 22:12:07 +0000185try:
186 expat.ParserCreate(namespace_separator='too long')
187except ValueError, e:
188 print "Caught expected ValueError:"
189 print e
190else:
191 print "Failed to catch expected ValueError."
Fred Drake8f42e2b2001-04-25 16:03:54 +0000192
193# ParserCreate() needs to accept a namespace_separator of zero length
194# to satisfy the requirements of RDF applications that are required
195# to simply glue together the namespace URI and the localname. Though
196# considered a wart of the RDF specifications, it needs to be supported.
197#
198# See XML-SIG mailing list thread starting with
199# http://mail.python.org/pipermail/xml-sig/2001-April/005202.html
200#
201expat.ParserCreate(namespace_separator='') # too short