Andrew M. Kuchling | b17664d | 2000-03-31 15:44:52 +0000 | [diff] [blame] | 1 | # Very simple test - Parse a file and print what happens |
| 2 | |
| 3 | # XXX TypeErrors on calling handlers, or on bad return values from a |
| 4 | # handler, are obscure and unhelpful. |
Fred Drake | 004d5e6 | 2000-10-23 17:22:08 +0000 | [diff] [blame] | 5 | |
Fred Drake | 7fbc85c | 2000-09-23 04:47:56 +0000 | [diff] [blame] | 6 | from xml.parsers import expat |
Fred Drake | 004d5e6 | 2000-10-23 17:22:08 +0000 | [diff] [blame] | 7 | |
Tim Peters | 2f228e7 | 2001-05-13 00:19:31 +0000 | [diff] [blame] | 8 | from test_support import sortdict |
| 9 | |
class Outputter:
    """Expat callbacks that print a description of every event received.

    Each message is assembled into one string before it is printed, so
    the emitted text does not depend on how ``print`` separates several
    items.
    """

    def StartElementHandler(self, name, attrs):
        """Print the element name and its attributes as formatted by sortdict()."""
        print('Start element:\n\t%r %s' % (name, sortdict(attrs)))

    def EndElementHandler(self, name):
        """Print the name of the element being closed."""
        print('End element:\n\t%r' % (name,))

    def CharacterDataHandler(self, data):
        """Print the stripped text; whitespace-only data prints nothing."""
        text = data.strip()
        if not text:
            return
        print('Character data:')
        print('\t%r' % (text,))

    def ProcessingInstructionHandler(self, target, data):
        """Print the target and data of a processing instruction."""
        print('PI:\n\t%r %r' % (target, data))

    def StartNamespaceDeclHandler(self, prefix, uri):
        """Print the prefix and URI of a namespace declaration."""
        print('NS decl:\n\t%r %r' % (prefix, uri))

    def EndNamespaceDeclHandler(self, prefix):
        """Print the prefix whose namespace declaration ends."""
        print('End of NS decl:\n\t%r' % (prefix,))

    def StartCdataSectionHandler(self):
        """Print a marker line for the start of a CDATA section."""
        print('Start of CDATA section')

    def EndCdataSectionHandler(self):
        """Print a marker line for the end of a CDATA section."""
        print('End of CDATA section')

    def CommentHandler(self, text):
        """Print the text of a comment."""
        print('Comment:\n\t%r' % (text,))

    def NotationDeclHandler(self, *args):
        """Print the four values describing a notation declaration."""
        # Unpacking raises ValueError unless exactly four values arrived.
        name, base, sysid, pubid = args
        print('Notation declared: %s' % ((name, base, sysid, pubid),))

    def UnparsedEntityDeclHandler(self, *args):
        """Print the five values describing an unparsed entity declaration."""
        # Unpacking raises ValueError unless exactly five values arrived.
        entityName, base, systemId, publicId, notationName = args
        described = (entityName, base, systemId, publicId, notationName)
        print('Unparsed entity decl:\n\t%s' % (described,))

    def NotStandaloneHandler(self, userData):
        """Print a marker line and return 1."""
        print('Not standalone')
        return 1

    def ExternalEntityRefHandler(self, *args):
        """Print every value except the leading context one and return 1."""
        # Unpacking raises ValueError unless exactly four values arrived.
        context, base, sysId, pubId = args
        print('External entity ref: %s' % ((base, sysId, pubId),))
        return 1

    def DefaultHandler(self, userData):
        """Accept the data and do nothing with it."""

    def DefaultHandlerExpand(self, userData):
        """Accept the data and do nothing with it."""
| 63 | |
| 64 | |
def confirm(ok):
    """Print "OK." when *ok* is true and "Not OK." otherwise."""
    if not ok:
        print("Not OK.")
        return
    print("OK.")
| 70 | |
out = Outputter()
parser = expat.ParserCreate(namespace_separator='!')

# Test getting/setting each of the parser's flag attributes in turn.
# Every (assigned, expected) pair stores a value and confirms what is read
# back: 0 and 1 are expected unchanged, 2 is expected to read back as 1,
# and the final pair leaves the attribute at 0 again.
for flag_name in ('returns_unicode', 'ordered_attributes',
                  'specified_attributes'):
    for assigned, expected in ((0, 0), (1, 1), (2, 1), (0, 0)):
        setattr(parser, flag_name, assigned)
        confirm(getattr(parser, flag_name) == expected)
| 91 | |
# Names of the Outputter methods that are installed on a parser as the
# callback attribute of the same name.
HANDLER_NAMES = [
    'StartElementHandler',
    'EndElementHandler',
    'CharacterDataHandler',
    'ProcessingInstructionHandler',
    'UnparsedEntityDeclHandler',
    'NotationDeclHandler',
    'StartNamespaceDeclHandler',
    'EndNamespaceDeclHandler',
    'CommentHandler',
    'StartCdataSectionHandler',
    'EndCdataSectionHandler',
    'DefaultHandler',
    'DefaultHandlerExpand',
    # NOTE(review): the next entry is disabled; the reason is not recorded.
    #'NotStandaloneHandler',
    'ExternalEntityRefHandler',
]

# Hook every listed Outputter method onto the parser created above.
for handler_name in HANDLER_NAMES:
    setattr(parser, handler_name, getattr(out, handler_name))
Andrew M. Kuchling | b17664d | 2000-03-31 15:44:52 +0000 | [diff] [blame] | 105 | |
# Sample document handed to each of the parse runs in this script.
# Non-ASCII characters are written as numeric character references so the
# source stays pure ASCII and the text remains valid for the iso-8859-1
# encoding it declares (U+1F40 has no iso-8859-1 representation).
data = '''\
<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
<?xml-stylesheet href="stylesheet.css"?>
<!-- comment data -->
<!DOCTYPE quotations SYSTEM "quotations.dtd" [
<!ELEMENT root ANY>
<!NOTATION notation SYSTEM "notation.jpeg">
<!ENTITY acirc "&#226;">
<!ENTITY external_entity SYSTEM "entity.file">
<!ENTITY unparsed_entity SYSTEM "entity.file" NDATA notation>
%unparsed_entity;
]>

<root attr1="value1" attr2="value2&#8000;">
<myns:subelement xmlns:myns="http://www.python.org/namespace">
Contents of subelements
</myns:subelement>
<sub2><![CDATA[contents of CDATA section]]></sub2>
&external_entity;
</root>
'''
Andrew M. Kuchling | b17664d | 2000-03-31 15:44:52 +0000 | [diff] [blame] | 127 | |
def _report_parse_error(failed_parser):
    """Print the error code, its message and the position stored on *failed_parser*."""
    code = failed_parser.ErrorCode
    print('** Error %s %s' % (code, expat.ErrorString(code)))
    print('** Line %s' % (failed_parser.ErrorLineNumber,))
    print('** Column %s' % (failed_parser.ErrorColumnNumber,))
    print('** Byte %s' % (failed_parser.ErrorByteIndex,))


def _new_unicode_parser():
    """Return a fresh '!'-separated parser with returns_unicode set to 1
    and every HANDLER_NAMES callback taken from ``out``."""
    fresh = expat.ParserCreate(namespace_separator='!')
    fresh.returns_unicode = 1
    for handler_name in HANDLER_NAMES:
        setattr(fresh, handler_name, getattr(out, handler_name))
    return fresh


# Produce UTF-8 output, reusing the parser that already has its handlers.
parser.returns_unicode = 0
try:
    parser.Parse(data, 1)
except expat.error:
    _report_parse_error(parser)

# Try the parse again, this time producing Unicode output
parser = _new_unicode_parser()
try:
    parser.Parse(data, 1)
except expat.error:
    _report_parse_error(parser)

# Try parsing a file
import StringIO
parser = _new_unicode_parser()
# Named "stream" rather than "file" so the builtin is not shadowed.
stream = StringIO.StringIO(data)
try:
    parser.ParseFile(stream)
except expat.error:
    _report_parse_error(parser)
Fred Drake | 1e0611b | 2000-12-23 22:12:07 +0000 | [diff] [blame] | 167 | |
| 168 | |
| 169 | # Tests that make sure we get errors when the namespace_separator value |
| 170 | # is illegal, and that we don't for good values: |
| 171 | print |
| 172 | print "Testing constructor for proper handling of namespace_separator values:" |
| 173 | expat.ParserCreate() |
| 174 | expat.ParserCreate(namespace_separator=None) |
| 175 | expat.ParserCreate(namespace_separator=' ') |
| 176 | print "Legal values tested o.k." |
| 177 | try: |
| 178 | expat.ParserCreate(namespace_separator=42) |
| 179 | except TypeError, e: |
| 180 | print "Caught expected TypeError:" |
| 181 | print e |
| 182 | else: |
| 183 | print "Failed to catch expected TypeError." |
Fred Drake | 8f42e2b | 2001-04-25 16:03:54 +0000 | [diff] [blame] | 184 | |
Fred Drake | 1e0611b | 2000-12-23 22:12:07 +0000 | [diff] [blame] | 185 | try: |
| 186 | expat.ParserCreate(namespace_separator='too long') |
| 187 | except ValueError, e: |
| 188 | print "Caught expected ValueError:" |
| 189 | print e |
| 190 | else: |
| 191 | print "Failed to catch expected ValueError." |
Fred Drake | 8f42e2b | 2001-04-25 16:03:54 +0000 | [diff] [blame] | 192 | |
| 193 | # ParserCreate() needs to accept a namespace_separator of zero length |
| 194 | # to satisfy the requirements of RDF applications that are required |
| 195 | # to simply glue together the namespace URI and the localname. Though |
| 196 | # considered a wart of the RDF specifications, it needs to be supported. |
| 197 | # |
| 198 | # See XML-SIG mailing list thread starting with |
| 199 | # http://mail.python.org/pipermail/xml-sig/2001-April/005202.html |
| 200 | # |
| 201 | expat.ParserCreate(namespace_separator='') # too short |