Guido van Rossum | aad6761 | 2000-05-08 17:31:04 +0000 | [diff] [blame] | 1 | # Very simple test - Parse a file and print what happens |
| 2 | |
| 3 | # XXX TypeErrors on calling handlers, or on bad return values from a |
| 4 | # handler, are obscure and unhelpful. |
| 5 | |
| 6 | import sys, string |
| 7 | import os |
| 8 | |
| 9 | import pyexpat |
| 10 | |
class Outputter:
    """Expat callback collection that echoes every parse event to stdout.

    Each method is named after the parser attribute it is meant to be
    assigned to, so the handlers can be registered by name with
    ``setattr(parser, name, getattr(outputter, name))``.

    Every message is assembled into one string and printed with the
    parenthesised single-argument form of ``print``.  That form produces
    the same bytes whether ``print`` is a statement or a function.  The
    layout matches the comma-separated print statement: items are joined
    by one space, except directly after a string that ends in whitespace
    other than ' ' (here the '\\n\\t' prefixes), where no space is added.
    """

    def StartElementHandler(self, name, attrs):
        """Report an opening tag: the element name and its attributes."""
        print('Start element:\n\t' + repr(name) + ' ' + str(attrs))

    def EndElementHandler(self, name):
        """Report a closing tag."""
        print('End element:\n\t' + repr(name))

    def CharacterDataHandler(self, data):
        """Report character data, ignoring chunks that are only whitespace."""
        stripped = data.strip()
        if not stripped:
            return
        print('Character data:')
        print('\t' + repr(stripped))

    def ProcessingInstructionHandler(self, target, data):
        """Report a processing instruction's target and data."""
        print('PI:\n\t' + repr(target) + ' ' + repr(data))

    def StartNamespaceDeclHandler(self, prefix, uri):
        """Report the start of a namespace declaration's scope."""
        print('NS decl:\n\t' + repr(prefix) + ' ' + repr(uri))

    def EndNamespaceDeclHandler(self, prefix):
        """Report the end of a namespace declaration's scope."""
        print('End of NS decl:\n\t' + repr(prefix))

    def StartCdataSectionHandler(self):
        """Report the beginning of a CDATA section."""
        print('Start of CDATA section')

    def EndCdataSectionHandler(self):
        """Report the end of a CDATA section."""
        print('End of CDATA section')

    def CommentHandler(self, text):
        """Report the body of a comment."""
        print('Comment:\n\t' + repr(text))

    def NotationDeclHandler(self, *args):
        """Report a notation declaration; expects exactly four arguments."""
        # The unpacking is kept on purpose: it raises ValueError when the
        # parser passes anything other than four values.
        name, base, sysid, pubid = args
        print('Notation declared: ' + str(args))

    def UnparsedEntityDeclHandler(self, *args):
        """Report an unparsed entity declaration; expects five arguments."""
        # Unpacking doubles as an argument-count check (see above).
        entityName, base, systemId, publicId, notationName = args
        print('Unparsed entity decl:\n\t' + str(args))

    def NotStandaloneHandler(self, userData=None):
        """Report a not-standalone document and return 1.

        The expat binding documents this callback as taking no arguments,
        so ``userData`` is optional: the handler can be registered as-is,
        while callers that still pass one positional value keep working.
        """
        print('Not standalone')
        return 1

    def ExternalEntityRefHandler(self, *args):
        """Report an external entity reference and return 1.

        Expects exactly four arguments (context, base, system id,
        public id).
        """
        # Unpacking doubles as an argument-count check.
        context, base, sysId, pubId = args
        print('External entity ref: ' + str(args))
        return 1

    def DefaultHandler(self, userData):
        """Accept data no other handler claimed and discard it."""
        pass

    def DefaultHandlerExpand(self, userData):
        """Same as DefaultHandler: accept the data and discard it."""
        pass
| 64 | |
| 65 | |
# One Outputter instance supplies the callbacks for every parser in this
# script.
out = Outputter()
parser = pyexpat.ParserCreate(namespace_separator='!')

# Round-trip the returns_unicode attribute through a few assignments.
parser.returns_unicode = 0
assert parser.returns_unicode == 0
parser.returns_unicode = 1
assert parser.returns_unicode == 1
# Assigning 2 is expected to read back as 1.
parser.returns_unicode = 2
assert parser.returns_unicode == 1
parser.returns_unicode = 0
assert parser.returns_unicode == 0

# Parser attributes to hook up; each one is also the name of the
# Outputter method that services it.
HANDLER_NAMES = [
    'StartElementHandler',
    'EndElementHandler',
    'CharacterDataHandler',
    'ProcessingInstructionHandler',
    'UnparsedEntityDeclHandler',
    'NotationDeclHandler',
    'StartNamespaceDeclHandler',
    'EndNamespaceDeclHandler',
    'CommentHandler',
    'StartCdataSectionHandler',
    'EndCdataSectionHandler',
    'DefaultHandler',
    'DefaultHandlerExpand',
    # Left unregistered, exactly as before:
    #'NotStandaloneHandler',
    'ExternalEntityRefHandler',
]

# Attach the bound Outputter method to the parser attribute of the same name.
for name in HANDLER_NAMES:
    setattr(parser, name, getattr(out, name))
| 87 | |
# Sample document fed to every parse run below.  It is meant to trigger
# each registered handler: processing instruction, comment, notation and
# entity declarations, namespaced element, CDATA section and an external
# entity reference.
#
# The two non-Latin-1/non-ASCII characters are written as numeric
# character references.  The document declares iso-8859-1, which cannot
# represent U+1F40 (&#8000;) as a literal character, and keeping the
# literal pure ASCII also makes it independent of the source file's
# encoding.
data = """<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
<?xml-stylesheet href="stylesheet.css"?>
<!-- comment data -->
<!DOCTYPE quotations SYSTEM "quotations.dtd" [
<!ELEMENT root ANY>
<!NOTATION notation SYSTEM "notation.jpeg">
<!ENTITY acirc "&#226;">
<!ENTITY external_entity SYSTEM "entity.file">
<!ENTITY unparsed_entity SYSTEM "entity.file" NDATA notation>
%unparsed_entity;
]>

<root attr1="value1" attr2="value2&#8000;">
<myns:subelement xmlns:myns="http://www.python.org/namespace">
Contents of subelements
</myns:subelement>
<sub2><![CDATA[contents of CDATA section]]></sub2>
&external_entity;
</root>
"""
| 108 | |
def _report_parse_error(failed_parser):
    """Print expat's error code, message and position for *failed_parser*.

    Each line is printed as one pre-joined string, which gives the same
    text as the space-separated print statement it replaces.
    """
    code = failed_parser.ErrorCode
    print('** Error ' + str(code) + ' ' + str(pyexpat.ErrorString(code)))
    print('** Line ' + str(failed_parser.ErrorLineNumber))
    print('** Column ' + str(failed_parser.ErrorColumnNumber))
    print('** Byte ' + str(failed_parser.ErrorByteIndex))


def _make_parser(unicode_output):
    """Return a new '!'-separated namespace parser wired to ``out``.

    ``unicode_output`` is stored in the parser's ``returns_unicode``
    attribute before the handlers listed in HANDLER_NAMES are attached.
    """
    fresh = pyexpat.ParserCreate(namespace_separator='!')
    fresh.returns_unicode = unicode_output
    for handler_name in HANDLER_NAMES:
        setattr(fresh, handler_name, getattr(out, handler_name))
    return fresh


# Produce UTF-8 output, reusing the parser that already has its handlers
# installed.
parser.returns_unicode = 0
try:
    parser.Parse(data, 1)
except pyexpat.error:
    _report_parse_error(parser)

# Try the parse again, this time producing Unicode output
parser = _make_parser(1)
try:
    parser.Parse(data, 1)
except pyexpat.error:
    _report_parse_error(parser)

# Try parsing a file
import StringIO
parser = _make_parser(1)
# Named 'source' rather than 'file' so that no builtin name is shadowed.
source = StringIO.StringIO(data)
try:
    parser.ParseFile(source)
except pyexpat.error:
    _report_parse_error(parser)
| 148 | |