blob: d6bd84b30f607bdb72c2a12b01977a37b5b4da11 [file] [log] [blame]
# Very simple test - Parse a file and print what happens

# XXX TypeErrors on calling handlers, or on bad return values from a
# handler, are obscure and unhelpful.
5
6import sys, string
7import os
8
9import pyexpat
Andrew M. Kuchlinge188d522000-04-02 05:15:38 +000010
class Outputter:
    """Collection of expat handler callbacks that print each parser event.

    Every ``*Handler`` method writes a short description of the event it
    received to ``sys.stdout``.  The output formats are spelled out
    explicitly instead of relying on the ``print`` statement's soft-space
    rule (no separator is emitted after a string that ends in a tab), so
    the text written is the same as before but no longer depends on that
    rule.
    """

    def _emit(self, text):
        """Write *text* to standard output followed by a newline."""
        sys.stdout.write(text + '\n')

    def StartElementHandler(self, name, attrs):
        """Print the element name and its attributes."""
        self._emit('Start element:\n\t' + repr(name) + ' ' + str(attrs))

    def EndElementHandler(self, name):
        """Print the name of the element being closed."""
        self._emit('End element:\n\t' + repr(name))

    def CharacterDataHandler(self, data):
        """Print character data with surrounding whitespace removed.

        Data consisting only of whitespace produces no output.
        """
        stripped = data.strip()
        if stripped:
            self._emit('Character data:')
            self._emit('\t' + repr(stripped))

    def ProcessingInstructionHandler(self, target, data):
        """Print the target and data of a processing instruction."""
        self._emit('PI:\n\t' + repr(target) + ' ' + repr(data))

    def StartNamespaceDeclHandler(self, prefix, uri):
        """Print the prefix and URI of a namespace declaration."""
        self._emit('NS decl:\n\t' + repr(prefix) + ' ' + repr(uri))

    def EndNamespaceDeclHandler(self, prefix):
        """Print the prefix whose namespace declaration ends."""
        self._emit('End of NS decl:\n\t' + repr(prefix))

    def StartCdataSectionHandler(self):
        """Print a marker for the start of a CDATA section."""
        self._emit('Start of CDATA section')

    def EndCdataSectionHandler(self):
        """Print a marker for the end of a CDATA section."""
        self._emit('End of CDATA section')

    def CommentHandler(self, text):
        """Print the text of a comment."""
        self._emit('Comment:\n\t' + repr(text))

    def NotationDeclHandler(self, *args):
        """Print the arguments of a notation declaration.

        Raises ValueError unless exactly four arguments are passed.
        """
        # The unpacking is kept purely as an arity check; the individual
        # values are not used.
        name, base, sysid, pubid = args
        self._emit('Notation declared: ' + str(args))

    def UnparsedEntityDeclHandler(self, *args):
        """Print the arguments of an unparsed entity declaration.

        Raises ValueError unless exactly five arguments are passed.
        """
        # Arity check only; the individual values are not used.
        entityName, base, systemId, publicId, notationName = args
        self._emit('Unparsed entity decl:\n\t' + str(args))

    def NotStandaloneHandler(self, userData):
        """Print a 'Not standalone' marker and return 1."""
        self._emit('Not standalone')
        return 1

    def ExternalEntityRefHandler(self, *args):
        """Print the arguments of an external entity reference; return 1.

        Raises ValueError unless exactly four arguments are passed.
        """
        # Arity check only; the individual values are not used.
        context, base, sysId, pubId = args
        self._emit('External entity ref: ' + str(args))
        return 1

    def DefaultHandler(self, userData):
        """Accept the event and do nothing."""
        pass

    def DefaultHandlerExpand(self, userData):
        """Accept the event and do nothing."""
        pass


# Handler object whose bound methods are installed on each parser.
out = Outputter()
parser = pyexpat.ParserCreate(namespace_separator='!')

# Test getting/setting returns_unicode
parser.returns_unicode = 0
assert parser.returns_unicode == 0
parser.returns_unicode = 1
assert parser.returns_unicode == 1
# Assigning 2 is expected to read back as 1.
parser.returns_unicode = 2
assert parser.returns_unicode == 1
parser.returns_unicode = 0
assert parser.returns_unicode == 0

# Names of the parser attributes that receive the same-named method of
# ``out``.  'NotStandaloneHandler' is currently disabled.
HANDLER_NAMES = [
    'StartElementHandler', 'EndElementHandler',
    'CharacterDataHandler', 'ProcessingInstructionHandler',
    'UnparsedEntityDeclHandler', 'NotationDeclHandler',
    'StartNamespaceDeclHandler', 'EndNamespaceDeclHandler',
    'CommentHandler', 'StartCdataSectionHandler',
    'EndCdataSectionHandler',
    'DefaultHandler', 'DefaultHandlerExpand',
    #'NotStandaloneHandler',
    'ExternalEntityRefHandler',
]
for name in HANDLER_NAMES:
    setattr(parser, name, getattr(out, name))

# XML document handed to every parse run in this script.  It contains an
# XML declaration, a processing instruction, a comment, a DOCTYPE with
# element/notation/entity declarations, a namespaced element, a CDATA
# section and an external entity reference.
data = """<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
<?xml-stylesheet href="stylesheet.css"?>
<!-- comment data -->
<!DOCTYPE quotations SYSTEM "quotations.dtd" [
<!ELEMENT root ANY>
<!NOTATION notation SYSTEM "notation.jpeg">
<!ENTITY acirc "&#226;">
<!ENTITY external_entity SYSTEM "entity.file">
<!ENTITY unparsed_entity SYSTEM "entity.file" NDATA notation>
%unparsed_entity;
]>

<root attr1="value1" attr2="value2&#8000;">
<myns:subelement xmlns:myns="http://www.python.org/namespace">
 Contents of subelements
</myns:subelement>
<sub2><![CDATA[contents of CDATA section]]></sub2>
&external_entity;
</root>
"""

Andrew M. Kuchling7fd7e362000-06-27 00:37:25 +0000109# Produce UTF-8 output
110parser.returns_unicode = 0
Andrew M. Kuchlingb17664d2000-03-31 15:44:52 +0000111try:
112 parser.Parse(data, 1)
113except pyexpat.error:
114 print '** Error', parser.ErrorCode, pyexpat.ErrorString( parser.ErrorCode)
115 print '** Line', parser.ErrorLineNumber
116 print '** Column', parser.ErrorColumnNumber
117 print '** Byte', parser.ErrorByteIndex
118
Andrew M. Kuchling7fd7e362000-06-27 00:37:25 +0000119# Try the parse again, this time producing Unicode output
120parser = pyexpat.ParserCreate(namespace_separator='!')
121parser.returns_unicode = 1
122
123for name in HANDLER_NAMES:
124 setattr(parser, name, getattr(out, name) )
125try:
126 parser.Parse(data, 1)
127except pyexpat.error:
128 print '** Error', parser.ErrorCode, pyexpat.ErrorString( parser.ErrorCode)
129 print '** Line', parser.ErrorLineNumber
130 print '** Column', parser.ErrorColumnNumber
131 print '** Byte', parser.ErrorByteIndex
132
133# Try parsing a file
134parser = pyexpat.ParserCreate(namespace_separator='!')
135parser.returns_unicode = 1
136
137for name in HANDLER_NAMES:
138 setattr(parser, name, getattr(out, name) )
139import StringIO
140file = StringIO.StringIO(data)
141try:
142 parser.ParseFile(file)
143except pyexpat.error:
144 print '** Error', parser.ErrorCode, pyexpat.ErrorString( parser.ErrorCode)
145 print '** Line', parser.ErrorLineNumber
146 print '** Column', parser.ErrorColumnNumber
147 print '** Byte', parser.ErrorByteIndex
148