blob: d6bd84b30f607bdb72c2a12b01977a37b5b4da11 [file] [log] [blame]
# Very simple test - Parse a file and print what happens

# XXX TypeErrors on calling handlers, or on bad return values from a
# handler, are obscure and unhelpful.
5
6import sys, string
7import os
8
9import pyexpat
10
11class Outputter:
12 def StartElementHandler(self, name, attrs):
Guido van Rossum3e06ab12000-06-29 19:35:29 +000013 print 'Start element:\n\t', repr(name), attrs
Guido van Rossumaad67612000-05-08 17:31:04 +000014
15 def EndElementHandler(self, name):
Guido van Rossum3e06ab12000-06-29 19:35:29 +000016 print 'End element:\n\t', repr(name)
Guido van Rossumaad67612000-05-08 17:31:04 +000017
18 def CharacterDataHandler(self, data):
19 data = string.strip(data)
20 if data:
21 print 'Character data:'
22 print '\t', repr(data)
23
24 def ProcessingInstructionHandler(self, target, data):
Guido van Rossum3e06ab12000-06-29 19:35:29 +000025 print 'PI:\n\t', repr(target), repr(data)
Guido van Rossumaad67612000-05-08 17:31:04 +000026
27 def StartNamespaceDeclHandler(self, prefix, uri):
Guido van Rossum3e06ab12000-06-29 19:35:29 +000028 print 'NS decl:\n\t', repr(prefix), repr(uri)
Guido van Rossumaad67612000-05-08 17:31:04 +000029
30 def EndNamespaceDeclHandler(self, prefix):
Guido van Rossum3e06ab12000-06-29 19:35:29 +000031 print 'End of NS decl:\n\t', repr(prefix)
Guido van Rossumaad67612000-05-08 17:31:04 +000032
33 def StartCdataSectionHandler(self):
34 print 'Start of CDATA section'
35
36 def EndCdataSectionHandler(self):
37 print 'End of CDATA section'
38
39 def CommentHandler(self, text):
40 print 'Comment:\n\t', repr(text)
41
42 def NotationDeclHandler(self, *args):
43 name, base, sysid, pubid = args
44 print 'Notation declared:', args
45
46 def UnparsedEntityDeclHandler(self, *args):
47 entityName, base, systemId, publicId, notationName = args
48 print 'Unparsed entity decl:\n\t', args
49
50 def NotStandaloneHandler(self, userData):
51 print 'Not standalone'
52 return 1
53
Guido van Rossum3e06ab12000-06-29 19:35:29 +000054 def ExternalEntityRefHandler(self, *args):
55 context, base, sysId, pubId = args
56 print 'External entity ref:', args
Guido van Rossumaad67612000-05-08 17:31:04 +000057 return 1
58
59 def DefaultHandler(self, userData):
60 pass
61
62 def DefaultHandlerExpand(self, userData):
63 pass
64
65
# One Outputter instance supplies the callbacks; the parser is created
# with '!' as its namespace separator.
out = Outputter()
parser = pyexpat.ParserCreate(namespace_separator='!')

# Test getting/setting returns_unicode: assign the first value of each
# pair and check that the attribute then reads back as the second.
for assigned, expected in ((0, 0), (1, 1), (2, 1), (0, 0)):
    parser.returns_unicode = assigned
    assert parser.returns_unicode == expected

# Names of the Outputter methods that get installed on a parser.
HANDLER_NAMES = [
    'StartElementHandler',
    'EndElementHandler',
    'CharacterDataHandler',
    'ProcessingInstructionHandler',
    'UnparsedEntityDeclHandler',
    'NotationDeclHandler',
    'StartNamespaceDeclHandler',
    'EndNamespaceDeclHandler',
    'CommentHandler',
    'StartCdataSectionHandler',
    'EndCdataSectionHandler',
    'DefaultHandler',
    'DefaultHandlerExpand',
    # 'NotStandaloneHandler',  (defined on Outputter but not registered here)
    'ExternalEntityRefHandler',
]

# Install each listed bound method on the parser under the same name.
for handler_name in HANDLER_NAMES:
    setattr(parser, handler_name, getattr(out, handler_name))
87
# Sample document used as the parser input in this script.  It contains an
# XML declaration, a processing instruction, a comment, a DOCTYPE with an
# internal subset (element, notation and entity declarations), a root
# element with two attributes (one holding a character reference), a
# namespace-prefixed child element, a CDATA section and a reference to the
# external entity declared in the DOCTYPE.
data = """<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
<?xml-stylesheet href="stylesheet.css"?>
<!-- comment data -->
<!DOCTYPE quotations SYSTEM "quotations.dtd" [
<!ELEMENT root ANY>
<!NOTATION notation SYSTEM "notation.jpeg">
<!ENTITY acirc "&#226;">
<!ENTITY external_entity SYSTEM "entity.file">
<!ENTITY unparsed_entity SYSTEM "entity.file" NDATA notation>
%unparsed_entity;
]>

<root attr1="value1" attr2="value2&#8000;">
<myns:subelement xmlns:myns="http://www.python.org/namespace">
 Contents of subelements
</myns:subelement>
<sub2><![CDATA[contents of CDATA section]]></sub2>
&external_entity;
</root>
"""
108
Guido van Rossum3e06ab12000-06-29 19:35:29 +0000109# Produce UTF-8 output
110parser.returns_unicode = 0
Guido van Rossumaad67612000-05-08 17:31:04 +0000111try:
112 parser.Parse(data, 1)
113except pyexpat.error:
114 print '** Error', parser.ErrorCode, pyexpat.ErrorString( parser.ErrorCode)
115 print '** Line', parser.ErrorLineNumber
116 print '** Column', parser.ErrorColumnNumber
117 print '** Byte', parser.ErrorByteIndex
118
Guido van Rossum3e06ab12000-06-29 19:35:29 +0000119# Try the parse again, this time producing Unicode output
120parser = pyexpat.ParserCreate(namespace_separator='!')
121parser.returns_unicode = 1
122
123for name in HANDLER_NAMES:
124 setattr(parser, name, getattr(out, name) )
125try:
126 parser.Parse(data, 1)
127except pyexpat.error:
128 print '** Error', parser.ErrorCode, pyexpat.ErrorString( parser.ErrorCode)
129 print '** Line', parser.ErrorLineNumber
130 print '** Column', parser.ErrorColumnNumber
131 print '** Byte', parser.ErrorByteIndex
132
133# Try parsing a file
134parser = pyexpat.ParserCreate(namespace_separator='!')
135parser.returns_unicode = 1
136
137for name in HANDLER_NAMES:
138 setattr(parser, name, getattr(out, name) )
139import StringIO
140file = StringIO.StringIO(data)
141try:
142 parser.ParseFile(file)
143except pyexpat.error:
144 print '** Error', parser.ErrorCode, pyexpat.ErrorString( parser.ErrorCode)
145 print '** Line', parser.ErrorLineNumber
146 print '** Column', parser.ErrorColumnNumber
147 print '** Byte', parser.ErrorByteIndex
148