blob: 3f130f3af5909fa3e6fee983ffd481d198c1bf2d [file] [log] [blame]
"""\
A library of useful helper classes to the SAX classes, for the
convenience of application and driver writers.
"""
5
Martin v. Löwis58af43f2000-09-24 21:31:06 +00006import os, urlparse, urllib, types
Fred Drake45cd9de2000-06-29 19:34:54 +00007import handler
Lars Gustäbelfc643c32000-09-24 10:53:31 +00008import xmlreader
Fred Drakea12adfe2000-09-18 17:40:22 +00009
# String types; prepare_input_source() wraps a source whose type is one
# of these in an InputSource.
_StringTypes = [types.StringType, types.UnicodeType]
11
def escape(data, entities={}):
    """Return data with &, <, and > replaced by entity references.

    Further replacements can be requested through the optional entities
    dictionary.  Its keys and values must all be strings; every
    occurrence of a key is replaced with the corresponding value once
    the three standard replacements have been made.
    """
    # "&" is handled first so that the ampersands introduced by the
    # "&lt;" and "&gt;" replacements are not escaped a second time.
    escaped = data.replace("&", "&amp;")
    escaped = escaped.replace("<", "&lt;").replace(">", "&gt;")
    for chars in entities.keys():
        escaped = escaped.replace(chars, entities[chars])
    return escaped
25
Fred Drakea12adfe2000-09-18 17:40:22 +000026
class XMLGenerator(handler.ContentHandler):
    """ContentHandler that writes the SAX events it receives as XML text.

    out is an object with a write() method; sys.stdout is used when it
    is omitted.  encoding is the name placed in the XML declaration
    written by startDocument().  Text is handed to out.write() as is,
    so any actual character encoding is left to the output object.
    """

    def __init__(self, out=None, encoding="iso-8859-1"):
        if out is None:
            import sys
            out = sys.stdout
        handler.ContentHandler.__init__(self)
        self._out = out
        self._ns_contexts = [{}]  # contains uri -> prefix dicts
        self._current_context = self._ns_contexts[-1]
        self._undeclared_ns_maps = []
        self._encoding = encoding

    # Internal helpers

    def _qname(self, name):
        """Return the qualified name for a (uri, localname) pair.

        Names without a namespace (uri is None) and names whose
        namespace is bound to the default prefix are written without a
        prefix; all others are written as "prefix:localname".
        """
        uri, localname = name[0], name[1]
        if uri is None:
            return localname
        prefix = self._current_context[uri]
        if not prefix:
            # Default namespace: the element carries no prefix.
            return localname
        return prefix + ":" + localname

    def _attr_value(self, value):
        """Escape value for use inside a double-quoted attribute."""
        # escape() leaves '"' alone, which would end the attribute
        # early, so it is replaced here as well.
        return escape(value, {'"': "&quot;"})

    # ContentHandler methods

    def startDocument(self):
        """Write the XML declaration, naming the configured encoding."""
        self._out.write('<?xml version="1.0" encoding="%s"?>\n' %
                        self._encoding)

    def startPrefixMapping(self, prefix, uri):
        """Record a new prefix mapping; it is declared on the next
        element written by startElementNS()."""
        # Save a copy of the mappings in force before this one so that
        # endPrefixMapping() can restore them.
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix
        self._undeclared_ns_maps.append((prefix, uri))

    def endPrefixMapping(self, prefix):
        """Restore the mappings saved by the matching
        startPrefixMapping() call."""
        self._current_context = self._ns_contexts[-1]
        del self._ns_contexts[-1]

    def startElement(self, name, attrs):
        """Write a start tag with its attributes."""
        self._out.write('<' + name)
        for attr_name, value in attrs.items():
            self._out.write(' %s="%s"' %
                            (attr_name, self._attr_value(value)))
        self._out.write('>')

    def endElement(self, name):
        """Write an end tag."""
        self._out.write('</%s>' % name)

    def startElementNS(self, name, qname, attrs):
        """Write a namespace-aware start tag.

        name is a (uri, localname) pair and attrs maps such pairs to
        attribute values.  The qname argument is not used; the written
        name is derived from the current prefix mappings.  Mappings
        recorded since the previous start tag are declared here.
        """
        self._out.write('<' + self._qname(name))

        for prefix, uri in self._undeclared_ns_maps:
            if prefix:
                self._out.write(' xmlns:%s="%s"' %
                                (prefix, self._attr_value(uri)))
            else:
                # A missing prefix declares the default namespace.
                self._out.write(' xmlns="%s"' % self._attr_value(uri))
        self._undeclared_ns_maps = []

        for attr_name, value in attrs.items():
            self._out.write(' %s="%s"' %
                            (self._qname(attr_name),
                             self._attr_value(value)))
        self._out.write('>')

    def endElementNS(self, name, qname):
        """Write a namespace-aware end tag for the (uri, localname)
        pair name."""
        self._out.write('</%s>' % self._qname(name))

    def characters(self, content):
        """Write character data with &, < and > escaped."""
        self._out.write(escape(content))

    def ignorableWhitespace(self, content):
        """Write ignorable whitespace unchanged."""
        self._out.write(content)

    def processingInstruction(self, target, data):
        """Write a processing instruction."""
        self._out.write('<?%s %s?>' % (target, data))
89
Fred Drakea12adfe2000-09-18 17:40:22 +000090
class XMLFilterBase(xmlreader.XMLReader):
    """This class is designed to sit between an XMLReader and the
    client application's event handlers.  By default, it does nothing
    but pass requests up to the reader and events on to the handlers
    unmodified, but subclasses can override specific methods to modify
    the event stream or the configuration requests as they pass
    through.

    The reader that requests are passed up to is the parent; it can be
    given to the constructor or installed later with setParent()."""

    def __init__(self, parent=None):
        # The event-forwarding methods below read _cont_handler,
        # _err_handler, _dtd_handler and _ent_handler, which this class
        # never assigns; they are presumably set up by
        # XMLReader.__init__ (not visible here).
        xmlreader.XMLReader.__init__(self)
        self._parent = parent

    # ErrorHandler methods

    def error(self, exception):
        self._err_handler.error(exception)

    def fatalError(self, exception):
        self._err_handler.fatalError(exception)

    def warning(self, exception):
        self._err_handler.warning(exception)

    # ContentHandler methods

    def setDocumentLocator(self, locator):
        self._cont_handler.setDocumentLocator(locator)

    def startDocument(self):
        self._cont_handler.startDocument()

    def endDocument(self):
        self._cont_handler.endDocument()

    def startPrefixMapping(self, prefix, uri):
        self._cont_handler.startPrefixMapping(prefix, uri)

    def endPrefixMapping(self, prefix):
        self._cont_handler.endPrefixMapping(prefix)

    def startElement(self, name, attrs):
        self._cont_handler.startElement(name, attrs)

    def endElement(self, name):
        self._cont_handler.endElement(name)

    def startElementNS(self, name, qname, attrs):
        # Forward to the namespace-aware handler method so that qname
        # is passed on and the call pairs up with endElementNS().
        self._cont_handler.startElementNS(name, qname, attrs)

    def endElementNS(self, name, qname):
        self._cont_handler.endElementNS(name, qname)

    def characters(self, content):
        self._cont_handler.characters(content)

    def ignorableWhitespace(self, chars):
        self._cont_handler.ignorableWhitespace(chars)

    def processingInstruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    def skippedEntity(self, name):
        self._cont_handler.skippedEntity(name)

    # DTDHandler methods

    def notationDecl(self, name, publicId, systemId):
        self._dtd_handler.notationDecl(name, publicId, systemId)

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata)

    # EntityResolver methods

    def resolveEntity(self, publicId, systemId):
        # The resolver's answer must be handed back to the caller.
        return self._ent_handler.resolveEntity(publicId, systemId)

    # XMLReader methods

    def parse(self, source):
        """Install this filter as every handler of the parent reader,
        then have the parent parse source."""
        self._parent.setContentHandler(self)
        self._parent.setErrorHandler(self)
        self._parent.setEntityResolver(self)
        self._parent.setDTDHandler(self)
        self._parent.parse(source)

    def setLocale(self, locale):
        self._parent.setLocale(locale)

    def getFeature(self, name):
        return self._parent.getFeature(name)

    def setFeature(self, name, state):
        self._parent.setFeature(name, state)

    def getProperty(self, name):
        return self._parent.getProperty(name)

    def setProperty(self, name, value):
        self._parent.setProperty(name, value)

    # XMLFilter methods

    def getParent(self):
        """Return the reader this filter passes requests up to."""
        return self._parent

    def setParent(self, parent):
        """Set the reader this filter passes requests up to."""
        self._parent = parent
Lars Gustäbel523b0a62000-09-24 18:54:49 +0000187
188# --- Utility functions
189
def prepare_input_source(source, base=""):
    """This function takes an InputSource and an optional base URL and
    returns a fully resolved InputSource object ready for reading.

    source may also be a string, which is wrapped in an InputSource, or
    an object with a read() method, which becomes the byte stream of a
    new InputSource.  When the resulting InputSource has no byte
    stream, its system identifier is resolved against base and opened
    with urllib.urlopen().
    """
    if hasattr(source, "read"):
        stream = source
        # The stream is not a system identifier, so it is not passed to
        # the InputSource constructor; its name is used when it has one.
        source = xmlreader.InputSource()
        source.setByteStream(stream)
        if hasattr(stream, "name"):
            source.setSystemId(stream.name)
    elif isinstance(source, tuple(_StringTypes)):
        source = xmlreader.InputSource(source)

    if source.getByteStream() is None:
        sysid = source.getSystemId()
        if urlparse.urlparse(sysid)[0] == '':
            # No URL scheme: join sysid onto the directory part of base
            # as a file system path.
            basehead = os.path.split(os.path.normpath(base))[0]
            source.setSystemId(os.path.join(basehead, sysid))
        else:
            source.setSystemId(urlparse.urljoin(base, sysid))

        source.setByteStream(urllib.urlopen(source.getSystemId()))

    return source
211 return source