blob: 035715c289141d10afc4439d905fd21d2a9f3207 [file] [log] [blame]
"""\
A library of useful helper classes to the SAX classes, for the
convenience of application and driver writers.
"""
5
Martin v. Löwis58af43f2000-09-24 21:31:06 +00006import os, urlparse, urllib, types
Fred Drake45cd9de2000-06-29 19:34:54 +00007import handler
Lars Gustäbelfc643c32000-09-24 10:53:31 +00008import xmlreader
Fred Drakea12adfe2000-09-18 17:40:22 +00009
# Types treated as "a string" when deciding whether an argument is a system
# identifier.  types.UnicodeType is added only when the types module
# provides it; otherwise plain strings are the only accepted type.
_StringTypes = [types.StringType]
if hasattr(types, "UnicodeType"):
    _StringTypes.append(types.UnicodeType)
14
def __dict_replace(s, d):
    """Return `s` with each key of `d` replaced by its mapped value.

    The replacements are applied one after another in the dictionary's
    iteration order, so a later replacement operates on the output of the
    earlier ones.
    """
    for old in d:
        s = s.replace(old, d[old])
    return s
Martin v. Löwis58af43f2000-09-24 21:31:06 +000020
def escape(data, entities={}):
    """Escape &, <, and > in a string of data.

    Further substitutions can be requested through the optional entities
    dictionary.  Its keys and values must all be strings; every occurrence
    of a key is replaced with the corresponding value once the three
    standard characters have been handled.
    """
    # The ampersand must be processed first: the references produced for
    # the other characters contain "&" themselves.
    for char, reference in (("&", "&amp;"), (">", "&gt;"), ("<", "&lt;")):
        data = data.replace(char, reference)
    if not entities:
        return data
    return __dict_replace(data, entities)
Martin v. Löwis74b51ac2002-10-26 14:50:45 +000036
def unescape(data, entities={}):
    """Unescape &amp;, &lt;, and &gt; in a string of data.

    Further substitutions can be requested through the optional entities
    dictionary.  Its keys and values must all be strings; every occurrence
    of a key is replaced with the corresponding value.
    """
    for reference, char in (("&lt;", "<"), ("&gt;", ">")):
        data = data.replace(reference, char)
    if entities:
        data = __dict_replace(data, entities)
    # The ampersand must be processed last so that text such as
    # "&amp;lt;" comes out as "&lt;" rather than "<".
    return data.replace("&amp;", "&")
Fred Drake45cd9de2000-06-29 19:34:54 +000051
def quoteattr(data, entities={}):
    """Escape and quote an attribute value.

    Escape &, <, and > in a string of data, then quote it for use as
    an attribute value.  The \" character will be escaped as well, if
    necessary.

    Newline, carriage return and tab are written as character references
    (&#10;, &#13;, &#9;).  Left literal, an XML parser's attribute-value
    normalization would turn them into spaces and the value would not
    survive a round trip.

    You can escape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.
    A mapping supplied for one of the whitespace characters above takes
    precedence over the default character reference.

    Returns the escaped value including its surrounding quote characters.
    """
    # Build a private table so the caller's dictionary (and the shared
    # default argument) is never modified; caller entries win on conflict.
    replacements = {'\n': '&#10;', '\r': '&#13;', '\t': '&#9;'}
    replacements.update(entities)
    data = escape(data, replacements)

    if '"' not in data:
        return '"%s"' % data
    if "'" not in data:
        # Only double quotes present: single-quote delimiters avoid escaping.
        return "'%s'" % data
    # Both quote kinds present: delimit with double quotes and escape them.
    return '"%s"' % data.replace('"', "&quot;")
72
Fred Drakea12adfe2000-09-18 17:40:22 +000073
class XMLGenerator(handler.ContentHandler):
    """ContentHandler that writes the SAX events it receives as XML text.

    out is any object with a write() method; it defaults to sys.stdout.
    encoding is only used for the encoding name placed in the XML
    declaration written by startDocument(); the strings handed to
    out.write() are not re-encoded by this class.
    """

    def __init__(self, out=None, encoding="iso-8859-1"):
        if out is None:
            import sys
            out = sys.stdout
        handler.ContentHandler.__init__(self)
        self._out = out
        self._ns_contexts = [{}] # contains uri -> prefix dicts
        self._current_context = self._ns_contexts[-1]
        # (prefix, uri) pairs announced by startPrefixMapping() that still
        # have to be written as xmlns declarations on the next start tag.
        self._undeclared_ns_maps = []
        self._encoding = encoding

    def _qname(self, name):
        """Build the name to write for a (namespace uri, localname) pair.

        Names without a namespace, and names in the default namespace
        (mapped to an empty or None prefix), are written unqualified.
        Raises KeyError if the namespace has no active prefix mapping.
        """
        uri, localname = name[0], name[1]
        if uri:
            # The 'xml' prefix is bound to this namespace by definition; it
            # needs no declaration and is normally absent from the context.
            if uri == 'http://www.w3.org/XML/1998/namespace':
                return 'xml:' + localname
            prefix = self._current_context[uri]
            if prefix:
                return prefix + ":" + localname
        return localname

    # ContentHandler methods

    def startDocument(self):
        self._out.write('<?xml version="1.0" encoding="%s"?>\n' %
                        self._encoding)

    def startPrefixMapping(self, prefix, uri):
        # Save a snapshot of the current mappings so endPrefixMapping()
        # can restore them, then record the new mapping.
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix
        self._undeclared_ns_maps.append((prefix, uri))

    def endPrefixMapping(self, prefix):
        self._current_context = self._ns_contexts[-1]
        del self._ns_contexts[-1]

    def startElement(self, name, attrs):
        self._out.write('<' + name)
        for (attr_name, value) in attrs.items():
            self._out.write(' %s=%s' % (attr_name, quoteattr(value)))
        self._out.write('>')

    def endElement(self, name):
        self._out.write('</%s>' % name)

    def startElementNS(self, name, qname, attrs):
        self._out.write('<' + self._qname(name))

        # Declare every mapping started since the previous start tag.  A
        # missing prefix denotes the default namespace, which is declared
        # with a plain "xmlns" attribute.
        for prefix, uri in self._undeclared_ns_maps:
            if prefix:
                self._out.write(' xmlns:%s="%s"' % (prefix, uri))
            else:
                self._out.write(' xmlns="%s"' % uri)
        self._undeclared_ns_maps = []

        for (attr_name, value) in attrs.items():
            self._out.write(' %s=%s' % (self._qname(attr_name),
                                        quoteattr(value)))
        self._out.write('>')

    def endElementNS(self, name, qname):
        self._out.write('</%s>' % self._qname(name))

    def characters(self, content):
        self._out.write(escape(content))

    def ignorableWhitespace(self, content):
        self._out.write(content)

    def processingInstruction(self, target, data):
        self._out.write('<?%s %s?>' % (target, data))
144
Fred Drakea12adfe2000-09-18 17:40:22 +0000145
class XMLFilterBase(xmlreader.XMLReader):
    """This class is designed to sit between an XMLReader and the
    client application's event handlers.  By default, it does nothing
    but pass requests up to the reader and events on to the handlers
    unmodified, but subclasses can override specific methods to modify
    the event stream or the configuration requests as they pass
    through."""

    def __init__(self, parent = None):
        xmlreader.XMLReader.__init__(self)
        self._parent = parent

    # ErrorHandler methods

    def error(self, exception):
        self._err_handler.error(exception)

    def fatalError(self, exception):
        self._err_handler.fatalError(exception)

    def warning(self, exception):
        self._err_handler.warning(exception)

    # ContentHandler methods

    def setDocumentLocator(self, locator):
        self._cont_handler.setDocumentLocator(locator)

    def startDocument(self):
        self._cont_handler.startDocument()

    def endDocument(self):
        self._cont_handler.endDocument()

    def startPrefixMapping(self, prefix, uri):
        self._cont_handler.startPrefixMapping(prefix, uri)

    def endPrefixMapping(self, prefix):
        self._cont_handler.endPrefixMapping(prefix)

    def startElement(self, name, attrs):
        self._cont_handler.startElement(name, attrs)

    def endElement(self, name):
        self._cont_handler.endElement(name)

    def startElementNS(self, name, qname, attrs):
        # Forward to the namespace-aware callback, keeping the qname, so
        # the event pairs up with the endElementNS() event forwarded below.
        self._cont_handler.startElementNS(name, qname, attrs)

    def endElementNS(self, name, qname):
        self._cont_handler.endElementNS(name, qname)

    def characters(self, content):
        self._cont_handler.characters(content)

    def ignorableWhitespace(self, chars):
        self._cont_handler.ignorableWhitespace(chars)

    def processingInstruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    def skippedEntity(self, name):
        self._cont_handler.skippedEntity(name)

    # DTDHandler methods

    def notationDecl(self, name, publicId, systemId):
        self._dtd_handler.notationDecl(name, publicId, systemId)

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata)

    # EntityResolver methods

    def resolveEntity(self, publicId, systemId):
        # The resolver's answer must be handed back to the caller; without
        # the return the filter would always answer None.
        return self._ent_handler.resolveEntity(publicId, systemId)

    # XMLReader methods

    def parse(self, source):
        # Install this filter as every handler of the parent reader so all
        # events flow through the methods above, then start the parent.
        self._parent.setContentHandler(self)
        self._parent.setErrorHandler(self)
        self._parent.setEntityResolver(self)
        self._parent.setDTDHandler(self)
        self._parent.parse(source)

    def setLocale(self, locale):
        self._parent.setLocale(locale)

    def getFeature(self, name):
        return self._parent.getFeature(name)

    def setFeature(self, name, state):
        self._parent.setFeature(name, state)

    def getProperty(self, name):
        return self._parent.getProperty(name)

    def setProperty(self, name, value):
        self._parent.setProperty(name, value)

    # XMLFilter methods

    def getParent(self):
        return self._parent

    def setParent(self, parent):
        self._parent = parent
254
# --- Utility functions
256
def prepare_input_source(source, base = ""):
    """This function takes an InputSource and an optional base URL and
    returns a fully resolved InputSource object ready for reading.

    source may be a string (used as the system identifier), an object
    with a read() method (used as the byte stream; its name attribute,
    if any, becomes the system identifier), or an InputSource.

    When the resulting InputSource has no byte stream, its system
    identifier is resolved against base.  If that names an existing file
    in the directory of base, the file is opened in binary mode;
    otherwise the identifier is joined with base as a URL and opened with
    urllib.urlopen.  The opened stream is stored on the InputSource and
    is not closed here.
    """

    # isinstance() also accepts subclasses of the string types.
    if isinstance(source, tuple(_StringTypes)):
        source = xmlreader.InputSource(source)
    elif hasattr(source, "read"):
        f = source
        source = xmlreader.InputSource()
        source.setByteStream(f)
        if hasattr(f, "name"):
            source.setSystemId(f.name)

    if source.getByteStream() is None:
        sysid = source.getSystemId()
        # Test for, report and open the same path: the system id resolved
        # against the directory containing base.
        basehead = os.path.dirname(os.path.normpath(base))
        sysidfilename = os.path.join(basehead, sysid)
        if os.path.isfile(sysidfilename):
            source.setSystemId(sysidfilename)
            f = open(sysidfilename, "rb")
        else:
            source.setSystemId(urlparse.urljoin(base, sysid))
            f = urllib.urlopen(source.getSystemId())

        source.setByteStream(f)

    return source