blob: c369f98fcfe77783a7c0a7020d46e7274fef119f [file] [log] [blame]
"""\
A library of useful helper classes to the SAX classes, for the
convenience of application and driver writers.
"""
5
Martin v. Löwis58af43f2000-09-24 21:31:06 +00006import os, urlparse, urllib, types
Fred Drake45cd9de2000-06-29 19:34:54 +00007import handler
Lars Gustäbelfc643c32000-09-24 10:53:31 +00008import xmlreader
Fred Drakea12adfe2000-09-18 17:40:22 +00009
# Types treated as "a plain string" when prepare_input_source() decides
# whether its argument is a bare system identifier.  The type objects are
# taken from literals rather than from the ``types`` module because
# ``types.StringType`` / ``types.UnicodeType`` do not exist on every
# interpreter (they were removed in Python 3).  On Python 2 these are the
# very same objects the ``types`` module exposes.
_StringTypes = [type("")]
try:
    # ``unicode`` is missing on builds without a separate Unicode type;
    # in that case only the plain string type is listed.
    _StringTypes.append(type(unicode("")))
except NameError:
    pass
14
def __dict_replace(s, d):
    """Replace substrings of a string using a dictionary.

    Every key of *d* that occurs in *s* is replaced by its value.  The
    replacements are applied one after another in the dictionary's
    iteration order, so the result of one replacement is visible to the
    ones that follow.

    Returns the resulting string; neither *s* nor *d* is modified.
    """
    for key, value in d.items():
        s = s.replace(key, value)
    return s
Martin v. Löwis58af43f2000-09-24 21:31:06 +000020
def escape(data, entities={}):
    """Escape &, <, and > in a string of data.

    You can escape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.

    Returns the escaped string.  The entities dictionary is only read,
    never modified, so the shared default is safe.
    """
    # must do ampersand first, otherwise the "&" of the references
    # produced below would itself be escaped
    data = data.replace("&", "&amp;")
    data = data.replace("<", "&lt;")
    data = data.replace(">", "&gt;")
    if entities:
        data = __dict_replace(data, entities)
    return data
35
def unescape(data, entities={}):
    """Unescape &amp;, &lt;, and &gt; in a string of data.

    You can unescape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.

    Returns the unescaped string.  The entities dictionary is only read,
    never modified.
    """
    data = data.replace("&lt;", "<")
    data = data.replace("&gt;", ">")
    if entities:
        data = __dict_replace(data, entities)
    # must do ampersand last: if "&amp;" were expanded before the other
    # references (including the caller-supplied ones), text such as
    # "&amp;quot;" would first become "&quot;" and then be unescaped a
    # second time.
    return data.replace("&amp;", "&")
Fred Drake45cd9de2000-06-29 19:34:54 +000049
def quoteattr(data, entities={}):
    """Escape and quote an attribute value.

    Escape &, <, and > in a string of data, then quote it for use as
    an attribute value.  The " character will be escaped as well, if
    necessary.  Newline, carriage return and tab characters are written
    as character references so that they survive the attribute-value
    normalization an XML parser applies when reading the value back.

    You can escape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.

    Returns the value including its surrounding quote characters.
    """
    # Work on a private copy so the caller's dictionary (and the shared
    # default) is never modified.
    merged = {}
    merged.update(entities)
    merged.update({'\n': '&#10;', '\r': '&#13;', '\t': '&#9;'})
    data = escape(data, merged)
    if '"' in data:
        if "'" in data:
            # Both quote characters occur: use double quotes and escape
            # the embedded double quotes.
            data = '"%s"' % data.replace('"', "&quot;")
        else:
            # Only double quotes occur: single quotes need no escaping.
            data = "'%s'" % data
    else:
        data = '"%s"' % data
    return data
70
Fred Drakea12adfe2000-09-18 17:40:22 +000071
class XMLGenerator(handler.ContentHandler):
    """ContentHandler that writes the SAX events it receives as XML text.

    Output goes to the ``out`` object through its ``write()`` method
    (``sys.stdout`` when no object is given).  The ``encoding`` name is
    only placed in the XML declaration written by startDocument(); this
    class does not itself encode the text handed to ``out.write()``.
    """

    def __init__(self, out=None, encoding="iso-8859-1"):
        if out is None:
            import sys
            out = sys.stdout
        handler.ContentHandler.__init__(self)
        self._out = out
        self._ns_contexts = [{}] # contains uri -> prefix dicts
        self._current_context = self._ns_contexts[-1]
        # (prefix, uri) pairs announced by startPrefixMapping() that still
        # have to be written as xmlns attributes on the next element.
        self._undeclared_ns_maps = []
        self._encoding = encoding

    def _qname(self, name):
        """Build the qualified name for a (namespace uri, localname) pair.

        Names without a namespace and names in the default namespace
        (mapped to an empty prefix) are returned unqualified.  Raises
        KeyError when the uri has no prefix mapping in scope.
        """
        uri, localname = name[0], name[1]
        if uri:
            # The 'xml' prefix is bound to this namespace by definition
            # and never needs to be declared.
            if uri == 'http://www.w3.org/XML/1998/namespace':
                return 'xml:' + localname
            prefix = self._current_context[uri]
            if prefix:
                return prefix + ":" + localname
        return localname

    # ContentHandler methods

    def startDocument(self):
        self._out.write('<?xml version="1.0" encoding="%s"?>\n' %
                        self._encoding)

    def startPrefixMapping(self, prefix, uri):
        # Save the context in effect before this mapping so that
        # endPrefixMapping() can restore it.
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix
        self._undeclared_ns_maps.append((prefix, uri))

    def endPrefixMapping(self, prefix):
        self._current_context = self._ns_contexts[-1]
        del self._ns_contexts[-1]

    def startElement(self, name, attrs):
        self._out.write('<' + name)
        for (attr_name, value) in attrs.items():
            self._out.write(' %s=%s' % (attr_name, quoteattr(value)))
        self._out.write('>')

    def endElement(self, name):
        self._out.write('</%s>' % name)

    def startElementNS(self, name, qname, attrs):
        self._out.write('<' + self._qname(name))

        # Declare the namespaces announced since the previous element.
        for (prefix, uri) in self._undeclared_ns_maps:
            if prefix:
                self._out.write(' xmlns:%s=%s' % (prefix, quoteattr(uri)))
            else:
                # default namespace declaration
                self._out.write(' xmlns=%s' % quoteattr(uri))
        self._undeclared_ns_maps = []

        for (attr_name, value) in attrs.items():
            self._out.write(' %s=%s' % (self._qname(attr_name),
                                        quoteattr(value)))
        self._out.write('>')

    def endElementNS(self, name, qname):
        self._out.write('</%s>' % self._qname(name))

    def characters(self, content):
        self._out.write(escape(content))

    def ignorableWhitespace(self, content):
        self._out.write(content)

    def processingInstruction(self, target, data):
        self._out.write('<?%s %s?>' % (target, data))
142
Fred Drakea12adfe2000-09-18 17:40:22 +0000143
class XMLFilterBase(xmlreader.XMLReader):
    """This class is designed to sit between an XMLReader and the
    client application's event handlers.  By default, it does nothing
    but pass requests up to the reader and events on to the handlers
    unmodified, but subclasses can override specific methods to modify
    the event stream or the configuration requests as they pass
    through."""

    def __init__(self, parent = None):
        xmlreader.XMLReader.__init__(self)
        # The reader this filter wraps; may also be set later through
        # setParent().
        self._parent = parent

    # ErrorHandler methods

    def error(self, exception):
        self._err_handler.error(exception)

    def fatalError(self, exception):
        self._err_handler.fatalError(exception)

    def warning(self, exception):
        self._err_handler.warning(exception)

    # ContentHandler methods

    def setDocumentLocator(self, locator):
        self._cont_handler.setDocumentLocator(locator)

    def startDocument(self):
        self._cont_handler.startDocument()

    def endDocument(self):
        self._cont_handler.endDocument()

    def startPrefixMapping(self, prefix, uri):
        self._cont_handler.startPrefixMapping(prefix, uri)

    def endPrefixMapping(self, prefix):
        self._cont_handler.endPrefixMapping(prefix)

    def startElement(self, name, attrs):
        self._cont_handler.startElement(name, attrs)

    def endElement(self, name):
        self._cont_handler.endElement(name)

    def startElementNS(self, name, qname, attrs):
        # Forward as a namespace-aware event, mirroring endElementNS().
        self._cont_handler.startElementNS(name, qname, attrs)

    def endElementNS(self, name, qname):
        self._cont_handler.endElementNS(name, qname)

    def characters(self, content):
        self._cont_handler.characters(content)

    def ignorableWhitespace(self, chars):
        self._cont_handler.ignorableWhitespace(chars)

    def processingInstruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    def skippedEntity(self, name):
        self._cont_handler.skippedEntity(name)

    # DTDHandler methods

    def notationDecl(self, name, publicId, systemId):
        self._dtd_handler.notationDecl(name, publicId, systemId)

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata)

    # EntityResolver methods

    def resolveEntity(self, publicId, systemId):
        # The resolver's answer must be handed back to the caller.
        return self._ent_handler.resolveEntity(publicId, systemId)

    # XMLReader methods

    def parse(self, source):
        # Register this filter as every handler of the parent reader so
        # that all events pass through the methods above.
        self._parent.setContentHandler(self)
        self._parent.setErrorHandler(self)
        self._parent.setEntityResolver(self)
        self._parent.setDTDHandler(self)
        self._parent.parse(source)

    def setLocale(self, locale):
        self._parent.setLocale(locale)

    def getFeature(self, name):
        return self._parent.getFeature(name)

    def setFeature(self, name, state):
        self._parent.setFeature(name, state)

    def getProperty(self, name):
        return self._parent.getProperty(name)

    def setProperty(self, name, value):
        self._parent.setProperty(name, value)

    # XMLFilter methods

    def getParent(self):
        return self._parent

    def setParent(self, parent):
        self._parent = parent
252
Lars Gustäbel523b0a62000-09-24 18:54:49 +0000253# --- Utility functions
254
def prepare_input_source(source, base = ""):
    """This function takes an InputSource and an optional base URL and
    returns a fully resolved InputSource object ready for reading.

    source may be a system identifier string, an object with a read()
    method (used as the byte stream; its ``name`` attribute, when
    present, becomes the system id), or an InputSource.  When the
    resulting InputSource has no byte stream yet, its system id is
    resolved against base and opened: as a local file if that path
    exists, otherwise as a URL.
    """

    if isinstance(source, tuple(_StringTypes)):
        source = xmlreader.InputSource(source)
    elif hasattr(source, "read"):
        f = source
        source = xmlreader.InputSource()
        source.setByteStream(f)
        if hasattr(f, "name"):
            source.setSystemId(f.name)

    if source.getByteStream() is None:
        sysid = source.getSystemId()
        # Resolve the system id against the directory of base *before*
        # testing for a file, and open that same resolved path, so the
        # recorded system id always names the file actually read.
        basehead = os.path.dirname(os.path.normpath(base))
        sysidfilename = os.path.join(basehead, sysid)
        if os.path.isfile(sysidfilename):
            source.setSystemId(sysidfilename)
            f = open(sysidfilename, "rb")
        else:
            source.setSystemId(urlparse.urljoin(base, sysid))
            f = urllib.urlopen(source.getSystemId())

        source.setByteStream(f)

    return source