blob: b5b9ff336273ee332c5d546b6bce6fbdac418519 [file] [log] [blame]
Fred Drakea12adfe2000-09-18 17:40:22 +00001"""\
2A library of useful helper classes to the SAX classes, for the
Fred Drake45cd9de2000-06-29 19:34:54 +00003convenience of application and driver writers.
Fred Drake45cd9de2000-06-29 19:34:54 +00004"""
5
Martin v. Löwis58af43f2000-09-24 21:31:06 +00006import os, urlparse, urllib, types
Guido van Rossum3b271052006-08-17 09:10:09 +00007from . import handler
8from . import xmlreader
Fred Drakea12adfe2000-09-18 17:40:22 +00009
# Collect the string types this interpreter provides.  ``str`` always exists;
# ``types.StringType`` and ``types.UnicodeType`` are only present on older
# interpreters, so each attribute is probed on its own.  (Reading
# ``types.StringType`` again inside an ``except AttributeError`` handler would
# simply raise a second time when that attribute is the missing one.)
_StringTypes = [str]
for _type_name in ("StringType", "UnicodeType"):
    _string_type = getattr(types, _type_name, None)
    # Skip missing attributes and aliases of a type that is already listed.
    if _string_type is not None and _string_type not in _StringTypes:
        _StringTypes.append(_string_type)
del _type_name, _string_type
14
# Pick the error handler used when encoding output: use "xmlcharrefreplace"
# when the codecs module provides it, otherwise fall back to "strict".
try:
    from codecs import xmlcharrefreplace_errors as _xmlcharref_probe
except ImportError:
    _error_handling = "strict"
else:
    # The import was only a capability check; do not keep the name around.
    del _xmlcharref_probe
    _error_handling = "xmlcharrefreplace"
23
def __dict_replace(s, d):
    """Return s with every key of d replaced by its associated value.

    The replacements are applied one after another, in the iteration order
    of d.items(); each one works on the result of the previous one.
    """
    for pair in d.items():
        # pair is (substring to find, replacement text)
        s = s.replace(*pair)
    return s
Martin v. Löwis58af43f2000-09-24 21:31:06 +000029
def escape(data, entities={}):
    """Return data with the characters &, < and > escaped.

    Additional substrings can be escaped by supplying a dictionary as the
    optional entities parameter.  Its keys and values must all be strings;
    every occurrence of a key is replaced with the corresponding value
    after the three standard replacements have been made.
    """
    # The ampersand has to be handled first: the other two replacements
    # introduce "&" characters that must not be escaped again.
    escaped = (data.replace("&", "&amp;")
                   .replace(">", "&gt;")
                   .replace("<", "&lt;"))
    if not entities:
        return escaped
    return __dict_replace(escaped, entities)
Martin v. Löwis74b51ac2002-10-26 14:50:45 +000045
def unescape(data, entities={}):
    """Return data with &amp;, &lt; and &gt; turned back into &, < and >.

    Additional substrings can be unescaped by supplying a dictionary as the
    optional entities parameter.  Its keys and values must all be strings;
    every occurrence of a key is replaced with the corresponding value.
    """
    text = data
    for reference, character in (("&lt;", "<"), ("&gt;", ">")):
        text = text.replace(reference, character)
    if entities:
        text = __dict_replace(text, entities)
    # The ampersand has to be handled last, so that "&amp;lt;" comes out
    # as "&lt;" instead of being unescaped twice.
    return text.replace("&amp;", "&")
Fred Drake45cd9de2000-06-29 19:34:54 +000059
def quoteattr(data, entities={}):
    """Escape data and wrap it in quotes for use as an attribute value.

    The characters &, < and > are escaped, as are newline, carriage return
    and tab (written as numeric character references).  The result is
    wrapped in double quotes unless it contains a double quote; in that
    case single quotes are used, and if it contains both kinds of quote
    the double quotes are escaped as &quot; and double quotes are used.

    Additional substrings can be escaped by supplying a dictionary as the
    optional entities parameter.  Its keys and values must all be strings;
    every occurrence of a key is replaced with the corresponding value.
    """
    # Work on a copy so the caller's dictionary is never modified.
    table = entities.copy()
    table['\n'] = '&#10;'
    table['\r'] = '&#13;'
    table['\t'] = '&#9;'
    escaped = escape(data, table)

    if '"' not in escaped:
        return '"%s"' % escaped
    if "'" not in escaped:
        return "'%s'" % escaped
    # Both kinds of quote occur: escape the double quotes.
    return '"%s"' % escaped.replace('"', "&quot;")
82
Fred Drakea12adfe2000-09-18 17:40:22 +000083
class XMLGenerator(handler.ContentHandler):
    """ContentHandler that writes the events it receives as XML text.

    out is the object whose write() method receives the output; it
    defaults to sys.stdout.  encoding is the name written into the XML
    declaration and used to encode any text that is not a str instance.
    """

    # Namespace that is bound to the "xml" prefix by definition; it never
    # needs to be declared through startPrefixMapping.
    _XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace'

    def __init__(self, out=None, encoding="iso-8859-1"):
        if out is None:
            import sys
            out = sys.stdout
        handler.ContentHandler.__init__(self)
        self._out = out
        self._ns_contexts = [{}] # contains uri -> prefix dicts
        self._current_context = self._ns_contexts[-1]
        # (prefix, uri) pairs announced by startPrefixMapping that have not
        # yet been written out as xmlns attributes.
        self._undeclared_ns_maps = []
        self._encoding = encoding

    def _write(self, text):
        """Write text to the output, encoding it first if it is not a str."""
        if isinstance(text, str):
            self._out.write(text)
        else:
            self._out.write(text.encode(self._encoding, _error_handling))

    def _qname(self, name):
        """Build the name to write for a (namespace uri, local name) pair.

        A name without a namespace, or whose namespace is mapped to an
        empty prefix (the default namespace), is written as the bare local
        name.  Raises KeyError if the namespace has no prefix mapping in
        the current context.
        """
        uri, local = name[0], name[1]
        if not uri:
            return local
        if uri == self._XML_NAMESPACE:
            return 'xml:' + local
        prefix = self._current_context[uri]
        if not prefix:
            return local
        return prefix + ":" + local

    # ContentHandler methods

    def startDocument(self):
        """Write the XML declaration, naming the configured encoding."""
        self._write('<?xml version="1.0" encoding="%s"?>\n' %
                        self._encoding)

    def startPrefixMapping(self, prefix, uri):
        """Record a prefix mapping to be declared on the next NS element."""
        # Save a snapshot of the context as it was before this mapping.
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix
        self._undeclared_ns_maps.append((prefix, uri))

    def endPrefixMapping(self, prefix):
        """Return to the context saved by the latest startPrefixMapping."""
        self._current_context = self._ns_contexts[-1]
        del self._ns_contexts[-1]

    def startElement(self, name, attrs):
        """Write a start tag with its attributes (no namespace handling)."""
        self._write('<' + name)
        for (attr_name, value) in attrs.items():
            self._write(' %s=%s' % (attr_name, quoteattr(value)))
        self._write('>')

    def endElement(self, name):
        """Write the end tag for name."""
        self._write('</%s>' % name)

    def startElementNS(self, name, qname, attrs):
        """Write a start tag for a namespace-aware element.

        name and the keys of attrs are (namespace uri, local name) pairs.
        Pending prefix mappings are written as xmlns declarations on this
        tag.  The qname argument is not used; the prefix is rebuilt from
        the current namespace context.
        """
        self._write('<' + self._qname(name))

        for prefix, uri in self._undeclared_ns_maps:
            if prefix:
                self._write(' xmlns:%s="%s"' % (prefix, uri))
            else:
                # An empty prefix declares the default namespace.
                self._write(' xmlns="%s"' % uri)
        self._undeclared_ns_maps = []

        for (attr_name, value) in attrs.items():
            self._write(' %s=%s' % (self._qname(attr_name), quoteattr(value)))
        self._write('>')

    def endElementNS(self, name, qname):
        """Write the end tag for a namespace-aware element."""
        self._write('</%s>' % self._qname(name))

    def characters(self, content):
        """Write character data with &, < and > escaped."""
        self._write(escape(content))

    def ignorableWhitespace(self, content):
        """Write content unchanged."""
        self._write(content)

    def processingInstruction(self, target, data):
        """Write a processing instruction."""
        self._write('<?%s %s?>' % (target, data))
Fred Drake45cd9de2000-06-29 19:34:54 +0000160
Fred Drakea12adfe2000-09-18 17:40:22 +0000161
class XMLFilterBase(xmlreader.XMLReader):
    """Base class for filters placed between an XMLReader and the
    application's event handlers.

    Every request is passed to the parent reader and every event is
    passed to the registered handlers without modification.  Subclasses
    override individual methods to change the event stream or the
    configuration requests on their way through.
    """

    def __init__(self, parent = None):
        xmlreader.XMLReader.__init__(self)
        self._parent = parent

    # XMLFilter methods

    def setParent(self, parent):
        """Set the reader this filter takes its events from."""
        self._parent = parent

    def getParent(self):
        """Return the reader this filter takes its events from."""
        return self._parent

    # XMLReader methods

    def parse(self, source):
        """Register this filter as every handler of the parent reader,
        then ask the parent to parse source."""
        upstream = self._parent
        upstream.setContentHandler(self)
        upstream.setErrorHandler(self)
        upstream.setEntityResolver(self)
        upstream.setDTDHandler(self)
        upstream.parse(source)

    def setLocale(self, locale):
        """Pass the locale setting to the parent reader."""
        self._parent.setLocale(locale)

    def getFeature(self, name):
        """Return the parent reader's state for the feature name."""
        return self._parent.getFeature(name)

    def setFeature(self, name, state):
        """Set the feature name on the parent reader."""
        self._parent.setFeature(name, state)

    def getProperty(self, name):
        """Return the parent reader's value for the property name."""
        return self._parent.getProperty(name)

    def setProperty(self, name, value):
        """Set the property name on the parent reader."""
        self._parent.setProperty(name, value)

    # ErrorHandler methods

    def warning(self, exception):
        """Pass a warning event to the error handler."""
        self._err_handler.warning(exception)

    def error(self, exception):
        """Pass an error event to the error handler."""
        self._err_handler.error(exception)

    def fatalError(self, exception):
        """Pass a fatalError event to the error handler."""
        self._err_handler.fatalError(exception)

    # ContentHandler methods

    def setDocumentLocator(self, locator):
        """Pass the document locator to the content handler."""
        self._cont_handler.setDocumentLocator(locator)

    def startDocument(self):
        """Pass a startDocument event to the content handler."""
        self._cont_handler.startDocument()

    def endDocument(self):
        """Pass an endDocument event to the content handler."""
        self._cont_handler.endDocument()

    def startPrefixMapping(self, prefix, uri):
        """Pass a startPrefixMapping event to the content handler."""
        self._cont_handler.startPrefixMapping(prefix, uri)

    def endPrefixMapping(self, prefix):
        """Pass an endPrefixMapping event to the content handler."""
        self._cont_handler.endPrefixMapping(prefix)

    def startElement(self, name, attrs):
        """Pass a startElement event to the content handler."""
        self._cont_handler.startElement(name, attrs)

    def endElement(self, name):
        """Pass an endElement event to the content handler."""
        self._cont_handler.endElement(name)

    def startElementNS(self, name, qname, attrs):
        """Pass a startElementNS event to the content handler."""
        self._cont_handler.startElementNS(name, qname, attrs)

    def endElementNS(self, name, qname):
        """Pass an endElementNS event to the content handler."""
        self._cont_handler.endElementNS(name, qname)

    def characters(self, content):
        """Pass a characters event to the content handler."""
        self._cont_handler.characters(content)

    def ignorableWhitespace(self, chars):
        """Pass an ignorableWhitespace event to the content handler."""
        self._cont_handler.ignorableWhitespace(chars)

    def processingInstruction(self, target, data):
        """Pass a processingInstruction event to the content handler."""
        self._cont_handler.processingInstruction(target, data)

    def skippedEntity(self, name):
        """Pass a skippedEntity event to the content handler."""
        self._cont_handler.skippedEntity(name)

    # DTDHandler methods

    def notationDecl(self, name, publicId, systemId):
        """Pass a notationDecl event to the DTD handler."""
        self._dtd_handler.notationDecl(name, publicId, systemId)

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        """Pass an unparsedEntityDecl event to the DTD handler."""
        self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata)

    # EntityResolver methods

    def resolveEntity(self, publicId, systemId):
        """Return whatever the entity resolver returns for the entity."""
        return self._ent_handler.resolveEntity(publicId, systemId)
270
Lars Gustäbel523b0a62000-09-24 18:54:49 +0000271# --- Utility functions
272
def prepare_input_source(source, base = ""):
    """Return an InputSource for source that has a byte stream attached.

    source may be a string (used as the system identifier), an object
    with a read() method (used as the byte stream; its name attribute,
    if present, becomes the system identifier), or an InputSource.

    When the resulting InputSource has no byte stream, its system
    identifier is resolved against base: if it names an existing file
    relative to the directory of base, that file is opened in binary
    mode; otherwise it is joined with base as a URL and opened with
    urllib.  The opened stream is stored on the source.
    """

    # isinstance rather than an exact type comparison, so that instances
    # of string subclasses are treated as system identifiers as well.
    if isinstance(source, tuple(_StringTypes)):
        source = xmlreader.InputSource(source)
    elif hasattr(source, "read"):
        stream = source
        source = xmlreader.InputSource()
        source.setByteStream(stream)
        if hasattr(stream, "name"):
            source.setSystemId(stream.name)

    if source.getByteStream() is None:
        sysid = source.getSystemId()
        basehead = os.path.dirname(os.path.normpath(base))
        sysidfilename = os.path.join(basehead, sysid)
        if os.path.isfile(sysidfilename):
            source.setSystemId(sysidfilename)
            f = open(sysidfilename, "rb")
        else:
            source.setSystemId(urlparse.urljoin(base, sysid))
            f = urllib.urlopen(source.getSystemId())

        source.setByteStream(f)

    return source