blob: 2b0d2047356802cd66de4607e056f89c5bce178f [file] [log] [blame]
"""\
A library of useful helper classes and functions for the SAX classes,
for the convenience of application and driver writers.
"""
5
Martin v. Löwis58af43f2000-09-24 21:31:06 +00006import os, urlparse, urllib, types
Guido van Rossum3b271052006-08-17 09:10:09 +00007from . import handler
8from . import xmlreader
Fred Drakea12adfe2000-09-18 17:40:22 +00009
# Types that prepare_input_source() treats as "a string was passed in".
# types.UnicodeType is only included when this interpreter defines it.
_StringTypes = [types.StringType]
if hasattr(types, "UnicodeType"):
    _StringTypes.append(types.UnicodeType)
14
# Choose the error policy passed to .encode() when output is written:
# use "xmlcharrefreplace" if the codecs module provides that handler,
# otherwise fall back to "strict".
try:
    from codecs import xmlcharrefreplace_errors
except ImportError:
    _error_handling = "strict"
else:
    _error_handling = "xmlcharrefreplace"
    # Only the handler's existence matters; remove the probe name again.
    del xmlcharrefreplace_errors
23
def __dict_replace(s, d):
    """Return s with each key of d replaced by its mapped value.

    The substitutions are applied one after another, in the iteration
    order of d, each one operating on the result of the previous one.
    """
    for old in d:
        s = s.replace(old, d[old])
    return s
Martin v. Löwis58af43f2000-09-24 21:31:06 +000029
def escape(data, entities={}):
    """Return data with &, < and > replaced by &amp;, &lt; and &gt;.

    Further replacements can be requested through the optional entities
    dictionary: every key found in the (already escaped) data is replaced
    with its value.  Keys and values must all be strings.
    """
    # The ampersand comes first; otherwise the "&" of the "&gt;" and
    # "&lt;" produced below would itself be escaped again.
    for raw, reference in (("&", "&amp;"), (">", "&gt;"), ("<", "&lt;")):
        data = data.replace(raw, reference)
    if entities:
        data = __dict_replace(data, entities)
    return data
Martin v. Löwis74b51ac2002-10-26 14:50:45 +000045
def unescape(data, entities={}):
    """Return data with &amp;, &lt; and &gt; turned back into &, < and >.

    Further replacements can be requested through the optional entities
    dictionary: every key found in the data is replaced with its value.
    Keys and values must all be strings.
    """
    for reference, raw in (("&lt;", "<"), ("&gt;", ">")):
        data = data.replace(reference, raw)
    if entities:
        data = __dict_replace(data, entities)
    # The ampersand goes last so that e.g. "&amp;lt;" becomes "&lt;"
    # rather than "<".
    return data.replace("&amp;", "&")
Fred Drake45cd9de2000-06-29 19:34:54 +000059
def quoteattr(data, entities={}):
    """Escape data and wrap it in quotes for use as an attribute value.

    &, < and > are escaped as in escape(); newline, carriage return and
    tab are written as character references.  Double quotes are used as
    the delimiter unless the value contains a double quote but no single
    quote, in which case single quotes are used.  If the value contains
    both kinds of quote, double quotes delimit it and every embedded
    double quote is written as &quot;.

    Further replacements can be requested through the optional entities
    dictionary; keys and values must all be strings.  The dictionary
    passed in is not modified.
    """
    # Work on a copy so the caller's dict (or the shared default) is
    # left untouched.
    replacements = entities.copy()
    replacements.update({'\n': '&#10;', '\r': '&#13;', '\t':'&#9;'})
    escaped = escape(data, replacements)
    if '"' not in escaped:
        return '"%s"' % escaped
    if "'" not in escaped:
        return "'%s'" % escaped
    return '"%s"' % escaped.replace('"', "&quot;")
82
Fred Drakea12adfe2000-09-18 17:40:22 +000083
class XMLGenerator(handler.ContentHandler):
    """ContentHandler that writes the events it receives back out as XML.

    out is the object whose write() method receives the output; it
    defaults to sys.stdout.  encoding is the name put into the XML
    declaration and is also used to encode any text that is not a str
    before it is written.
    """

    def __init__(self, out=None, encoding="iso-8859-1"):
        if out is None:
            import sys
            out = sys.stdout
        handler.ContentHandler.__init__(self)
        self._out = out
        self._ns_contexts = [{}] # contains uri -> prefix dicts
        self._current_context = self._ns_contexts[-1]
        # (prefix, uri) pairs announced by startPrefixMapping() that have
        # not yet been written out as xmlns attributes.
        self._undeclared_ns_maps = []
        self._encoding = encoding

    def _write(self, text):
        """Write text to the output, encoding it first if it is not a str."""
        if isinstance(text, str):
            self._out.write(text)
        else:
            self._out.write(text.encode(self._encoding, _error_handling))

    def _qname(self, name):
        """Builds a qualified name from a (ns_url, localname) pair"""
        uri, localname = name[0], name[1]
        if uri:
            # The 'xml' prefix is bound by definition to this namespace.
            # It is never announced through startPrefixMapping(), so it
            # will normally not be present in self._current_context.
            if uri == 'http://www.w3.org/XML/1998/namespace':
                return 'xml:' + localname
            # The name is in a non-empty namespace
            prefix = self._current_context[uri]
            if prefix:
                # If it is not the default namespace, prepend the prefix
                return prefix + ":" + localname
        # Return the unqualified name
        return localname

    # ContentHandler methods

    def startDocument(self):
        """Write the XML declaration, naming the configured encoding."""
        self._write('<?xml version="1.0" encoding="%s"?>\n' %
                    self._encoding)

    def startPrefixMapping(self, prefix, uri):
        """Record a uri -> prefix mapping and queue its declaration.

        A copy of the context as it was before the mapping is pushed on
        the stack so that endPrefixMapping() can restore it.
        """
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix
        self._undeclared_ns_maps.append((prefix, uri))

    def endPrefixMapping(self, prefix):
        """Restore the namespace context saved by startPrefixMapping()."""
        self._current_context = self._ns_contexts[-1]
        del self._ns_contexts[-1]

    def startElement(self, name, attrs):
        """Write the start tag for name with its quoted attributes."""
        self._write('<' + name)
        for (attr_name, value) in attrs.items():
            self._write(' %s=%s' % (attr_name, quoteattr(value)))
        self._write('>')

    def endElement(self, name):
        """Write the end tag for name."""
        self._write('</%s>' % name)

    def startElementNS(self, name, qname, attrs):
        """Write a namespace-aware start tag.

        name is a (uri, localname) pair; the tag name actually written is
        derived from it via the current namespace context (qname is not
        used).  Prefix mappings announced since the previous start tag are
        emitted as xmlns attributes, followed by the element's own
        attributes, whose names are (uri, localname) pairs as well.
        """
        self._write('<' + self._qname(name))

        # Go through _write() like all other output, so the declarations
        # get the same encoding treatment as the rest of the document.
        for prefix, uri in self._undeclared_ns_maps:
            if prefix:
                self._write(' xmlns:%s="%s"' % (prefix, uri))
            else:
                self._write(' xmlns="%s"' % uri)
        self._undeclared_ns_maps = []

        for (attr_name, value) in attrs.items():
            self._write(' %s=%s' % (self._qname(attr_name), quoteattr(value)))
        self._write('>')

    def endElementNS(self, name, qname):
        """Write the end tag for the (uri, localname) pair name."""
        self._write('</%s>' % self._qname(name))

    def characters(self, content):
        """Write content with &, < and > escaped."""
        self._write(escape(content))

    def ignorableWhitespace(self, content):
        """Write content unchanged."""
        self._write(content)

    def processingInstruction(self, target, data):
        """Write a processing instruction for target with the given data."""
        self._write('<?%s %s?>' % (target, data))
Fred Drake45cd9de2000-06-29 19:34:54 +0000163
Fred Drakea12adfe2000-09-18 17:40:22 +0000164
class XMLFilterBase(xmlreader.XMLReader):
    """Pass-through filter placed between an XMLReader and the
    application's event handlers.

    Every configuration request is handed to the parent reader and every
    event is handed to the registered handler without modification.
    Subclasses override individual methods to alter the event stream or
    the configuration requests on their way through."""

    def __init__(self, parent = None):
        xmlreader.XMLReader.__init__(self)
        self._parent = parent

    # ErrorHandler methods -- forwarded to self._err_handler

    def error(self, exception):
        """Forward error() to the error handler."""
        self._err_handler.error(exception)

    def fatalError(self, exception):
        """Forward fatalError() to the error handler."""
        self._err_handler.fatalError(exception)

    def warning(self, exception):
        """Forward warning() to the error handler."""
        self._err_handler.warning(exception)

    # ContentHandler methods -- forwarded to self._cont_handler

    def setDocumentLocator(self, locator):
        """Forward setDocumentLocator() to the content handler."""
        self._cont_handler.setDocumentLocator(locator)

    def startDocument(self):
        """Forward startDocument() to the content handler."""
        self._cont_handler.startDocument()

    def endDocument(self):
        """Forward endDocument() to the content handler."""
        self._cont_handler.endDocument()

    def startPrefixMapping(self, prefix, uri):
        """Forward startPrefixMapping() to the content handler."""
        self._cont_handler.startPrefixMapping(prefix, uri)

    def endPrefixMapping(self, prefix):
        """Forward endPrefixMapping() to the content handler."""
        self._cont_handler.endPrefixMapping(prefix)

    def startElement(self, name, attrs):
        """Forward startElement() to the content handler."""
        self._cont_handler.startElement(name, attrs)

    def endElement(self, name):
        """Forward endElement() to the content handler."""
        self._cont_handler.endElement(name)

    def startElementNS(self, name, qname, attrs):
        """Forward startElementNS() to the content handler."""
        self._cont_handler.startElementNS(name, qname, attrs)

    def endElementNS(self, name, qname):
        """Forward endElementNS() to the content handler."""
        self._cont_handler.endElementNS(name, qname)

    def characters(self, content):
        """Forward characters() to the content handler."""
        self._cont_handler.characters(content)

    def ignorableWhitespace(self, chars):
        """Forward ignorableWhitespace() to the content handler."""
        self._cont_handler.ignorableWhitespace(chars)

    def processingInstruction(self, target, data):
        """Forward processingInstruction() to the content handler."""
        self._cont_handler.processingInstruction(target, data)

    def skippedEntity(self, name):
        """Forward skippedEntity() to the content handler."""
        self._cont_handler.skippedEntity(name)

    # DTDHandler methods -- forwarded to self._dtd_handler

    def notationDecl(self, name, publicId, systemId):
        """Forward notationDecl() to the DTD handler."""
        self._dtd_handler.notationDecl(name, publicId, systemId)

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        """Forward unparsedEntityDecl() to the DTD handler."""
        self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata)

    # EntityResolver methods -- forwarded to self._ent_handler

    def resolveEntity(self, publicId, systemId):
        """Return whatever the entity resolver returns for this entity."""
        return self._ent_handler.resolveEntity(publicId, systemId)

    # XMLReader methods -- forwarded to the parent reader

    def parse(self, source):
        """Install this filter as all four handlers of the parent reader,
        then have the parent parse source."""
        reader = self._parent
        reader.setContentHandler(self)
        reader.setErrorHandler(self)
        reader.setEntityResolver(self)
        reader.setDTDHandler(self)
        reader.parse(source)

    def setLocale(self, locale):
        """Forward setLocale() to the parent reader."""
        self._parent.setLocale(locale)

    def getFeature(self, name):
        """Return the parent reader's value for feature name."""
        return self._parent.getFeature(name)

    def setFeature(self, name, state):
        """Forward setFeature() to the parent reader."""
        self._parent.setFeature(name, state)

    def getProperty(self, name):
        """Return the parent reader's value for property name."""
        return self._parent.getProperty(name)

    def setProperty(self, name, value):
        """Forward setProperty() to the parent reader."""
        self._parent.setProperty(name, value)

    # XMLFilter methods

    def getParent(self):
        """Return the parent reader this filter is attached to."""
        return self._parent

    def setParent(self, parent):
        """Attach this filter to a different parent reader."""
        self._parent = parent
273
Lars Gustäbel523b0a62000-09-24 18:54:49 +0000274# --- Utility functions
275
def prepare_input_source(source, base = ""):
    """This function takes an InputSource and an optional base URL and
    returns a fully resolved InputSource object ready for reading.

    source may also be a string (an instance of one of _StringTypes),
    which is passed to the InputSource constructor, or any object with a
    read attribute, which becomes the byte stream of a new InputSource
    (its name attribute, if present, is used as the system id).

    If the resulting InputSource has no byte stream yet, its system id is
    first tried as a file name relative to the directory part of base; if
    no such file exists it is joined to base as a URL and opened with
    urllib.  The stream opened here is attached to the InputSource and is
    not closed by this function.
    """

    # isinstance() rather than an exact type() match, so that subclasses
    # of the string types are recognised as strings too.
    if isinstance(source, tuple(_StringTypes)):
        source = xmlreader.InputSource(source)
    elif hasattr(source, "read"):
        stream = source
        source = xmlreader.InputSource()
        source.setByteStream(stream)
        if hasattr(stream, "name"):
            source.setSystemId(stream.name)

    if source.getByteStream() is None:
        sysid = source.getSystemId()
        basehead = os.path.dirname(os.path.normpath(base))
        sysidfilename = os.path.join(basehead, sysid)
        if os.path.isfile(sysidfilename):
            # A local file wins over URL resolution.
            source.setSystemId(sysidfilename)
            f = open(sysidfilename, "rb")
        else:
            source.setSystemId(urlparse.urljoin(base, sysid))
            f = urllib.urlopen(source.getSystemId())

        source.setByteStream(f)

    return source
Lars Gustäbel523b0a62000-09-24 18:54:49 +0000302 return source