blob: a62183a848329d1df6eca1fad7d2bca70c99001f [file] [log] [blame]
"""\
A library of useful helper classes to the SAX classes, for the
convenience of application and driver writers.
"""
5
Jeremy Hylton1afc1692008-06-18 20:49:58 +00006import os, urllib.parse, urllib.request
Serhiy Storchaka88efc522013-02-10 14:29:52 +02007import io
Guido van Rossum3b271052006-08-17 09:10:09 +00008from . import handler
9from . import xmlreader
Fred Drakea12adfe2000-09-18 17:40:22 +000010
def __dict_replace(s, d):
    """Return *s* with each key of *d* replaced by its mapped value.

    The replacements are applied one after another in the dictionary's
    iteration order, so a later replacement operates on the result of
    the earlier ones.
    """
    result = s
    for old in d:
        result = result.replace(old, d[old])
    return result
Martin v. Löwis58af43f2000-09-24 21:31:06 +000016
def escape(data, entities={}):
    """Escape &, <, and > in a string of data.

    Further strings can be escaped by supplying a dictionary as the
    optional entities parameter.  Its keys and values must all be
    strings; every key is replaced with its corresponding value after
    the three built-in replacements have been made.
    """
    # The ampersand has to go first: the references written for the other
    # characters contain "&" themselves and must not be escaped again.
    for char, reference in (("&", "&amp;"), (">", "&gt;"), ("<", "&lt;")):
        data = data.replace(char, reference)
    return __dict_replace(data, entities) if entities else data
Martin v. Löwis74b51ac2002-10-26 14:50:45 +000032
def unescape(data, entities={}):
    """Unescape &amp;, &lt;, and &gt; in a string of data.

    Further strings can be unescaped by supplying a dictionary as the
    optional entities parameter.  Its keys and values must all be
    strings; every key is replaced with its corresponding value.
    """
    for reference, char in (("&lt;", "<"), ("&gt;", ">")):
        data = data.replace(reference, char)
    if entities:
        data = __dict_replace(data, entities)
    # The ampersand has to go last: doing it earlier would turn text such
    # as "&amp;lt;" into "&lt;" and then wrongly into "<".
    return data.replace("&amp;", "&")
Fred Drake45cd9de2000-06-29 19:34:54 +000046
def quoteattr(data, entities={}):
    """Escape and quote an attribute value.

    &, <, and > are escaped, newline, carriage return and tab are
    written as character references, and the result is wrapped in
    quotes so it can be used as an attribute value.  Double quotes are
    preferred; single quotes are used when the value contains a double
    quote but no single quote, and when it contains both, the double
    quotes are written as &quot;.

    Further strings can be escaped by supplying a dictionary as the
    optional entities parameter.  Its keys and values must all be
    strings; every key is replaced with its corresponding value.
    """
    # Work on a copy so the caller's dictionary (and the shared default)
    # is never modified.
    entities = entities.copy()
    entities['\n'] = '&#10;'
    entities['\r'] = '&#13;'
    entities['\t'] = '&#9;'
    data = escape(data, entities)

    if '"' not in data:
        return '"%s"' % data
    if "'" not in data:
        return "'%s'" % data
    return '"%s"' % data.replace('"', "&quot;")
69
Fred Drakea12adfe2000-09-18 17:40:22 +000070
def _gettextwriter(out, encoding):
    """Return an object with a text-accepting write() method for *out*.

    out      -- None, a text stream, a codecs stream writer, a binary
                stream, or any object that has a write() method taking bytes.
    encoding -- encoding used when *out* has to be wrapped.

    None selects sys.stdout.  Text streams and codecs stream writers are
    returned unchanged.  Everything else is wrapped in an
    io.TextIOWrapper that encodes with *encoding*, replaces unencodable
    characters with XML character references, does not translate
    newlines and writes through immediately.
    """
    if out is None:
        import sys
        return sys.stdout

    if isinstance(out, io.TextIOBase):
        # use a text writer as is
        return out

    import codecs
    if isinstance(out, (codecs.StreamWriter, codecs.StreamReaderWriter)):
        # codecs stream writers take str and do the encoding themselves;
        # wrapping them would hand them bytes instead, so use them as is
        return out

    # wrap a binary writer with TextIOWrapper
    if isinstance(out, io.RawIOBase):
        # Keep the original file open when the TextIOWrapper is
        # destroyed: the proxy forwards everything to out except close()
        class _wrapper:
            __class__ = out.__class__
            def __getattr__(self, name):
                return getattr(out, name)
        buffer = _wrapper()
        buffer.close = lambda: None
    else:
        # This is to handle passed objects that aren't in the
        # IOBase hierarchy, but just have a write method
        buffer = io.BufferedIOBase()
        buffer.writable = lambda: True
        buffer.write = out.write
        try:
            # TextIOWrapper uses these methods to determine
            # if BOM (for UTF-16, etc) should be added
            buffer.seekable = out.seekable
            buffer.tell = out.tell
        except AttributeError:
            pass
    return io.TextIOWrapper(buffer, encoding=encoding,
                            errors='xmlcharrefreplace',
                            newline='\n',
                            write_through=True)
107
class XMLGenerator(handler.ContentHandler):
    """ContentHandler that writes the events it receives as XML text.

    out                  -- destination passed to _gettextwriter().
    encoding             -- encoding handed to _gettextwriter() and named in
                            the XML declaration written by startDocument().
    short_empty_elements -- when true, an element that receives no content
                            between its start and end events is written as
                            <name/> instead of <name></name>.
    """

    def __init__(self, out=None, encoding="iso-8859-1", short_empty_elements=False):
        handler.ContentHandler.__init__(self)
        writer = _gettextwriter(out, encoding)
        self._write = writer.write
        self._flush = writer.flush
        self._ns_contexts = [{}] # contains uri -> prefix dicts
        self._current_context = self._ns_contexts[-1]
        self._undeclared_ns_maps = []
        self._encoding = encoding
        self._short_empty_elements = short_empty_elements
        # True while a start tag has been written without its closing '>'
        self._pending_start_element = False

    def _qname(self, name):
        """Builds a qualified name from a (ns_url, localname) pair"""
        xml_namespace = 'http://www.w3.org/XML/1998/namespace'
        uri = name[0]
        if not uri:
            # No namespace: the unqualified name is used
            return name[1]
        if xml_namespace == uri:
            # Per http://www.w3.org/XML/1998/namespace, the 'xml' prefix is
            # bound by definition to that namespace.  It does not need to
            # be declared and will not usually be found in
            # self._current_context.
            return 'xml:' + name[1]
        prefix = self._current_context[uri]
        if not prefix:
            # Default namespace: no prefix is written
            return name[1]
        return prefix + ":" + name[1]

    def _finish_pending_start_element(self,endElement=False):
        """Write the '>' of a start tag that was left open, if any."""
        if not self._pending_start_element:
            return
        self._write('>')
        self._pending_start_element = False

    # ContentHandler methods

    def startDocument(self):
        """Write the XML declaration naming the configured encoding."""
        self._write('<?xml version="1.0" encoding="%s"?>\n' %
                        self._encoding)

    def endDocument(self):
        """Flush the underlying writer."""
        self._flush()

    def startPrefixMapping(self, prefix, uri):
        """Record a uri -> prefix binding to be declared on the next element."""
        # Save a snapshot of the mapping as it was before this binding so
        # that endPrefixMapping() can restore it.
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix
        self._undeclared_ns_maps.append((prefix, uri))

    def endPrefixMapping(self, prefix):
        """Restore the mapping saved by the matching startPrefixMapping()."""
        self._current_context = self._ns_contexts[-1]
        del self._ns_contexts[-1]

    def startElement(self, name, attrs):
        """Write the start tag for *name* with the attributes in *attrs*."""
        self._finish_pending_start_element()
        self._write('<' + name)
        for attr_name, attr_value in attrs.items():
            self._write(' %s=%s' % (attr_name, quoteattr(attr_value)))
        if self._short_empty_elements:
            # Leave the tag open; endElement() may close it as '/>'
            self._pending_start_element = True
        else:
            self._write(">")

    def endElement(self, name):
        """Close element *name*, as '/>' if its start tag is still open."""
        if not self._pending_start_element:
            self._write('</%s>' % name)
            return
        self._write('/>')
        self._pending_start_element = False

    def startElementNS(self, name, qname, attrs):
        """Write a namespace-aware start tag for the (uri, localname) *name*.

        Prefix mappings recorded since the previous element are emitted
        as xmlns declarations on this tag.
        """
        self._finish_pending_start_element()
        self._write('<' + self._qname(name))

        for prefix, uri in self._undeclared_ns_maps:
            if prefix:
                self._write(' xmlns:%s="%s"' % (prefix, uri))
            else:
                self._write(' xmlns="%s"' % uri)
        self._undeclared_ns_maps = []

        for attr_name, attr_value in attrs.items():
            self._write(' %s=%s' % (self._qname(attr_name), quoteattr(attr_value)))
        if self._short_empty_elements:
            # Leave the tag open; endElementNS() may close it as '/>'
            self._pending_start_element = True
        else:
            self._write(">")

    def endElementNS(self, name, qname):
        """Close the element named by the (uri, localname) pair *name*."""
        if not self._pending_start_element:
            self._write('</%s>' % self._qname(name))
            return
        self._write('/>')
        self._pending_start_element = False

    def characters(self, content):
        """Write *content* with &, < and > escaped; empty content is ignored."""
        if not content:
            return
        self._finish_pending_start_element()
        self._write(escape(content))

    def ignorableWhitespace(self, content):
        """Write *content* unchanged; empty content is ignored."""
        if not content:
            return
        self._finish_pending_start_element()
        self._write(content)

    def processingInstruction(self, target, data):
        """Write a processing instruction '<?target data?>'."""
        self._finish_pending_start_element()
        self._write('<?%s %s?>' % (target, data))
Fred Drake45cd9de2000-06-29 19:34:54 +0000217
Fred Drakea12adfe2000-09-18 17:40:22 +0000218
class XMLFilterBase(xmlreader.XMLReader):
    """This class is designed to sit between an XMLReader and the
    client application's event handlers.  By default, it does nothing
    but pass requests up to the reader and events on to the handlers
    unmodified, but subclasses can override specific methods to modify
    the event stream or the configuration requests as they pass
    through."""

    def __init__(self, parent = None):
        xmlreader.XMLReader.__init__(self)
        # The reader whose events pass through this filter
        self._parent = parent

    # ErrorHandler methods -- forwarded to the registered error handler

    def error(self, exception):
        self._err_handler.error(exception)

    def fatalError(self, exception):
        self._err_handler.fatalError(exception)

    def warning(self, exception):
        self._err_handler.warning(exception)

    # ContentHandler methods -- forwarded to the registered content handler

    def setDocumentLocator(self, locator):
        self._cont_handler.setDocumentLocator(locator)

    def startDocument(self):
        self._cont_handler.startDocument()

    def endDocument(self):
        self._cont_handler.endDocument()

    def startPrefixMapping(self, prefix, uri):
        self._cont_handler.startPrefixMapping(prefix, uri)

    def endPrefixMapping(self, prefix):
        self._cont_handler.endPrefixMapping(prefix)

    def startElement(self, name, attrs):
        self._cont_handler.startElement(name, attrs)

    def endElement(self, name):
        self._cont_handler.endElement(name)

    def startElementNS(self, name, qname, attrs):
        self._cont_handler.startElementNS(name, qname, attrs)

    def endElementNS(self, name, qname):
        self._cont_handler.endElementNS(name, qname)

    def characters(self, content):
        self._cont_handler.characters(content)

    def ignorableWhitespace(self, chars):
        self._cont_handler.ignorableWhitespace(chars)

    def processingInstruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    def skippedEntity(self, name):
        self._cont_handler.skippedEntity(name)

    # DTDHandler methods -- forwarded to the registered DTD handler

    def notationDecl(self, name, publicId, systemId):
        self._dtd_handler.notationDecl(name, publicId, systemId)

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata)

    # EntityResolver methods -- forwarded to the registered entity resolver

    def resolveEntity(self, publicId, systemId):
        return self._ent_handler.resolveEntity(publicId, systemId)

    # XMLReader methods -- forwarded to the parent reader

    def parse(self, source):
        """Install this filter as every handler of the parent, then parse."""
        reader = self._parent
        reader.setContentHandler(self)
        reader.setErrorHandler(self)
        reader.setEntityResolver(self)
        reader.setDTDHandler(self)
        reader.parse(source)

    def setLocale(self, locale):
        self._parent.setLocale(locale)

    def getFeature(self, name):
        return self._parent.getFeature(name)

    def setFeature(self, name, state):
        self._parent.setFeature(name, state)

    def getProperty(self, name):
        return self._parent.getProperty(name)

    def setProperty(self, name, value):
        self._parent.setProperty(name, value)

    # XMLFilter methods

    def getParent(self):
        """Return the reader this filter is attached to."""
        return self._parent

    def setParent(self, parent):
        """Attach this filter to the reader *parent*."""
        self._parent = parent
327
Lars Gustäbel523b0a62000-09-24 18:54:49 +0000328# --- Utility functions
329
def prepare_input_source(source, base=""):
    """This function takes an InputSource and an optional base URL and
    returns a fully resolved InputSource object ready for reading.

    source -- a system identifier string, an object with a read() method,
              or an InputSource.
    base   -- base location used to resolve the system identifier.

    A string becomes the system id of a new InputSource.  A file-like
    object becomes the byte stream of a new InputSource; its name is
    used as the system id only when that name is a string.  If the
    resulting InputSource has neither a character stream nor a byte
    stream, its system id is opened -- as a local file when one exists
    relative to *base*, otherwise as a URL joined onto *base* -- and the
    opened object is stored as the byte stream.
    """

    if isinstance(source, str):
        source = xmlreader.InputSource(source)
    elif hasattr(source, "read"):
        f = source
        source = xmlreader.InputSource()
        source.setByteStream(f)
        # Files opened from a descriptor report an integer as their name;
        # only a string is meaningful as a system id.
        if hasattr(f, "name") and isinstance(f.name, str):
            source.setSystemId(f.name)

    # An InputSource that already carries a character stream is ready to
    # read; only open the system id when no stream of either kind is set.
    if source.getCharacterStream() is None and source.getByteStream() is None:
        sysid = source.getSystemId()
        basehead = os.path.dirname(os.path.normpath(base))
        sysidfilename = os.path.join(basehead, sysid)
        if os.path.isfile(sysidfilename):
            source.setSystemId(sysidfilename)
            f = open(sysidfilename, "rb")
        else:
            source.setSystemId(urllib.parse.urljoin(base, sysid))
            f = urllib.request.urlopen(source.getSystemId())

        source.setByteStream(f)

    return source
Lars Gustäbel523b0a62000-09-24 18:54:49 +0000356 return source