blob: 1d3d0ecc5f981d4bb3d0d58460bfce5969c87008 [file] [log] [blame]
"""\
A library of useful helper classes to the SAX classes, for the
convenience of application and driver writers.
"""
5
Jeremy Hylton1afc1692008-06-18 20:49:58 +00006import os, urllib.parse, urllib.request
Serhiy Storchaka88efc522013-02-10 14:29:52 +02007import io
Georg Brandlc502df42013-05-12 11:41:12 +02008import codecs
Guido van Rossum3b271052006-08-17 09:10:09 +00009from . import handler
10from . import xmlreader
Fred Drakea12adfe2000-09-18 17:40:22 +000011
def __dict_replace(s, d):
    """Apply every substitution in the mapping *d* to the string *s*.

    Each key of *d* is replaced by its value, one pair after another in
    the mapping's iteration order, so a later pair sees the output of the
    earlier ones.  The resulting string is returned.
    """
    result = s
    for old, new in d.items():
        result = result.replace(old, new)
    return result
Martin v. Löwis58af43f2000-09-24 21:31:06 +000017
def escape(data, entities={}):
    """Escape &, <, and > in a string of data.

    Additional replacements can be requested through the optional
    *entities* dictionary: keys and values must all be strings, and each
    key found in the data is replaced with its corresponding value.  These
    extra replacements run after the three built-in ones.
    """
    # The ampersand has to be handled first; otherwise the '&' of the
    # references produced for '>' and '<' would be escaped again.
    for raw, reference in (("&", "&amp;"), (">", "&gt;"), ("<", "&lt;")):
        data = data.replace(raw, reference)
    if entities:
        data = __dict_replace(data, entities)
    return data
Martin v. Löwis74b51ac2002-10-26 14:50:45 +000033
def unescape(data, entities={}):
    """Unescape &amp;, &lt;, and &gt; in a string of data.

    Additional replacements can be requested through the optional
    *entities* dictionary: keys and values must all be strings, and each
    key found in the data is replaced with its corresponding value.
    """
    for reference, raw in (("&lt;", "<"), ("&gt;", ">")):
        data = data.replace(reference, raw)
    if entities:
        data = __dict_replace(data, entities)
    # The ampersand must be restored last, so that an escaped reference
    # such as "&amp;lt;" yields the text "&lt;" rather than "<".
    return data.replace("&amp;", "&")
Fred Drake45cd9de2000-06-29 19:34:54 +000047
def quoteattr(data, entities={}):
    """Escape and quote an attribute value.

    Escape &, <, and > in a string of data, then wrap it in quotes so it
    can be used as an attribute value.  Newline, carriage return and tab
    are turned into character references.  Double quotes are used unless
    the value itself contains a double quote; in that case single quotes
    are used, and if the value contains both kinds of quote, the double
    quotes inside it are written as &quot;.

    Additional replacements can be requested through the optional
    *entities* dictionary: keys and values must all be strings, and each
    key found in the data is replaced with its corresponding value.
    """
    # Work on a copy so the caller's mapping (or the shared default) is
    # never modified.
    replacements = entities.copy()
    replacements.update({'\n': '&#10;', '\r': '&#13;', '\t':'&#9;'})
    escaped = escape(data, replacements)

    if '"' not in escaped:
        return '"%s"' % escaped
    if "'" not in escaped:
        return "'%s'" % escaped
    # Both quote characters occur: keep the double-quote delimiters and
    # escape the embedded double quotes.
    return '"%s"' % escaped.replace('"', "&quot;")
70
Fred Drakea12adfe2000-09-18 17:40:22 +000071
def _gettextwriter(out, encoding):
    """Return an object with a text ``write`` method for the target *out*.

    * ``None`` selects ``sys.stdout``.
    * Text streams (``io.TextIOBase``) and codecs stream writers are
      returned unchanged.
    * Anything else is treated as a binary writer and wrapped in an
      ``io.TextIOWrapper`` that encodes with *encoding*, replaces
      unencodable characters by XML character references, writes '\\n'
      untranslated and passes every write straight through.
    """
    if out is None:
        import sys
        return sys.stdout

    text_writer_types = (io.TextIOBase,
                         codecs.StreamWriter, codecs.StreamReaderWriter)
    if isinstance(out, text_writer_types):
        # text writers and codecs stream writers are used as is
        return out

    # From here on: wrap a binary writer with TextIOWrapper.
    if not isinstance(out, io.RawIOBase):
        # The object is not a raw stream; it may not be part of the
        # IOBase hierarchy at all and only offer a write method.  Graft
        # what it has onto a bare BufferedIOBase instance.
        binary = io.BufferedIOBase()
        binary.writable = lambda: True
        binary.write = out.write
        try:
            # TextIOWrapper uses these methods to determine
            # if a BOM (for UTF-16, etc) should be added
            binary.seekable = out.seekable
            binary.tell = out.tell
        except AttributeError:
            pass
    else:
        # Keep the original file open when the TextIOWrapper is
        # destroyed: hand it a proxy that forwards every attribute to
        # *out* but whose close() does nothing.
        class _wrapper:
            __class__ = out.__class__
            def __getattr__(self, name):
                return getattr(out, name)
        binary = _wrapper()
        binary.close = lambda: None

    return io.TextIOWrapper(binary, encoding=encoding,
                            errors='xmlcharrefreplace',
                            newline='\n',
                            write_through=True)
112
class XMLGenerator(handler.ContentHandler):
    """ContentHandler that writes the events it receives out as XML text.

    *out* is the output target, resolved through _gettextwriter() (None
    means sys.stdout).  *encoding* is used for the XML declaration, for
    wrapping binary targets and for decoding non-str character data.
    When *short_empty_elements* is true, an element that receives no
    content before its end event is written as ``<name/>``.
    """

    def __init__(self, out=None, encoding="iso-8859-1", short_empty_elements=False):
        handler.ContentHandler.__init__(self)
        writer = _gettextwriter(out, encoding)
        self._write = writer.write
        self._flush = writer.flush
        self._ns_contexts = [{}] # contains uri -> prefix dicts
        self._current_context = self._ns_contexts[-1]
        # (prefix, uri) pairs announced but not yet written as xmlns
        # attributes; emitted by the next startElementNS().
        self._undeclared_ns_maps = []
        self._encoding = encoding
        self._short_empty_elements = short_empty_elements
        # True while a start tag has been written without its closing '>'
        # (only happens when short_empty_elements is enabled).
        self._pending_start_element = False

    def _qname(self, name):
        """Builds a qualified name from a (ns_url, localname) pair"""
        namespace = name[0]
        if not namespace:
            # Return the unqualified name
            return name[1]
        # Per http://www.w3.org/XML/1998/namespace, the 'xml' prefix is
        # bound by definition to http://www.w3.org/XML/1998/namespace.  It
        # does not need to be declared and will not usually be found in
        # self._current_context.
        if 'http://www.w3.org/XML/1998/namespace' == namespace:
            return 'xml:' + name[1]
        # The name is in a non-empty namespace
        prefix = self._current_context[namespace]
        if not prefix:
            # Default namespace: the local name is used without a prefix
            return name[1]
        return prefix + ":" + name[1]

    def _finish_pending_start_element(self,endElement=False):
        """Write the '>' of a start tag that was left open, if any."""
        if not self._pending_start_element:
            return
        self._write('>')
        self._pending_start_element = False

    # ContentHandler methods

    def startDocument(self):
        """Write the XML declaration naming the configured encoding."""
        declaration = '<?xml version="1.0" encoding="%s"?>\n' % self._encoding
        self._write(declaration)

    def endDocument(self):
        """Flush the output writer."""
        self._flush()

    def startPrefixMapping(self, prefix, uri):
        """Record a new uri -> prefix binding and queue its declaration."""
        # Save a snapshot of the bindings in force so that
        # endPrefixMapping() can restore them.
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix
        self._undeclared_ns_maps.append((prefix, uri))

    def endPrefixMapping(self, prefix):
        """Restore the bindings saved by the matching startPrefixMapping()."""
        self._current_context = self._ns_contexts.pop()

    def startElement(self, name, attrs):
        """Write the start tag for *name* with the attributes in *attrs*."""
        self._finish_pending_start_element()
        self._write('<' + name)
        for attr_name, attr_value in attrs.items():
            self._write(' %s=%s' % (attr_name, quoteattr(attr_value)))
        if not self._short_empty_elements:
            self._write(">")
        else:
            # Leave the tag open; it is closed with '>' or '/>' later.
            self._pending_start_element = True

    def endElement(self, name):
        """Write the end tag for *name*, or '/>' if its start tag is open."""
        if not self._pending_start_element:
            self._write('</%s>' % name)
            return
        self._write('/>')
        self._pending_start_element = False

    def startElementNS(self, name, qname, attrs):
        """Write a namespace-aware start tag plus pending xmlns attributes."""
        self._finish_pending_start_element()
        self._write('<' + self._qname(name))

        for prefix, uri in self._undeclared_ns_maps:
            if not prefix:
                self._write(' xmlns="%s"' % uri)
            else:
                self._write(' xmlns:%s="%s"' % (prefix, uri))
        self._undeclared_ns_maps = []

        for attr_name, attr_value in attrs.items():
            self._write(' %s=%s' % (self._qname(attr_name), quoteattr(attr_value)))
        if not self._short_empty_elements:
            self._write(">")
        else:
            # Leave the tag open; it is closed with '>' or '/>' later.
            self._pending_start_element = True

    def endElementNS(self, name, qname):
        """Write the namespace-aware end tag, or '/>' if the tag is open."""
        if not self._pending_start_element:
            self._write('</%s>' % self._qname(name))
            return
        self._write('/>')
        self._pending_start_element = False

    def characters(self, content):
        """Write character data with &, < and > escaped; empty data is ignored."""
        if not content:
            return
        self._finish_pending_start_element()
        if not isinstance(content, str):
            # Non-str data is decoded with the generator's encoding.
            content = str(content, self._encoding)
        self._write(escape(content))

    def ignorableWhitespace(self, content):
        """Write whitespace verbatim (no escaping); empty data is ignored."""
        if not content:
            return
        self._finish_pending_start_element()
        if not isinstance(content, str):
            # Non-str data is decoded with the generator's encoding.
            content = str(content, self._encoding)
        self._write(content)

    def processingInstruction(self, target, data):
        """Write a processing instruction as ``<?target data?>``."""
        self._finish_pending_start_element()
        self._write('<?%s %s?>' % (target, data))
Fred Drake45cd9de2000-06-29 19:34:54 +0000226
Fred Drakea12adfe2000-09-18 17:40:22 +0000227
class XMLFilterBase(xmlreader.XMLReader):
    """Base class for filters placed between an XMLReader and the
    client application's event handlers.

    As written here it changes nothing: configuration requests are passed
    up to the parent reader and incoming events are passed on to the
    registered handlers.  Subclasses override individual methods to
    modify the event stream or the configuration requests on their way
    through."""

    def __init__(self, parent = None):
        xmlreader.XMLReader.__init__(self)
        # The reader this filter sits in front of.
        self._parent = parent

    # ErrorHandler methods -- forwarded to the registered error handler

    def error(self, exception):
        self._err_handler.error(exception)

    def fatalError(self, exception):
        self._err_handler.fatalError(exception)

    def warning(self, exception):
        self._err_handler.warning(exception)

    # ContentHandler methods -- forwarded to the registered content handler

    def setDocumentLocator(self, locator):
        self._cont_handler.setDocumentLocator(locator)

    def startDocument(self):
        self._cont_handler.startDocument()

    def endDocument(self):
        self._cont_handler.endDocument()

    def startPrefixMapping(self, prefix, uri):
        self._cont_handler.startPrefixMapping(prefix, uri)

    def endPrefixMapping(self, prefix):
        self._cont_handler.endPrefixMapping(prefix)

    def startElement(self, name, attrs):
        self._cont_handler.startElement(name, attrs)

    def endElement(self, name):
        self._cont_handler.endElement(name)

    def startElementNS(self, name, qname, attrs):
        self._cont_handler.startElementNS(name, qname, attrs)

    def endElementNS(self, name, qname):
        self._cont_handler.endElementNS(name, qname)

    def characters(self, content):
        self._cont_handler.characters(content)

    def ignorableWhitespace(self, chars):
        self._cont_handler.ignorableWhitespace(chars)

    def processingInstruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    def skippedEntity(self, name):
        self._cont_handler.skippedEntity(name)

    # DTDHandler methods -- forwarded to the registered DTD handler

    def notationDecl(self, name, publicId, systemId):
        self._dtd_handler.notationDecl(name, publicId, systemId)

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata)

    # EntityResolver methods -- forwarded to the registered entity resolver

    def resolveEntity(self, publicId, systemId):
        return self._ent_handler.resolveEntity(publicId, systemId)

    # XMLReader methods -- forwarded to the parent reader

    def parse(self, source):
        """Register this filter as every handler of the parent reader,
        then have the parent parse *source*."""
        parent = self._parent
        parent.setContentHandler(self)
        parent.setErrorHandler(self)
        parent.setEntityResolver(self)
        parent.setDTDHandler(self)
        parent.parse(source)

    def setLocale(self, locale):
        self._parent.setLocale(locale)

    def getFeature(self, name):
        return self._parent.getFeature(name)

    def setFeature(self, name, state):
        self._parent.setFeature(name, state)

    def getProperty(self, name):
        return self._parent.getProperty(name)

    def setProperty(self, name, value):
        self._parent.setProperty(name, value)

    # XMLFilter methods

    def getParent(self):
        """Return the reader this filter forwards requests to."""
        return self._parent

    def setParent(self, parent):
        """Replace the reader this filter forwards requests to."""
        self._parent = parent
336
Lars Gustäbel523b0a62000-09-24 18:54:49 +0000337# --- Utility functions
338
def prepare_input_source(source, base=""):
    """This function takes an InputSource and an optional base URL and
    returns a fully resolved InputSource object ready for reading.

    *source* may be:

    * a string, taken as the system identifier of a new InputSource;
    * an object with a ``read`` attribute, which becomes the byte stream
      of a new InputSource (its ``name`` attribute, when it is a string,
      becomes the system identifier);
    * anything else, which is used as an InputSource as is.

    If the InputSource already carries a character stream or a byte
    stream, it is returned without further work.  Otherwise its system
    identifier is resolved: when it names an existing file relative to
    the directory part of *base*, that file is opened in binary mode;
    failing that, it is joined with *base* as a URL and opened with
    urllib.request.urlopen().  The opened stream is stored as the byte
    stream and is deliberately left open for the consumer to read and
    close.

    Errors from open() / urlopen() are not caught here.
    """

    if isinstance(source, str):
        source = xmlreader.InputSource(source)
    elif hasattr(source, "read"):
        stream = source
        source = xmlreader.InputSource()
        source.setByteStream(stream)
        if hasattr(stream, "name") and isinstance(stream.name, str):
            source.setSystemId(stream.name)

    # Only resolve the system identifier when there is no data stream at
    # all.  A source that already supplies a character stream must not
    # trigger a file or network open, and may legitimately have no system
    # identifier to resolve.
    if source.getCharacterStream() is None and source.getByteStream() is None:
        sysid = source.getSystemId()
        basehead = os.path.dirname(os.path.normpath(base))
        sysidfilename = os.path.join(basehead, sysid)
        if os.path.isfile(sysidfilename):
            source.setSystemId(sysidfilename)
            f = open(sysidfilename, "rb")
        else:
            source.setSystemId(urllib.parse.urljoin(base, sysid))
            f = urllib.request.urlopen(source.getSystemId())

        source.setByteStream(f)

    return source
Lars Gustäbel523b0a62000-09-24 18:54:49 +0000365 return source