blob: 625bc122ac2c21124ee9ac5f5526d7d2a9c7f5d8 [file] [log] [blame]
"""\
A library of useful helper classes to the SAX classes, for the
convenience of application and driver writers.
"""
5
Jeremy Hylton1afc1692008-06-18 20:49:58 +00006import os, urllib.parse, urllib.request
Guido van Rossum3b271052006-08-17 09:10:09 +00007from . import handler
8from . import xmlreader
Fred Drakea12adfe2000-09-18 17:40:22 +00009
# See whether the xmlcharrefreplace error handler is supported.  When it
# is, text that cannot be represented in the output encoding is written
# as numeric character references; otherwise encoding falls back to
# "strict".
try:
    from codecs import xmlcharrefreplace_errors
except ImportError:
    _error_handling = "strict"
else:
    _error_handling = "xmlcharrefreplace"
    # The import was only a feature probe; do not leave the name behind
    # in the module namespace.
    del xmlcharrefreplace_errors
18
def __dict_replace(s, d):
    """Replace substrings of a string using a dictionary.

    Every key of *d* found in *s* is replaced by its value.  The
    replacements are applied one after another in the iteration order of
    *d*, so the output of an earlier replacement can be matched by a
    later key.  Returns the resulting string.
    """
    for key, value in d.items():
        s = s.replace(key, value)
    return s
Martin v. Löwis58af43f2000-09-24 21:31:06 +000024
def escape(data, entities={}):
    """Escape &, <, and > in a string of data.

    You can escape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.

    The entities dictionary is only read, never modified.  The extra
    replacements are applied after the three built-in ones.
    """
    # must do ampersand first, otherwise the "&" introduced by the other
    # replacements would be escaped a second time
    data = data.replace("&", "&amp;")
    data = data.replace(">", "&gt;")
    data = data.replace("<", "&lt;")
    if entities:
        data = __dict_replace(data, entities)
    return data
Martin v. Löwis74b51ac2002-10-26 14:50:45 +000040
def unescape(data, entities={}):
    """Unescape &amp;, &lt;, and &gt; in a string of data.

    You can unescape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.

    The entities dictionary is only read, never modified.
    """
    data = data.replace("&lt;", "<")
    data = data.replace("&gt;", ">")
    if entities:
        data = __dict_replace(data, entities)
    # must do ampersand last, so that text such as "&amp;lt;" becomes
    # "&lt;" and is not unescaped a second time
    return data.replace("&amp;", "&")
Fred Drake45cd9de2000-06-29 19:34:54 +000054
def quoteattr(data, entities={}):
    """Escape and quote an attribute value.

    Escape &, <, and > in a string of data, then quote it for use as
    an attribute value.  The \" character will be escaped as well, if
    necessary.

    You can escape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.

    Newline, carriage return and tab are always written as numeric
    character references.  The caller's entities dictionary is copied
    before these are added, so it is never modified.
    """
    entities = entities.copy()
    entities.update({'\n': '&#10;', '\r': '&#13;', '\t': '&#9;'})
    data = escape(data, entities)
    if '"' not in data:
        # No double quote inside: plain double-quoted value.
        return '"%s"' % data
    if "'" not in data:
        # Only double quotes inside: single-quote the value instead of
        # escaping anything.
        return "'%s'" % data
    # Both kinds of quote occur: escape the double quotes.
    return '"%s"' % data.replace('"', "&quot;")
77
Fred Drakea12adfe2000-09-18 17:40:22 +000078
class XMLGenerator(handler.ContentHandler):
    """ContentHandler that writes the SAX events it receives as XML text.

    Output goes to *out* (``sys.stdout`` when omitted).  *encoding* is the
    name placed in the XML declaration and is used to encode anything
    passed to ``_write`` that is not a ``str``.  When
    *short_empty_elements* is true, an element that is closed while its
    start tag is still pending is written as ``<tag/>`` rather than
    ``<tag></tag>``.
    """

    def __init__(self, out=None, encoding="iso-8859-1", short_empty_elements=False):
        if out is None:
            import sys
            out = sys.stdout
        handler.ContentHandler.__init__(self)
        self._out = out
        self._ns_contexts = [{}]  # contains uri -> prefix dicts
        self._current_context = self._ns_contexts[-1]
        # (prefix, uri) pairs announced by startPrefixMapping that have not
        # yet been written out as xmlns attributes.
        self._undeclared_ns_maps = []
        self._encoding = encoding
        self._short_empty_elements = short_empty_elements
        # True while a start tag has been written without its closing '>'.
        self._pending_start_element = False

    def _write(self, text):
        """Write *text* to the output, encoding it first if it is not a str."""
        if isinstance(text, str):
            self._out.write(text)
        else:
            self._out.write(text.encode(self._encoding, _error_handling))

    def _qname(self, name):
        """Builds a qualified name from a (ns_url, localname) pair"""
        if name[0]:
            # Per http://www.w3.org/XML/1998/namespace, The 'xml' prefix is
            # bound by definition to http://www.w3.org/XML/1998/namespace.  It
            # does not need to be declared and will not usually be found in
            # self._current_context.
            if 'http://www.w3.org/XML/1998/namespace' == name[0]:
                return 'xml:' + name[1]
            # The name is in a non-empty namespace
            prefix = self._current_context[name[0]]
            if prefix:
                # If it is not the default namespace, prepend the prefix
                return prefix + ":" + name[1]
        # Return the unqualified name
        return name[1]

    def _finish_pending_start_element(self, endElement=False):
        """Close a start tag that is still waiting for its '>'.

        The endElement argument is accepted but not used.
        """
        if self._pending_start_element:
            self._write('>')
            self._pending_start_element = False

    # ContentHandler methods

    def startDocument(self):
        self._write('<?xml version="1.0" encoding="%s"?>\n' %
                    self._encoding)

    def startPrefixMapping(self, prefix, uri):
        # Save a snapshot of the mappings as they were before this one, so
        # that endPrefixMapping can restore it.
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix
        self._undeclared_ns_maps.append((prefix, uri))

    def endPrefixMapping(self, prefix):
        self._current_context = self._ns_contexts[-1]
        del self._ns_contexts[-1]

    def startElement(self, name, attrs):
        self._finish_pending_start_element()
        self._write('<' + name)
        for (name, value) in attrs.items():
            self._write(' %s=%s' % (name, quoteattr(value)))
        if self._short_empty_elements:
            # Defer the '>' until it is known whether the element is empty.
            self._pending_start_element = True
        else:
            self._write(">")

    def endElement(self, name):
        if self._pending_start_element:
            self._write('/>')
            self._pending_start_element = False
        else:
            self._write('</%s>' % name)

    def startElementNS(self, name, qname, attrs):
        self._finish_pending_start_element()
        self._write('<' + self._qname(name))

        # Declare every prefix mapping announced since the last start tag.
        # These go through _write like all other output.
        for prefix, uri in self._undeclared_ns_maps:
            if prefix:
                self._write(' xmlns:%s="%s"' % (prefix, uri))
            else:
                self._write(' xmlns="%s"' % uri)
        self._undeclared_ns_maps = []

        for (name, value) in attrs.items():
            self._write(' %s=%s' % (self._qname(name), quoteattr(value)))
        if self._short_empty_elements:
            # Defer the '>' until it is known whether the element is empty.
            self._pending_start_element = True
        else:
            self._write(">")

    def endElementNS(self, name, qname):
        if self._pending_start_element:
            self._write('/>')
            self._pending_start_element = False
        else:
            self._write('</%s>' % self._qname(name))

    def characters(self, content):
        # Empty content must not close a pending start tag, so that the
        # element can still be written in its short form.
        if content:
            self._finish_pending_start_element()
            self._write(escape(content))

    def ignorableWhitespace(self, content):
        if content:
            self._finish_pending_start_element()
            self._write(content)

    def processingInstruction(self, target, data):
        self._finish_pending_start_element()
        self._write('<?%s %s?>' % (target, data))
Fred Drake45cd9de2000-06-29 19:34:54 +0000192
Fred Drakea12adfe2000-09-18 17:40:22 +0000193
class XMLFilterBase(xmlreader.XMLReader):
    """This class is designed to sit between an XMLReader and the
    client application's event handlers.  By default, it does nothing
    but pass requests up to the reader and events on to the handlers
    unmodified, but subclasses can override specific methods to modify
    the event stream or the configuration requests as they pass
    through."""

    def __init__(self, parent = None):
        xmlreader.XMLReader.__init__(self)
        # The reader this filter forwards configuration requests to.
        self._parent = parent

    # ErrorHandler methods

    def error(self, exception):
        self._err_handler.error(exception)

    def fatalError(self, exception):
        self._err_handler.fatalError(exception)

    def warning(self, exception):
        self._err_handler.warning(exception)

    # ContentHandler methods

    def setDocumentLocator(self, locator):
        self._cont_handler.setDocumentLocator(locator)

    def startDocument(self):
        self._cont_handler.startDocument()

    def endDocument(self):
        self._cont_handler.endDocument()

    def startPrefixMapping(self, prefix, uri):
        self._cont_handler.startPrefixMapping(prefix, uri)

    def endPrefixMapping(self, prefix):
        self._cont_handler.endPrefixMapping(prefix)

    def startElement(self, name, attrs):
        self._cont_handler.startElement(name, attrs)

    def endElement(self, name):
        self._cont_handler.endElement(name)

    def startElementNS(self, name, qname, attrs):
        self._cont_handler.startElementNS(name, qname, attrs)

    def endElementNS(self, name, qname):
        self._cont_handler.endElementNS(name, qname)

    def characters(self, content):
        self._cont_handler.characters(content)

    def ignorableWhitespace(self, chars):
        self._cont_handler.ignorableWhitespace(chars)

    def processingInstruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    def skippedEntity(self, name):
        self._cont_handler.skippedEntity(name)

    # DTDHandler methods

    def notationDecl(self, name, publicId, systemId):
        self._dtd_handler.notationDecl(name, publicId, systemId)

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata)

    # EntityResolver methods

    def resolveEntity(self, publicId, systemId):
        return self._ent_handler.resolveEntity(publicId, systemId)

    # XMLReader methods

    def parse(self, source):
        # Register this filter as every kind of handler on the parent, so
        # that all of the parent's events pass through it.
        self._parent.setContentHandler(self)
        self._parent.setErrorHandler(self)
        self._parent.setEntityResolver(self)
        self._parent.setDTDHandler(self)
        self._parent.parse(source)

    def setLocale(self, locale):
        self._parent.setLocale(locale)

    def getFeature(self, name):
        return self._parent.getFeature(name)

    def setFeature(self, name, state):
        self._parent.setFeature(name, state)

    def getProperty(self, name):
        return self._parent.getProperty(name)

    def setProperty(self, name, value):
        self._parent.setProperty(name, value)

    # XMLFilter methods

    def getParent(self):
        return self._parent

    def setParent(self, parent):
        self._parent = parent
302
Lars Gustäbel523b0a62000-09-24 18:54:49 +0000303# --- Utility functions
304
def prepare_input_source(source, base=""):
    """This function takes an InputSource and an optional base URL and
    returns a fully resolved InputSource object ready for reading.

    *source* may be a string (used as the system identifier), an object
    with a ``read`` method (used as the byte stream), or an InputSource.
    If the resulting InputSource has no byte stream, its system
    identifier is first looked up as a file relative to the directory of
    *base*; if no such file exists it is joined with *base* as a URL and
    opened with urllib.  The stream opened here is not closed by this
    function.
    """

    if isinstance(source, str):
        source = xmlreader.InputSource(source)
    elif hasattr(source, "read"):
        stream = source
        source = xmlreader.InputSource()
        source.setByteStream(stream)
        # Only a string can serve as a system identifier.  File objects
        # created from a file descriptor expose that integer as ``name``,
        # so such names are ignored.
        stream_name = getattr(stream, "name", None)
        if isinstance(stream_name, str):
            source.setSystemId(stream_name)

    if source.getByteStream() is None:
        sysid = source.getSystemId()
        basehead = os.path.dirname(os.path.normpath(base))
        sysidfilename = os.path.join(basehead, sysid)
        if os.path.isfile(sysidfilename):
            source.setSystemId(sysidfilename)
            f = open(sysidfilename, "rb")
        else:
            source.setSystemId(urllib.parse.urljoin(base, sysid))
            f = urllib.request.urlopen(source.getSystemId())

        source.setByteStream(f)

    return source
Lars Gustäbel523b0a62000-09-24 18:54:49 +0000331 return source