blob: b05e8150a6b3c94ab83bce746ca3fa8511417abd [file] [log] [blame]
"""\
A library of useful helper classes to the SAX classes, for the
convenience of application and driver writers.
"""
5
Jeremy Hylton1afc1692008-06-18 20:49:58 +00006import os, urllib.parse, urllib.request
Guido van Rossum3b271052006-08-17 09:10:09 +00007from . import handler
8from . import xmlreader
Fred Drakea12adfe2000-09-18 17:40:22 +00009
# See whether the xmlcharrefreplace error handler is supported.  The
# resulting name is passed as the ``errors`` argument when XMLGenerator
# encodes non-str output; "strict" is the fallback when the handler is
# not available.
try:
    from codecs import xmlcharrefreplace_errors
    _error_handling = "xmlcharrefreplace"
    # The import was only a probe; do not leave the name in the module.
    del xmlcharrefreplace_errors
except ImportError:
    _error_handling = "strict"
18
def __dict_replace(s, d):
    """Replace substrings of a string using a dictionary.

    Every occurrence of each key of *d* in *s* is replaced by the
    corresponding value.  The replacements are applied one after another
    in the dictionary's iteration order, so text produced by an earlier
    replacement can be matched by a later key.

    Returns the resulting string; *s* is returned unchanged when *d* is
    empty.
    """
    for key, value in d.items():
        s = s.replace(key, value)
    return s
Martin v. Löwis58af43f2000-09-24 21:31:06 +000024
def escape(data, entities={}):
    """Escape &, <, and > in a string of data.

    You can escape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.

    The entities dictionary is only read, never modified.  The extra
    replacements are applied after the three built-in ones.
    """
    # The ampersand must be done first; otherwise the "&" introduced by
    # "&gt;" and "&lt;" would itself be escaped a second time.
    data = data.replace("&", "&amp;")
    data = data.replace(">", "&gt;")
    data = data.replace("<", "&lt;")
    if entities:
        data = __dict_replace(data, entities)
    return data
Martin v. Löwis74b51ac2002-10-26 14:50:45 +000040
def unescape(data, entities={}):
    """Unescape &amp;, &lt;, and &gt; in a string of data.

    You can unescape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.

    The entities dictionary is only read, never modified.
    """
    data = data.replace("&lt;", "<")
    data = data.replace("&gt;", ">")
    if entities:
        data = __dict_replace(data, entities)
    # The ampersand must be done last, so that an escaped ampersand such
    # as "&amp;lt;" yields the text "&lt;" rather than "<".
    return data.replace("&amp;", "&")
Fred Drake45cd9de2000-06-29 19:34:54 +000054
def quoteattr(data, entities={}):
    """Escape and quote an attribute value.

    Escape &, <, and > in a string of data, then quote it for use as
    an attribute value.  The \" character will be escaped as well, if
    necessary.

    You can escape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.

    Newline, carriage return and tab are always replaced by numeric
    character references; these three entries take precedence over
    entries for the same keys in the entities parameter.
    """
    # Work on a copy so the caller's dictionary (and the shared default)
    # is never modified.
    entities = entities.copy()
    entities.update({'\n': '&#10;', '\r': '&#13;', '\t': '&#9;'})
    data = escape(data, entities)
    if '"' in data:
        if "'" in data:
            # Both quote characters occur: delimit with double quotes
            # and escape the embedded double quotes.
            data = '"%s"' % data.replace('"', "&quot;")
        else:
            # Only double quotes occur: single-quote delimiters avoid
            # any further escaping.
            data = "'%s'" % data
    else:
        data = '"%s"' % data
    return data
77
Fred Drakea12adfe2000-09-18 17:40:22 +000078
class XMLGenerator(handler.ContentHandler):
    """ContentHandler that writes the SAX events it receives as XML text.

    out is the object whose write() method receives the output; when it
    is None, sys.stdout is used.  encoding is the name written into the
    XML declaration and used when non-str output has to be encoded.
    """

    def __init__(self, out=None, encoding="iso-8859-1"):
        if out is None:
            import sys
            out = sys.stdout
        handler.ContentHandler.__init__(self)
        self._out = out
        self._ns_contexts = [{}] # contains uri -> prefix dicts
        self._current_context = self._ns_contexts[-1]
        # (prefix, uri) pairs announced by startPrefixMapping() that have
        # not yet been written out as xmlns attributes.
        self._undeclared_ns_maps = []
        self._encoding = encoding

    def _write(self, text):
        """Write text to the output object.

        str objects are written unchanged; anything else is encoded
        with the configured encoding first.
        """
        # NOTE(review): SAX text events are str on Python 3, so the
        # encoding branch looks unreachable in practice -- confirm.
        if isinstance(text, str):
            self._out.write(text)
        else:
            self._out.write(text.encode(self._encoding, _error_handling))

    def _qname(self, name):
        """Builds a qualified name from a (ns_url, localname) pair"""
        if name[0]:
            # Per http://www.w3.org/XML/1998/namespace, The 'xml' prefix is
            # bound by definition to http://www.w3.org/XML/1998/namespace.  It
            # does not need to be declared and will not usually be found in
            # self._current_context.
            if 'http://www.w3.org/XML/1998/namespace' == name[0]:
                return 'xml:' + name[1]
            # The name is in a non-empty namespace
            prefix = self._current_context[name[0]]
            if prefix:
                # If it is not the default namespace, prepend the prefix
                return prefix + ":" + name[1]
        # Return the unqualified name
        return name[1]

    # ContentHandler methods

    def startDocument(self):
        """Write the XML declaration, naming the configured encoding."""
        self._write('<?xml version="1.0" encoding="%s"?>\n' %
                    self._encoding)

    def startPrefixMapping(self, prefix, uri):
        """Record a new uri -> prefix binding.

        A snapshot of the bindings in force before this call is pushed
        so that endPrefixMapping() can restore it.  The binding itself
        is written as an xmlns attribute by the next startElementNS().
        """
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix
        self._undeclared_ns_maps.append((prefix, uri))

    def endPrefixMapping(self, prefix):
        """Restore the bindings saved by the matching startPrefixMapping()."""
        self._current_context = self._ns_contexts[-1]
        del self._ns_contexts[-1]

    def startElement(self, name, attrs):
        """Write a start tag with its attributes (non-namespace mode)."""
        self._write('<' + name)
        # A separate loop variable keeps the element name parameter intact.
        for (attr_name, value) in attrs.items():
            self._write(' %s=%s' % (attr_name, quoteattr(value)))
        self._write('>')

    def endElement(self, name):
        """Write an end tag (non-namespace mode)."""
        self._write('</%s>' % name)

    def startElementNS(self, name, qname, attrs):
        """Write a start tag in namespace mode.

        name is a (uri, localname) pair.  The qname argument is not
        used; the qualified name is rebuilt from the current bindings.
        Pending namespace declarations are written before the
        attributes and then cleared.
        """
        self._write('<' + self._qname(name))

        # Declarations go through _write() like all other output, so a
        # subclass that overrides _write() sees them too.
        for prefix, uri in self._undeclared_ns_maps:
            if prefix:
                self._write(' xmlns:%s="%s"' % (prefix, uri))
            else:
                self._write(' xmlns="%s"' % uri)
        self._undeclared_ns_maps = []

        for (attr_name, value) in attrs.items():
            self._write(' %s=%s' % (self._qname(attr_name), quoteattr(value)))
        self._write('>')

    def endElementNS(self, name, qname):
        """Write an end tag in namespace mode; qname is not used."""
        self._write('</%s>' % self._qname(name))

    def characters(self, content):
        """Write character data with &, < and > escaped."""
        self._write(escape(content))

    def ignorableWhitespace(self, content):
        """Write ignorable whitespace unchanged."""
        self._write(content)

    def processingInstruction(self, target, data):
        """Write a processing instruction; target and data are not escaped."""
        self._write('<?%s %s?>' % (target, data))
Fred Drake45cd9de2000-06-29 19:34:54 +0000164
Fred Drakea12adfe2000-09-18 17:40:22 +0000165
class XMLFilterBase(xmlreader.XMLReader):
    """This class is designed to sit between an XMLReader and the
    client application's event handlers.  By default, it does nothing
    but pass requests up to the reader and events on to the handlers
    unmodified, but subclasses can override specific methods to modify
    the event stream or the configuration requests as they pass
    through."""

    def __init__(self, parent = None):
        xmlreader.XMLReader.__init__(self)
        # The reader this filter wraps; may be set later with setParent().
        self._parent = parent

    # ErrorHandler methods -- forwarded to the registered error handler

    def error(self, exception):
        self._err_handler.error(exception)

    def fatalError(self, exception):
        self._err_handler.fatalError(exception)

    def warning(self, exception):
        self._err_handler.warning(exception)

    # ContentHandler methods -- forwarded to the registered content handler

    def setDocumentLocator(self, locator):
        self._cont_handler.setDocumentLocator(locator)

    def startDocument(self):
        self._cont_handler.startDocument()

    def endDocument(self):
        self._cont_handler.endDocument()

    def startPrefixMapping(self, prefix, uri):
        self._cont_handler.startPrefixMapping(prefix, uri)

    def endPrefixMapping(self, prefix):
        self._cont_handler.endPrefixMapping(prefix)

    def startElement(self, name, attrs):
        self._cont_handler.startElement(name, attrs)

    def endElement(self, name):
        self._cont_handler.endElement(name)

    def startElementNS(self, name, qname, attrs):
        self._cont_handler.startElementNS(name, qname, attrs)

    def endElementNS(self, name, qname):
        self._cont_handler.endElementNS(name, qname)

    def characters(self, content):
        self._cont_handler.characters(content)

    def ignorableWhitespace(self, chars):
        self._cont_handler.ignorableWhitespace(chars)

    def processingInstruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    def skippedEntity(self, name):
        self._cont_handler.skippedEntity(name)

    # DTDHandler methods -- forwarded to the registered DTD handler

    def notationDecl(self, name, publicId, systemId):
        self._dtd_handler.notationDecl(name, publicId, systemId)

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata)

    # EntityResolver methods -- forwarded to the registered entity resolver

    def resolveEntity(self, publicId, systemId):
        return self._ent_handler.resolveEntity(publicId, systemId)

    # XMLReader methods -- forwarded to the parent reader

    def parse(self, source):
        """Install this filter as all four handlers of the parent
        reader, then let the parent parse source."""
        self._parent.setContentHandler(self)
        self._parent.setErrorHandler(self)
        self._parent.setEntityResolver(self)
        self._parent.setDTDHandler(self)
        self._parent.parse(source)

    def setLocale(self, locale):
        self._parent.setLocale(locale)

    def getFeature(self, name):
        return self._parent.getFeature(name)

    def setFeature(self, name, state):
        self._parent.setFeature(name, state)

    def getProperty(self, name):
        return self._parent.getProperty(name)

    def setProperty(self, name, value):
        self._parent.setProperty(name, value)

    # XMLFilter methods

    def getParent(self):
        """Return the reader this filter wraps."""
        return self._parent

    def setParent(self, parent):
        """Set the reader this filter wraps."""
        self._parent = parent
274
# --- Utility functions
276
def prepare_input_source(source, base=""):
    """This function takes an InputSource and an optional base URL and
    returns a fully resolved InputSource object ready for reading.

    source may be a string (used as the system identifier), an object
    with a read() method (used as the byte stream), or an InputSource.
    When the resulting InputSource has no byte stream, its system
    identifier is resolved against base: an existing local file is
    opened in binary mode, anything else is opened as a URL.  The opened
    stream is stored on the returned InputSource; closing it is left to
    the caller.
    """

    if isinstance(source, str):
        source = xmlreader.InputSource(source)
    elif hasattr(source, "read"):
        f = source
        source = xmlreader.InputSource()
        source.setByteStream(f)
        # A file object's name is not always a path: objects created
        # from a file descriptor carry an integer there.  Only a string
        # is usable as a system identifier.
        if hasattr(f, "name") and isinstance(f.name, str):
            source.setSystemId(f.name)

    if source.getByteStream() is None:
        sysid = source.getSystemId()
        basehead = os.path.dirname(os.path.normpath(base))
        sysidfilename = os.path.join(basehead, sysid)
        if os.path.isfile(sysidfilename):
            source.setSystemId(sysidfilename)
            f = open(sysidfilename, "rb")
        else:
            source.setSystemId(urllib.parse.urljoin(base, sysid))
            f = urllib.request.urlopen(source.getSystemId())

        source.setByteStream(f)

    return source