blob: e8450158cf2d3b0ee141f6ac8b759285624b4c59 [file] [log] [blame]
"""\
A library of useful helper classes to the SAX classes, for the
convenience of application and driver writers.
"""
5
Jeremy Hylton1afc1692008-06-18 20:49:58 +00006import os, urllib.parse, urllib.request
Guido van Rossum3b271052006-08-17 09:10:09 +00007from . import handler
8from . import xmlreader
Fred Drakea12adfe2000-09-18 17:40:22 +00009
# Probe for the "xmlcharrefreplace" codec error handler.  When the codecs
# module provides it, unencodable characters can be written as character
# references; otherwise fall back to "strict" error handling.
try:
    from codecs import xmlcharrefreplace_errors
except ImportError:
    _error_handling = "strict"
else:
    _error_handling = "xmlcharrefreplace"
    # Only the success of the import mattered; drop the probe name again.
    del xmlcharrefreplace_errors
18
def __dict_replace(s, d):
    """Return *s* with each key of *d* replaced by its mapped value.

    The replacements are applied one after another in the mapping's
    iteration order, so a later replacement operates on the output of
    the earlier ones.
    """
    result = s
    for old, new in d.items():
        result = result.replace(old, new)
    return result
Martin v. Löwis58af43f2000-09-24 21:31:06 +000024
def escape(data, entities={}):
    """Return *data* with &, < and > replaced by XML entity references.

    Further substitutions can be requested through the optional
    *entities* mapping: every key found in the already escaped text is
    replaced by its value.  Keys and values must all be strings.
    """
    # The ampersand has to be handled first; otherwise the "&" of the
    # references produced for ">" and "<" would be escaped a second time.
    escaped = (data.replace("&", "&amp;")
                   .replace(">", "&gt;")
                   .replace("<", "&lt;"))
    if not entities:
        return escaped
    return __dict_replace(escaped, entities)
Martin v. Löwis74b51ac2002-10-26 14:50:45 +000040
def unescape(data, entities={}):
    """Return *data* with &amp;, &lt; and &gt; turned back into &, < and >.

    Further substitutions can be requested through the optional
    *entities* mapping: every key found in the text is replaced by its
    value.  Keys and values must all be strings.
    """
    text = data.replace("&lt;", "<").replace("&gt;", ">")
    if entities:
        text = __dict_replace(text, entities)
    # The ampersand has to be handled last, so that an escaped reference
    # such as "&amp;lt;" becomes "&lt;" and is not unescaped twice.
    text = text.replace("&amp;", "&")
    return text
Fred Drake45cd9de2000-06-29 19:34:54 +000054
def quoteattr(data, entities={}):
    """Escape *data* and wrap it in quotes for use as an attribute value.

    &, < and > are escaped, and newline, carriage return and tab are
    written as character references.  The value is wrapped in double
    quotes unless it contains a double quote; in that case single quotes
    are used, or, when it contains both kinds of quote, double quotes
    are used and every double quote is written as &quot;.

    Further substitutions can be requested through the optional
    *entities* mapping: every key found in the text is replaced by its
    value.  Keys and values must all be strings.
    """
    # Work on a copy so the caller's mapping is left untouched.
    table = entities.copy()
    table.update({'\n': '&#10;', '\r': '&#13;', '\t': '&#9;'})
    escaped = escape(data, table)

    if '"' not in escaped:
        return '"%s"' % escaped
    if "'" not in escaped:
        return "'%s'" % escaped
    return '"%s"' % escaped.replace('"', "&quot;")
77
Fred Drakea12adfe2000-09-18 17:40:22 +000078
class XMLGenerator(handler.ContentHandler):
    """ContentHandler that writes the SAX events it receives as XML text.

    *out* is the object whose write() method receives the output; it
    defaults to sys.stdout.  *encoding* is the name placed in the XML
    declaration written by startDocument().
    """

    def __init__(self, out=None, encoding="iso-8859-1"):
        if out is None:
            import sys
            out = sys.stdout
        handler.ContentHandler.__init__(self)
        self._out = out
        self._ns_contexts = [{}]  # contains uri -> prefix dicts
        self._current_context = self._ns_contexts[-1]
        # (prefix, uri) pairs announced by startPrefixMapping() that have
        # not been written out as xmlns attributes yet.
        self._undeclared_ns_maps = []
        self._encoding = encoding

    def _write(self, text):
        """Write *text* to the output object.

        str objects are written unchanged; anything else is passed
        through its encode() method with the configured encoding first.
        """
        if isinstance(text, str):
            self._out.write(text)
        else:
            self._out.write(text.encode(self._encoding, _error_handling))

    def _qname(self, name):
        """Builds a qualified name from a (ns_url, localname) pair.

        Raises KeyError when the namespace is neither the reserved XML
        namespace nor present in the current prefix mapping.
        """
        ns_uri, localname = name[0], name[1]
        if ns_uri:
            # The 'xml' prefix is bound by definition to this namespace.
            # It does not need to be declared, so it will usually not be
            # found in self._current_context.
            if ns_uri == 'http://www.w3.org/XML/1998/namespace':
                return 'xml:' + localname
            # The name is in a non-empty namespace
            prefix = self._current_context[ns_uri]
            if prefix:
                # If it is not the default namespace, prepend the prefix
                return prefix + ":" + localname
        # Return the unqualified name
        return localname

    # ContentHandler methods

    def startDocument(self):
        self._write('<?xml version="1.0" encoding="%s"?>\n' %
                    self._encoding)

    def startPrefixMapping(self, prefix, uri):
        # Save a snapshot of the mapping in effect before this declaration;
        # endPrefixMapping() restores it.
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix
        self._undeclared_ns_maps.append((prefix, uri))

    def endPrefixMapping(self, prefix):
        self._current_context = self._ns_contexts[-1]
        del self._ns_contexts[-1]

    def startElement(self, name, attrs):
        self._write('<' + name)
        for attr_name, value in attrs.items():
            self._write(' %s=%s' % (attr_name, quoteattr(value)))
        self._write('>')

    def endElement(self, name):
        self._write('</%s>' % name)

    def startElementNS(self, name, qname, attrs):
        self._write('<' + self._qname(name))

        # Emit the namespace declarations collected since the previous
        # start tag, then forget them so they are written only once.
        for prefix, uri in self._undeclared_ns_maps:
            if prefix:
                self._write(' xmlns:%s="%s"' % (prefix, uri))
            else:
                self._write(' xmlns="%s"' % uri)
        self._undeclared_ns_maps = []

        for attr_name, value in attrs.items():
            self._write(' %s=%s' % (self._qname(attr_name), quoteattr(value)))
        self._write('>')

    def endElementNS(self, name, qname):
        self._write('</%s>' % self._qname(name))

    def characters(self, content):
        self._write(escape(content))

    def ignorableWhitespace(self, content):
        self._write(content)

    def processingInstruction(self, target, data):
        self._write('<?%s %s?>' % (target, data))
Fred Drake45cd9de2000-06-29 19:34:54 +0000158
Fred Drakea12adfe2000-09-18 17:40:22 +0000159
class XMLFilterBase(xmlreader.XMLReader):
    """Base class for filters placed between an XMLReader and the
    client application's event handlers.

    Every event is handed on to the registered handler unchanged and
    every configuration request is handed up to the parent reader
    unchanged.  Subclasses override individual methods to alter the
    event stream or the configuration requests on their way through.
    """

    def __init__(self, parent=None):
        xmlreader.XMLReader.__init__(self)
        self._parent = parent

    # ErrorHandler methods -- forwarded to the registered error handler

    def error(self, exception):
        self._err_handler.error(exception)

    def fatalError(self, exception):
        self._err_handler.fatalError(exception)

    def warning(self, exception):
        self._err_handler.warning(exception)

    # ContentHandler methods -- forwarded to the registered content handler

    def setDocumentLocator(self, locator):
        self._cont_handler.setDocumentLocator(locator)

    def startDocument(self):
        self._cont_handler.startDocument()

    def endDocument(self):
        self._cont_handler.endDocument()

    def startPrefixMapping(self, prefix, uri):
        self._cont_handler.startPrefixMapping(prefix, uri)

    def endPrefixMapping(self, prefix):
        self._cont_handler.endPrefixMapping(prefix)

    def startElement(self, name, attrs):
        self._cont_handler.startElement(name, attrs)

    def endElement(self, name):
        self._cont_handler.endElement(name)

    def startElementNS(self, name, qname, attrs):
        self._cont_handler.startElementNS(name, qname, attrs)

    def endElementNS(self, name, qname):
        self._cont_handler.endElementNS(name, qname)

    def characters(self, content):
        self._cont_handler.characters(content)

    def ignorableWhitespace(self, chars):
        self._cont_handler.ignorableWhitespace(chars)

    def processingInstruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    def skippedEntity(self, name):
        self._cont_handler.skippedEntity(name)

    # DTDHandler methods -- forwarded to the registered DTD handler

    def notationDecl(self, name, publicId, systemId):
        self._dtd_handler.notationDecl(name, publicId, systemId)

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata)

    # EntityResolver methods -- forwarded to the registered entity resolver

    def resolveEntity(self, publicId, systemId):
        return self._ent_handler.resolveEntity(publicId, systemId)

    # XMLReader methods -- forwarded to the parent reader

    def parse(self, source):
        """Register this filter as all four handlers of the parent
        reader, then let the parent parse *source*."""
        reader = self._parent
        reader.setContentHandler(self)
        reader.setErrorHandler(self)
        reader.setEntityResolver(self)
        reader.setDTDHandler(self)
        reader.parse(source)

    def setLocale(self, locale):
        self._parent.setLocale(locale)

    def getFeature(self, name):
        return self._parent.getFeature(name)

    def setFeature(self, name, state):
        self._parent.setFeature(name, state)

    def getProperty(self, name):
        return self._parent.getProperty(name)

    def setProperty(self, name, value):
        self._parent.setProperty(name, value)

    # XMLFilter methods

    def getParent(self):
        """Return the reader this filter forwards requests to."""
        return self._parent

    def setParent(self, parent):
        """Replace the reader this filter forwards requests to."""
        self._parent = parent
268
Lars Gustäbel523b0a62000-09-24 18:54:49 +0000269# --- Utility functions
270
def prepare_input_source(source, base=""):
    """Return a fully resolved InputSource object ready for reading.

    *source* may be a system identifier string, an object with a read()
    method, or an InputSource.  *base* is an optional base URL or path
    against which the system identifier is resolved.

    When the resulting InputSource has no byte stream, its system
    identifier is opened: as a local file (binary mode) if it names an
    existing file relative to the directory of *base*, otherwise as a URL
    joined onto *base*.  The opened stream is stored as the byte stream
    and the system identifier is updated to the resolved location.
    """
    if isinstance(source, str):
        source = xmlreader.InputSource(source)
    elif hasattr(source, "read"):
        stream = source
        source = xmlreader.InputSource()
        source.setByteStream(stream)
        # Only a string name is usable as a system identifier.  File
        # objects opened from a descriptor expose the integer descriptor
        # as their name, which must not end up as the system id.
        stream_name = getattr(stream, "name", None)
        if isinstance(stream_name, str):
            source.setSystemId(stream_name)

    if source.getByteStream() is None:
        sysid = source.getSystemId()
        basehead = os.path.dirname(os.path.normpath(base))
        sysidfilename = os.path.join(basehead, sysid)
        if os.path.isfile(sysidfilename):
            source.setSystemId(sysidfilename)
            f = open(sysidfilename, "rb")
        else:
            source.setSystemId(urllib.parse.urljoin(base, sysid))
            f = urllib.request.urlopen(source.getSystemId())

        source.setByteStream(f)

    return source
Lars Gustäbel523b0a62000-09-24 18:54:49 +0000297 return source