blob: 46946fcf72e8775d27a15b067d972ac817bbeb55 [file] [log] [blame]
Fred Drakea12adfe2000-09-18 17:40:22 +00001"""\
2A library of useful helper classes to the SAX classes, for the
Fred Drake45cd9de2000-06-29 19:34:54 +00003convenience of application and driver writers.
Fred Drake45cd9de2000-06-29 19:34:54 +00004"""
5
Jeremy Hylton1afc1692008-06-18 20:49:58 +00006import os, urllib.parse, urllib.request
Guido van Rossum3b271052006-08-17 09:10:09 +00007from . import handler
8from . import xmlreader
Fred Drakea12adfe2000-09-18 17:40:22 +00009
# Probe the codecs module for the "xmlcharrefreplace" error handler and
# remember which error-handling name to pass to str.encode() later on.
try:
    from codecs import xmlcharrefreplace_errors
except ImportError:
    # Handler not available: fall back to raising on unencodable text.
    _error_handling = "strict"
else:
    _error_handling = "xmlcharrefreplace"
    # The imported callable was only needed for the probe itself.
    del xmlcharrefreplace_errors
18
def __dict_replace(s, d):
    """Return *s* with every key of *d* replaced by its mapped value.

    The replacements are applied one after another, in the iteration
    order of ``d.items()``, each one operating on the result of the
    previous replacement.
    """
    result = s
    for old, new in d.items():
        result = result.replace(old, new)
    return result
Martin v. Löwis58af43f2000-09-24 21:31:06 +000024
def escape(data, entities=None):
    """Escape &, <, and > in a string of data.

    You can escape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.
    Omitting the parameter (or passing None or an empty dictionary)
    performs only the three standard replacements.

    Returns the escaped string.
    """

    # must do ampersand first, otherwise the ampersands introduced by
    # the other replacements would be escaped a second time
    data = data.replace("&", "&amp;")
    data = data.replace(">", "&gt;")
    data = data.replace("<", "&lt;")
    if entities:
        data = __dict_replace(data, entities)
    return data
Martin v. Löwis74b51ac2002-10-26 14:50:45 +000040
def unescape(data, entities=None):
    """Unescape &amp;, &lt;, and &gt; in a string of data.

    You can unescape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.
    Omitting the parameter (or passing None or an empty dictionary)
    performs only the three standard replacements.

    Returns the unescaped string.
    """
    data = data.replace("&lt;", "<")
    data = data.replace("&gt;", ">")
    if entities:
        data = __dict_replace(data, entities)
    # must do ampersand last, so that text such as "&amp;lt;" becomes
    # "&lt;" instead of being unescaped twice
    return data.replace("&amp;", "&")
Fred Drake45cd9de2000-06-29 19:34:54 +000054
def quoteattr(data, entities=None):
    """Escape and quote an attribute value.

    Escape &, <, and > in a string of data, then quote it for use as
    an attribute value.  The \" character will be escaped as well, if
    necessary.  Newline, carriage return and tab are always written as
    character references.

    You can escape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.
    The dictionary passed in is never modified.

    Returns the escaped value including its surrounding quote characters.
    """
    # Work on a private copy so the caller's mapping stays untouched.
    entities = {} if entities is None else entities.copy()
    entities.update({'\n': '&#10;', '\r': '&#13;', '\t': '&#9;'})
    data = escape(data, entities)
    if '"' not in data:
        return '"%s"' % data
    if "'" not in data:
        # Only double quotes occur: single-quote the value instead of
        # escaping anything.
        return "'%s'" % data
    # Both kinds of quote occur: use double quotes and escape the
    # embedded ones.
    return '"%s"' % data.replace('"', "&quot;")
77
Fred Drakea12adfe2000-09-18 17:40:22 +000078
class XMLGenerator(handler.ContentHandler):
    """ContentHandler that writes the SAX events it receives as XML text.

    out is the object whose write() method receives the output; it
    defaults to sys.stdout.  encoding is the name written into the XML
    declaration and used when non-str text has to be encoded.  When
    short_empty_elements is true, an element that receives no content
    between its start and end events is written as <name/>.
    """

    def __init__(self, out=None, encoding="iso-8859-1", short_empty_elements=False):
        if out is None:
            import sys
            out = sys.stdout
        handler.ContentHandler.__init__(self)
        self._out = out
        self._ns_contexts = [{}]  # contains uri -> prefix dicts
        self._current_context = self._ns_contexts[-1]
        self._undeclared_ns_maps = []
        self._encoding = encoding
        self._short_empty_elements = short_empty_elements
        # True while a start tag has been written without its closing
        # '>' (only happens when short_empty_elements is enabled).
        self._pending_start_element = False

    def _write(self, text):
        """Write text to the output, encoding it first if it is not a str."""
        if isinstance(text, str):
            self._out.write(text)
        else:
            self._out.write(text.encode(self._encoding, _error_handling))

    def _qname(self, name):
        """Builds a qualified name from a (ns_url, localname) pair"""
        if name[0]:
            # The 'xml' prefix is bound by definition to this namespace;
            # it is never declared through startPrefixMapping, so it
            # will not normally be found in self._current_context.
            if name[0] == 'http://www.w3.org/XML/1998/namespace':
                return 'xml:' + name[1]
            # The name is in a non-empty namespace
            prefix = self._current_context[name[0]]
            if prefix:
                # If it is not the default namespace, prepend the prefix
                return prefix + ":" + name[1]
        # Return the unqualified name
        return name[1]

    def _finish_pending_start_element(self, endElement=False):
        """Write the '>' of a start tag that was left open, if any.

        The endElement parameter is accepted but not used.
        """
        if self._pending_start_element:
            self._write('>')
            self._pending_start_element = False

    # ContentHandler methods

    def startDocument(self):
        """Write the XML declaration, naming the configured encoding."""
        self._write('<?xml version="1.0" encoding="%s"?>\n' %
                    self._encoding)

    def startPrefixMapping(self, prefix, uri):
        """Record a new prefix binding to be declared on the next element."""
        # Save a snapshot of the bindings as they were before this
        # mapping so that endPrefixMapping can restore them.
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix
        self._undeclared_ns_maps.append((prefix, uri))

    def endPrefixMapping(self, prefix):
        """Restore the bindings saved by the matching startPrefixMapping."""
        self._current_context = self._ns_contexts.pop()

    def startElement(self, name, attrs):
        """Write a start tag with its attributes (non-namespace mode)."""
        self._finish_pending_start_element()
        self._write('<' + name)
        for attr_name, attr_value in attrs.items():
            self._write(' %s=%s' % (attr_name, quoteattr(attr_value)))
        if self._short_empty_elements:
            # Postpone the '>' until it is known whether content follows.
            self._pending_start_element = True
        else:
            self._write(">")

    def endElement(self, name):
        """Write an end tag, or close a still-open start tag with '/>'."""
        if self._pending_start_element:
            self._write('/>')
            self._pending_start_element = False
        else:
            self._write('</%s>' % name)

    def startElementNS(self, name, qname, attrs):
        """Write a start tag in namespace mode.

        name is a (uri, localname) pair and the keys of attrs are such
        pairs as well.  Prefix mappings recorded since the previous
        start tag are written as xmlns declarations on this element.
        """
        self._finish_pending_start_element()
        self._write('<' + self._qname(name))

        for prefix, uri in self._undeclared_ns_maps:
            if prefix:
                self._write(' xmlns:%s="%s"' % (prefix, uri))
            else:
                self._write(' xmlns="%s"' % uri)
        self._undeclared_ns_maps = []

        for attr_name, attr_value in attrs.items():
            self._write(' %s=%s' % (self._qname(attr_name),
                                    quoteattr(attr_value)))
        if self._short_empty_elements:
            # Postpone the '>' until it is known whether content follows.
            self._pending_start_element = True
        else:
            self._write(">")

    def endElementNS(self, name, qname):
        """Write a namespace-mode end tag, or '/>' for an empty element."""
        if self._pending_start_element:
            self._write('/>')
            self._pending_start_element = False
        else:
            self._write('</%s>' % self._qname(name))

    def characters(self, content):
        """Write character data with &, < and > escaped."""
        if content:
            self._finish_pending_start_element()
            self._write(escape(content))

    def ignorableWhitespace(self, content):
        """Write ignorable whitespace unchanged."""
        if content:
            self._finish_pending_start_element()
            self._write(content)

    def processingInstruction(self, target, data):
        """Write a processing instruction."""
        self._finish_pending_start_element()
        self._write('<?%s %s?>' % (target, data))
Fred Drake45cd9de2000-06-29 19:34:54 +0000186
Fred Drakea12adfe2000-09-18 17:40:22 +0000187
class XMLFilterBase(xmlreader.XMLReader):
    """This class is designed to sit between an XMLReader and the
    client application's event handlers.  By default, it does nothing
    but pass requests up to the reader and events on to the handlers
    unmodified, but subclasses can override specific methods to modify
    the event stream or the configuration requests as they pass
    through."""

    def __init__(self, parent=None):
        """Create a filter whose upstream reader is parent (may be None)."""
        xmlreader.XMLReader.__init__(self)
        self._parent = parent

    # ErrorHandler methods

    def error(self, exception):
        """Forward an error to the registered error handler."""
        self._err_handler.error(exception)

    def fatalError(self, exception):
        """Forward a fatal error to the registered error handler."""
        self._err_handler.fatalError(exception)

    def warning(self, exception):
        """Forward a warning to the registered error handler."""
        self._err_handler.warning(exception)

    # ContentHandler methods

    def setDocumentLocator(self, locator):
        """Forward the document locator to the content handler."""
        self._cont_handler.setDocumentLocator(locator)

    def startDocument(self):
        """Forward the start-of-document event to the content handler."""
        self._cont_handler.startDocument()

    def endDocument(self):
        """Forward the end-of-document event to the content handler."""
        self._cont_handler.endDocument()

    def startPrefixMapping(self, prefix, uri):
        """Forward the start of a prefix mapping to the content handler."""
        self._cont_handler.startPrefixMapping(prefix, uri)

    def endPrefixMapping(self, prefix):
        """Forward the end of a prefix mapping to the content handler."""
        self._cont_handler.endPrefixMapping(prefix)

    def startElement(self, name, attrs):
        """Forward a start-element event to the content handler."""
        self._cont_handler.startElement(name, attrs)

    def endElement(self, name):
        """Forward an end-element event to the content handler."""
        self._cont_handler.endElement(name)

    def startElementNS(self, name, qname, attrs):
        """Forward a namespace-mode start-element event."""
        self._cont_handler.startElementNS(name, qname, attrs)

    def endElementNS(self, name, qname):
        """Forward a namespace-mode end-element event."""
        self._cont_handler.endElementNS(name, qname)

    def characters(self, content):
        """Forward character data to the content handler."""
        self._cont_handler.characters(content)

    def ignorableWhitespace(self, chars):
        """Forward ignorable whitespace to the content handler."""
        self._cont_handler.ignorableWhitespace(chars)

    def processingInstruction(self, target, data):
        """Forward a processing instruction to the content handler."""
        self._cont_handler.processingInstruction(target, data)

    def skippedEntity(self, name):
        """Forward a skipped-entity notification to the content handler."""
        self._cont_handler.skippedEntity(name)

    # DTDHandler methods

    def notationDecl(self, name, publicId, systemId):
        """Forward a notation declaration to the DTD handler."""
        self._dtd_handler.notationDecl(name, publicId, systemId)

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        """Forward an unparsed entity declaration to the DTD handler."""
        self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata)

    # EntityResolver methods

    def resolveEntity(self, publicId, systemId):
        """Ask the registered entity resolver and return its answer."""
        return self._ent_handler.resolveEntity(publicId, systemId)

    # XMLReader methods

    def parse(self, source):
        """Register this filter as every handler of the parent reader,
        then have the parent parse source."""
        upstream = self._parent
        upstream.setContentHandler(self)
        upstream.setErrorHandler(self)
        upstream.setEntityResolver(self)
        upstream.setDTDHandler(self)
        upstream.parse(source)

    def setLocale(self, locale):
        """Pass the locale request up to the parent reader."""
        self._parent.setLocale(locale)

    def getFeature(self, name):
        """Return the parent reader's state for feature name."""
        return self._parent.getFeature(name)

    def setFeature(self, name, state):
        """Set feature name to state on the parent reader."""
        self._parent.setFeature(name, state)

    def getProperty(self, name):
        """Return the parent reader's value for property name."""
        return self._parent.getProperty(name)

    def setProperty(self, name, value):
        """Set property name to value on the parent reader."""
        self._parent.setProperty(name, value)

    # XMLFilter methods

    def getParent(self):
        """Return the reader this filter is attached to."""
        return self._parent

    def setParent(self, parent):
        """Attach this filter to a different parent reader."""
        self._parent = parent
296
Lars Gustäbel523b0a62000-09-24 18:54:49 +0000297# --- Utility functions
298
def prepare_input_source(source, base=""):
    """This function takes an InputSource and an optional base URL and
    returns a fully resolved InputSource object ready for reading.

    source may also be a string, which is used as the system identifier
    of a new InputSource, or any object with a read attribute, which
    becomes the byte stream of a new InputSource.  If the resulting
    InputSource has no byte stream, its system identifier is resolved
    against base: an existing local file is opened in binary mode,
    anything else is joined with base as a URL and opened with
    urllib.request.urlopen.  The opened stream is stored on the
    InputSource and is not closed by this function.
    """

    if isinstance(source, str):
        source = xmlreader.InputSource(source)
    elif hasattr(source, "read"):
        stream = source
        source = xmlreader.InputSource()
        source.setByteStream(stream)
        # A file object's name is not always a path: files opened from a
        # descriptor report the integer descriptor.  Only a string is
        # usable as a system identifier.
        stream_name = getattr(stream, "name", None)
        if isinstance(stream_name, str):
            source.setSystemId(stream_name)

    if source.getByteStream() is None:
        sysid = source.getSystemId()
        basehead = os.path.dirname(os.path.normpath(base))
        sysidfilename = os.path.join(basehead, sysid)
        if os.path.isfile(sysidfilename):
            source.setSystemId(sysidfilename)
            f = open(sysidfilename, "rb")
        else:
            source.setSystemId(urllib.parse.urljoin(base, sysid))
            f = urllib.request.urlopen(source.getSystemId())

        source.setByteStream(f)

    return source
Lars Gustäbel523b0a62000-09-24 18:54:49 +0000325 return source