blob: dad74f5389d8e7f9de0be07cfc2033e132c7a243 [file] [log] [blame]
"""\
A library of useful helper classes to the SAX classes, for the
convenience of application and driver writers.
"""
5
Martin v. Löwis58af43f2000-09-24 21:31:06 +00006import os, urlparse, urllib, types
Serhiy Storchakaf8980382013-02-10 14:26:08 +02007import io
Serhiy Storchaka8673ab92013-02-02 10:28:30 +02008import sys
Fred Drake45cd9de2000-06-29 19:34:54 +00009import handler
Lars Gustäbelfc643c32000-09-24 10:53:31 +000010import xmlreader
Fred Drakea12adfe2000-09-18 17:40:22 +000011
Fred Drake95b4ec52000-12-16 01:45:11 +000012try:
13 _StringTypes = [types.StringType, types.UnicodeType]
14except AttributeError:
15 _StringTypes = [types.StringType]
16
def __dict_replace(s, d):
    """Return *s* with each key of *d* replaced by its mapped value.

    The replacements are applied one after another, in the order the
    dictionary yields its items, so a later replacement operates on the
    result of the earlier ones.
    """
    result = s
    for pair in d.items():
        # pair is (substring to find, replacement text)
        result = result.replace(*pair)
    return result
Martin v. Löwis58af43f2000-09-24 21:31:06 +000022
def escape(data, entities={}):
    """Return *data* with &, < and > replaced by their entity references.

    Further substitutions can be requested through the optional
    *entities* dictionary: every key found in the string is replaced by
    the corresponding value.  Keys and values must all be strings.
    """
    # The ampersand is handled first; doing it later would re-escape the
    # '&' of the references produced for '>' and '<'.
    data = data.replace("&", "&amp;").replace(">", "&gt;").replace("<", "&lt;")
    if entities:
        data = __dict_replace(data, entities)
    return data
Martin v. Löwis74b51ac2002-10-26 14:50:45 +000038
def unescape(data, entities={}):
    """Return *data* with &amp;, &lt; and &gt; turned back into &, < and >.

    Further substitutions can be requested through the optional
    *entities* dictionary: every key found in the string is replaced by
    the corresponding value.  Keys and values must all be strings.
    """
    data = data.replace("&lt;", "<").replace("&gt;", ">")
    if entities:
        data = __dict_replace(data, entities)
    # The ampersand is handled last so that text such as "&amp;lt;" comes
    # out as "&lt;" instead of being unescaped twice.
    return data.replace("&amp;", "&")
Fred Drake45cd9de2000-06-29 19:34:54 +000052
def quoteattr(data, entities={}):
    """Escape *data* and wrap it in quotes for use as an attribute value.

    &, < and > are escaped, newline, carriage return and tab are written
    as character references, and the result is enclosed in quotes.
    Double quotes are preferred; single quotes are used when the value
    contains a double quote but no single quote, and when it contains
    both, double quotes are used with each embedded double quote written
    as &quot;.

    Further substitutions can be requested through the optional
    *entities* dictionary: every key found in the string is replaced by
    the corresponding value.  Keys and values must all be strings.
    """
    # Work on a copy so the caller's dictionary (and the shared default)
    # is never modified.
    replacements = entities.copy()
    replacements.update({'\n': '&#10;', '\r': '&#13;', '\t':'&#9;'})
    escaped = escape(data, replacements)

    if '"' not in escaped:
        return '"%s"' % escaped
    if "'" not in escaped:
        return "'%s'" % escaped
    # Both kinds of quote occur: keep double quotes as the delimiter and
    # escape the embedded ones.
    return '"%s"' % escaped.replace('"', "&quot;")
75
Fred Drakea12adfe2000-09-18 17:40:22 +000076
def _gettextwriter(out, encoding):
    """Return an io.TextIOWrapper that encodes text to *encoding* for *out*.

    *out* is the destination for the encoded bytes; when it is None,
    sys.stdout is used.  Characters that cannot be encoded are written
    as XML character references ('xmlcharrefreplace'), and newlines are
    written as '\\n' without translation.
    """
    if out is None:
        import sys
        out = sys.stdout

    if not isinstance(out, io.RawIOBase):
        # *out* may live outside the IOBase hierarchy and offer nothing
        # more than a write() method, so graft that method onto a bare
        # buffer object that TextIOWrapper will accept.
        binary = io.BufferedIOBase()
        binary.writable = lambda: True
        binary.write = out.write
        try:
            # TextIOWrapper consults these two methods to decide whether
            # a BOM (for UTF-16, etc.) should be emitted.
            binary.seekable = out.seekable
            binary.tell = out.tell
        except AttributeError:
            pass
    else:
        # NOTE(review): io.BufferedIOBase is documented as taking no
        # constructor argument, so it is unclear that writes ever reach
        # *out* on this path -- confirm with a raw stream such as
        # io.FileIO before relying on it.
        binary = io.BufferedIOBase(out)
        # close() is neutralised so the original file stays open when
        # the TextIOWrapper is destroyed.
        binary.close = lambda: None

    # Wrap the binary writer so callers can write text to it.
    return io.TextIOWrapper(binary, encoding=encoding,
                            errors='xmlcharrefreplace',
                            newline='\n')
104
class XMLGenerator(handler.ContentHandler):
    """ContentHandler that writes the SAX events it receives as XML text.

    The text is encoded with *encoding* and written to *out* through the
    writer returned by _gettextwriter(); passing out=None lets that
    helper pick the destination.
    """

    def __init__(self, out=None, encoding="iso-8859-1"):
        handler.ContentHandler.__init__(self)
        out = _gettextwriter(out, encoding)
        self._write = out.write
        self._flush = out.flush
        self._ns_contexts = [{}] # contains uri -> prefix dicts
        self._current_context = self._ns_contexts[-1]
        # (prefix, uri) pairs announced by startPrefixMapping() that have
        # not yet been written out as xmlns attributes.
        self._undeclared_ns_maps = []
        self._encoding = encoding

    def _qname(self, name):
        """Builds a qualified name from a (ns_url, localname) pair"""
        if name[0]:
            # Per http://www.w3.org/XML/1998/namespace, the 'xml' prefix is
            # bound by definition to http://www.w3.org/XML/1998/namespace.  It
            # does not need to be declared and will not usually be found in
            # self._current_context.
            if 'http://www.w3.org/XML/1998/namespace' == name[0]:
                return 'xml:' + name[1]
            # The name is in a non-empty namespace
            prefix = self._current_context[name[0]]
            if prefix:
                # If it is not the default namespace, prepend the prefix
                return prefix + ":" + name[1]
        # Return the unqualified name
        return name[1]

    # ContentHandler methods

    def startDocument(self):
        """Write the XML declaration, naming the configured encoding."""
        self._write(u'<?xml version="1.0" encoding="%s"?>\n' %
                        self._encoding)

    def endDocument(self):
        """Flush the underlying writer."""
        self._flush()

    def startPrefixMapping(self, prefix, uri):
        """Record a new prefix binding and queue it for declaration."""
        # Save a snapshot of the mapping as it was before this binding so
        # endPrefixMapping() can restore it.
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix
        self._undeclared_ns_maps.append((prefix, uri))

    def endPrefixMapping(self, prefix):
        """Restore the namespace mapping saved by startPrefixMapping()."""
        self._current_context = self._ns_contexts[-1]
        del self._ns_contexts[-1]

    def startElement(self, name, attrs):
        """Write a start tag with its attributes, values quoted and escaped."""
        self._write(u'<' + name)
        for (attr_name, value) in attrs.items():
            self._write(u' %s=%s' % (attr_name, quoteattr(value)))
        self._write(u'>')

    def endElement(self, name):
        """Write the end tag for *name*."""
        self._write(u'</%s>' % name)

    def startElementNS(self, name, qname, attrs):
        """Write a namespace-aware start tag.

        *name* is a (uri, localname) pair; *qname* is not used.  Any
        prefix mappings still pending are emitted as xmlns attributes on
        this element, followed by the element's own attributes.
        """
        self._write(u'<' + self._qname(name))

        for prefix, uri in self._undeclared_ns_maps:
            # The URI is attribute content, so it is quoted and escaped
            # like any other attribute value; otherwise a URI containing
            # '&', '<' or '"' would produce ill-formed XML.  A None URI is
            # written as an empty value rather than as the text "None".
            declared = quoteattr(uri if uri is not None else u'')
            if prefix:
                self._write(u' xmlns:%s=%s' % (prefix, declared))
            else:
                self._write(u' xmlns=%s' % declared)
        self._undeclared_ns_maps = []

        for (attr_name, value) in attrs.items():
            self._write(u' %s=%s' % (self._qname(attr_name), quoteattr(value)))
        self._write(u'>')

    def endElementNS(self, name, qname):
        """Write the end tag for the (uri, localname) pair *name*."""
        self._write(u'</%s>' % self._qname(name))

    def characters(self, content):
        """Write character data with &, < and > escaped."""
        self._write(escape(unicode(content)))

    def ignorableWhitespace(self, content):
        """Write *content* as is, without escaping."""
        self._write(unicode(content))

    def processingInstruction(self, target, data):
        """Write a processing instruction of the form <?target data?>."""
        self._write(u'<?%s %s?>' % (target, data))
Fred Drake45cd9de2000-06-29 19:34:54 +0000186
Fred Drakea12adfe2000-09-18 17:40:22 +0000187
class XMLFilterBase(xmlreader.XMLReader):
    """Pass-through filter placed between an XMLReader and the
    application's event handlers.

    As written, every configuration request is forwarded to the parent
    reader and every event is forwarded unchanged to the registered
    handler.  Subclasses override individual methods to alter the event
    stream or the configuration requests on their way through.
    """

    def __init__(self, parent=None):
        xmlreader.XMLReader.__init__(self)
        self._parent = parent

    # ErrorHandler methods -- forwarded to the registered error handler

    def error(self, exception):
        self._err_handler.error(exception)

    def fatalError(self, exception):
        self._err_handler.fatalError(exception)

    def warning(self, exception):
        self._err_handler.warning(exception)

    # ContentHandler methods -- forwarded to the registered content handler

    def setDocumentLocator(self, locator):
        self._cont_handler.setDocumentLocator(locator)

    def startDocument(self):
        self._cont_handler.startDocument()

    def endDocument(self):
        self._cont_handler.endDocument()

    def startPrefixMapping(self, prefix, uri):
        self._cont_handler.startPrefixMapping(prefix, uri)

    def endPrefixMapping(self, prefix):
        self._cont_handler.endPrefixMapping(prefix)

    def startElement(self, name, attrs):
        self._cont_handler.startElement(name, attrs)

    def endElement(self, name):
        self._cont_handler.endElement(name)

    def startElementNS(self, name, qname, attrs):
        self._cont_handler.startElementNS(name, qname, attrs)

    def endElementNS(self, name, qname):
        self._cont_handler.endElementNS(name, qname)

    def characters(self, content):
        self._cont_handler.characters(content)

    def ignorableWhitespace(self, chars):
        self._cont_handler.ignorableWhitespace(chars)

    def processingInstruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    def skippedEntity(self, name):
        self._cont_handler.skippedEntity(name)

    # DTDHandler methods -- forwarded to the registered DTD handler

    def notationDecl(self, name, publicId, systemId):
        self._dtd_handler.notationDecl(name, publicId, systemId)

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata)

    # EntityResolver methods -- forwarded to the registered entity resolver

    def resolveEntity(self, publicId, systemId):
        return self._ent_handler.resolveEntity(publicId, systemId)

    # XMLReader methods -- forwarded to the parent reader

    def parse(self, source):
        """Register this filter as every handler of the parent reader,
        then have the parent parse *source*."""
        reader = self._parent
        reader.setContentHandler(self)
        reader.setErrorHandler(self)
        reader.setEntityResolver(self)
        reader.setDTDHandler(self)
        reader.parse(source)

    def setLocale(self, locale):
        self._parent.setLocale(locale)

    def getFeature(self, name):
        return self._parent.getFeature(name)

    def setFeature(self, name, state):
        self._parent.setFeature(name, state)

    def getProperty(self, name):
        return self._parent.getProperty(name)

    def setProperty(self, name, value):
        self._parent.setProperty(name, value)

    # XMLFilter methods

    def getParent(self):
        """Return the reader this filter forwards requests to."""
        return self._parent

    def setParent(self, parent):
        """Replace the reader this filter forwards requests to."""
        self._parent = parent
296
Lars Gustäbel523b0a62000-09-24 18:54:49 +0000297# --- Utility functions
298
def prepare_input_source(source, base=""):
    """Return an InputSource for *source* that has a byte stream attached.

    *source* may be a string (handed to xmlreader.InputSource), an
    object with a read() method (used as the byte stream, with its
    ``name`` attribute, if any, as the system id), or an object that is
    used as the input source directly.  When no byte stream is attached
    yet, the system id is first tried as a file name relative to the
    directory part of *base*; if no such file exists, it is joined to
    *base* as a URL and opened with urllib.
    """
    if type(source) in _StringTypes:
        source = xmlreader.InputSource(source)
    elif hasattr(source, "read"):
        stream = source
        source = xmlreader.InputSource()
        source.setByteStream(stream)
        if hasattr(stream, "name"):
            source.setSystemId(stream.name)

    if source.getByteStream() is not None:
        # A stream is already attached; nothing needs resolving.
        return source

    try:
        sysid = source.getSystemId()
        basehead = os.path.dirname(os.path.normpath(base))
        encoding = sys.getfilesystemencoding()
        sysid_is_unicode = isinstance(sysid, unicode)
        head_is_unicode = isinstance(basehead, unicode)
        # Bring both path components to the same string type before they
        # are joined: prefer unicode, and fall back to encoded bytes when
        # the byte-string side cannot be decoded.
        if sysid_is_unicode and not head_is_unicode:
            try:
                basehead = basehead.decode(encoding)
            except UnicodeDecodeError:
                sysid = sysid.encode(encoding)
        elif head_is_unicode and not sysid_is_unicode:
            try:
                sysid = sysid.decode(encoding)
            except UnicodeDecodeError:
                basehead = basehead.encode(encoding)
        sysidfilename = os.path.join(basehead, sysid)
        isfile = os.path.isfile(sysidfilename)
    except UnicodeError:
        # The name cannot be expressed as a local path; treat it as a URL.
        isfile = False

    if not isfile:
        source.setSystemId(urlparse.urljoin(base, source.getSystemId()))
        stream = urllib.urlopen(source.getSystemId())
    else:
        source.setSystemId(sysidfilename)
        stream = open(sysidfilename, "rb")

    source.setByteStream(stream)
    return source
Lars Gustäbel523b0a62000-09-24 18:54:49 +0000342 return source