blob: ff9f59627445c7c7ff9996c124704861adb93e6d [file] [log] [blame]
"""\
A library of useful helper classes to the SAX classes, for the
convenience of application and driver writers.
"""
5
Martin v. Löwis58af43f2000-09-24 21:31:06 +00006import os, urlparse, urllib, types
Guido van Rossum3b271052006-08-17 09:10:09 +00007from . import handler
8from . import xmlreader
Fred Drakea12adfe2000-09-18 17:40:22 +00009
# Build the list of types that prepare_input_source() treats as a plain
# system identifier.  Each attribute lookup on ``types`` raises
# AttributeError when the interpreter does not provide that type, so the
# list degrades step by step down to the builtin ``str``.
try:
    _StringTypes = [types.StringType, types.UnicodeType]
except AttributeError:
    try:
        _StringTypes = [types.StringType]
    except AttributeError:
        _StringTypes = [str]
Fred Drake95b4ec52000-12-16 01:45:11 +000017
# See whether the xmlcharrefreplace error handler is supported.  When it
# is, text that cannot be represented in the output encoding is encoded
# with that handler; otherwise encoding falls back to "strict".
try:
    from codecs import xmlcharrefreplace_errors
    _error_handling = "xmlcharrefreplace"
    # The import was only a feature probe; do not leave the name behind.
    del xmlcharrefreplace_errors
except ImportError:
    _error_handling = "strict"
26
def __dict_replace(s, d):
    """Replace substrings of a string using a dictionary.

    Every key of *d* that occurs in *s* is replaced by its value.  The
    replacements are applied one after another in the dictionary's
    iteration order, so text produced by one replacement can be matched
    by a later key.  Returns the resulting string.
    """
    for key, value in d.items():
        s = s.replace(key, value)
    return s
Martin v. Löwis58af43f2000-09-24 21:31:06 +000032
def escape(data, entities={}):
    """Escape &, <, and > in a string of data.

    You can escape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.

    The entities dictionary is only read, never modified.  Returns the
    escaped string.
    """

    # must do ampersand first, otherwise the "&" introduced by the
    # "&gt;" / "&lt;" replacements would be escaped a second time
    data = data.replace("&", "&amp;")
    data = data.replace(">", "&gt;")
    data = data.replace("<", "&lt;")
    if entities:
        data = __dict_replace(data, entities)
    return data
Martin v. Löwis74b51ac2002-10-26 14:50:45 +000048
def unescape(data, entities={}):
    """Unescape &amp;, &lt;, and &gt; in a string of data.

    You can unescape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.

    The entities dictionary is only read, never modified.  Returns the
    unescaped string.
    """
    data = data.replace("&lt;", "<")
    data = data.replace("&gt;", ">")
    if entities:
        data = __dict_replace(data, entities)
    # must do ampersand last, so that e.g. "&amp;lt;" becomes "&lt;"
    # rather than being unescaped twice into "<"
    return data.replace("&amp;", "&")
Fred Drake45cd9de2000-06-29 19:34:54 +000062
def quoteattr(data, entities={}):
    """Escape and quote an attribute value.

    Escape &, <, and > in a string of data, then quote it for use as
    an attribute value.  The double-quote character will be escaped as
    well, if necessary.

    You can escape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.

    Newline, carriage return and tab are always replaced by numeric
    character references.  Returns the value including its surrounding
    quote characters.
    """
    # Work on a copy so neither the caller's dictionary nor the shared
    # default argument is modified.
    entities = entities.copy()
    entities.update({'\n': '&#10;', '\r': '&#13;', '\t':'&#9;'})
    data = escape(data, entities)
    if '"' in data:
        if "'" in data:
            # Both quote kinds occur: delimit with double quotes and
            # escape the embedded double quotes.
            data = '"%s"' % data.replace('"', "&quot;")
        else:
            # Only double quotes occur: single-quote delimiters avoid
            # having to escape anything.
            data = "'%s'" % data
    else:
        data = '"%s"' % data
    return data
85
Fred Drakea12adfe2000-09-18 17:40:22 +000086
class XMLGenerator(handler.ContentHandler):
    """ContentHandler that writes the SAX events it receives as XML text.

    Output goes to the ``out`` object passed to the constructor (its
    ``write`` method is the only thing used); ``encoding`` is written
    into the XML declaration and used when non-str text must be encoded.
    """

    def __init__(self, out=None, encoding="iso-8859-1"):
        """Create a generator writing to *out* (default: sys.stdout)."""
        if out is None:
            import sys
            out = sys.stdout
        handler.ContentHandler.__init__(self)
        self._out = out
        self._ns_contexts = [{}] # contains uri -> prefix dicts
        self._current_context = self._ns_contexts[-1]
        # (prefix, uri) pairs seen in startPrefixMapping() that still have
        # to be emitted as xmlns attributes on the next element.
        self._undeclared_ns_maps = []
        self._encoding = encoding

    def _write(self, text):
        """Write *text* to the output, encoding it first if it is not a str."""
        if isinstance(text, str):
            self._out.write(text)
        else:
            self._out.write(text.encode(self._encoding, _error_handling))

    def _qname(self, name):
        """Builds a qualified name from a (ns_url, localname) pair"""
        if name[0]:
            # The name is in a non-empty namespace
            prefix = self._current_context[name[0]]
            if prefix:
                # If it is not the default namespace, prepend the prefix
                return prefix + ":" + name[1]
        # Return the unqualified name
        return name[1]

    # ContentHandler methods

    def startDocument(self):
        """Write the XML declaration, including the configured encoding."""
        self._write('<?xml version="1.0" encoding="%s"?>\n' %
                    self._encoding)

    def startPrefixMapping(self, prefix, uri):
        """Record a uri -> prefix mapping and queue its xmlns declaration."""
        # Push a snapshot of the current mappings before adding the new one.
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix
        self._undeclared_ns_maps.append((prefix, uri))

    def endPrefixMapping(self, prefix):
        """Switch the current context to the most recently pushed snapshot."""
        self._current_context = self._ns_contexts[-1]
        del self._ns_contexts[-1]

    def startElement(self, name, attrs):
        """Write the start tag for *name* with its attributes."""
        self._write('<' + name)
        # Use distinct loop names so the element name is not shadowed.
        for (attr_name, value) in attrs.items():
            self._write(' %s=%s' % (attr_name, quoteattr(value)))
        self._write('>')

    def endElement(self, name):
        """Write the end tag for *name*."""
        self._write('</%s>' % name)

    def startElementNS(self, name, qname, attrs):
        """Write a namespace-aware start tag.

        *name* is a (ns_url, localname) pair; the tag name is derived
        from it via the current prefix mappings (*qname* is not used).
        Any prefix mappings not yet declared are written as xmlns
        attributes on this element.
        """
        self._write('<' + self._qname(name))

        # Route the declarations through _write() like all other output,
        # so they take the same path as the rest of the document.
        for prefix, uri in self._undeclared_ns_maps:
            if prefix:
                self._write(' xmlns:%s="%s"' % (prefix, uri))
            else:
                self._write(' xmlns="%s"' % uri)
        self._undeclared_ns_maps = []

        for (attr_name, value) in attrs.items():
            self._write(' %s=%s' % (self._qname(attr_name), quoteattr(value)))
        self._write('>')

    def endElementNS(self, name, qname):
        """Write a namespace-aware end tag for the (ns_url, localname) pair."""
        self._write('</%s>' % self._qname(name))

    def characters(self, content):
        """Write character data with &, < and > escaped."""
        self._write(escape(content))

    def ignorableWhitespace(self, content):
        """Write whitespace unchanged."""
        self._write(content)

    def processingInstruction(self, target, data):
        """Write a processing instruction."""
        self._write('<?%s %s?>' % (target, data))
Fred Drake45cd9de2000-06-29 19:34:54 +0000166
Fred Drakea12adfe2000-09-18 17:40:22 +0000167
class XMLFilterBase(xmlreader.XMLReader):
    """This class is designed to sit between an XMLReader and the
    client application's event handlers.  By default, it does nothing
    but pass requests up to the reader and events on to the handlers
    unmodified, but subclasses can override specific methods to modify
    the event stream or the configuration requests as they pass
    through."""

    def __init__(self, parent = None):
        """Create a filter; *parent* is the reader requests are passed to."""
        xmlreader.XMLReader.__init__(self)
        self._parent = parent

    # ErrorHandler methods

    def error(self, exception):
        self._err_handler.error(exception)

    def fatalError(self, exception):
        self._err_handler.fatalError(exception)

    def warning(self, exception):
        self._err_handler.warning(exception)

    # ContentHandler methods

    def setDocumentLocator(self, locator):
        self._cont_handler.setDocumentLocator(locator)

    def startDocument(self):
        self._cont_handler.startDocument()

    def endDocument(self):
        self._cont_handler.endDocument()

    def startPrefixMapping(self, prefix, uri):
        self._cont_handler.startPrefixMapping(prefix, uri)

    def endPrefixMapping(self, prefix):
        self._cont_handler.endPrefixMapping(prefix)

    def startElement(self, name, attrs):
        self._cont_handler.startElement(name, attrs)

    def endElement(self, name):
        self._cont_handler.endElement(name)

    def startElementNS(self, name, qname, attrs):
        self._cont_handler.startElementNS(name, qname, attrs)

    def endElementNS(self, name, qname):
        self._cont_handler.endElementNS(name, qname)

    def characters(self, content):
        self._cont_handler.characters(content)

    def ignorableWhitespace(self, chars):
        self._cont_handler.ignorableWhitespace(chars)

    def processingInstruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    def skippedEntity(self, name):
        self._cont_handler.skippedEntity(name)

    # DTDHandler methods

    def notationDecl(self, name, publicId, systemId):
        self._dtd_handler.notationDecl(name, publicId, systemId)

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata)

    # EntityResolver methods

    def resolveEntity(self, publicId, systemId):
        return self._ent_handler.resolveEntity(publicId, systemId)

    # XMLReader methods

    def parse(self, source):
        """Install this filter as every handler of the parent, then parse."""
        self._parent.setContentHandler(self)
        self._parent.setErrorHandler(self)
        self._parent.setEntityResolver(self)
        self._parent.setDTDHandler(self)
        self._parent.parse(source)

    def setLocale(self, locale):
        self._parent.setLocale(locale)

    def getFeature(self, name):
        return self._parent.getFeature(name)

    def setFeature(self, name, state):
        self._parent.setFeature(name, state)

    def getProperty(self, name):
        return self._parent.getProperty(name)

    def setProperty(self, name, value):
        self._parent.setProperty(name, value)

    # XMLFilter methods

    def getParent(self):
        """Return the reader this filter passes requests to."""
        return self._parent

    def setParent(self, parent):
        """Set the reader this filter passes requests to."""
        self._parent = parent
276
# --- Utility functions
278
def prepare_input_source(source, base = ""):
    """This function takes an InputSource and an optional base URL and
    returns a fully resolved InputSource object ready for reading.

    *source* may also be a string, which is used as the system
    identifier of a new InputSource, or any object with a ``read``
    attribute, which is installed as the byte stream of a new
    InputSource (its ``name`` attribute, if present, becomes the system
    identifier).

    If the resulting InputSource has no byte stream, its system
    identifier is first tried as a file name relative to the directory
    of *base*; if no such file exists it is joined to *base* as a URL
    and opened.
    """

    # isinstance() rather than an exact type() comparison, so that
    # subclasses of the string types are also treated as system ids.
    if isinstance(source, tuple(_StringTypes)):
        source = xmlreader.InputSource(source)
    elif hasattr(source, "read"):
        f = source
        source = xmlreader.InputSource()
        source.setByteStream(f)
        if hasattr(f, "name"):
            source.setSystemId(f.name)

    if source.getByteStream() is None:
        sysid = source.getSystemId()
        basehead = os.path.dirname(os.path.normpath(base))
        sysidfilename = os.path.join(basehead, sysid)
        if os.path.isfile(sysidfilename):
            source.setSystemId(sysidfilename)
            # The stream is handed to the InputSource and stays open.
            f = open(sysidfilename, "rb")
        else:
            # NOTE(review): urlparse and urllib.urlopen are the module
            # names imported at the top of this file; confirm they exist
            # on the interpreter this module is meant to run on.
            source.setSystemId(urlparse.urljoin(base, sysid))
            f = urllib.urlopen(source.getSystemId())

        source.setByteStream(f)

    return source
Lars Gustäbel523b0a62000-09-24 18:54:49 +0000305 return source