blob: 7989713f5880a39eaacaa23c4660f778fc389e2f [file] [log] [blame]
"""\
A library of useful helper classes and functions for the SAX classes,
for the convenience of application and driver writers.
"""
5
Martin v. Löwis58af43f2000-09-24 21:31:06 +00006import os, urlparse, urllib, types
Serhiy Storchaka8673ab92013-02-02 10:28:30 +02007import sys
Fred Drake45cd9de2000-06-29 19:34:54 +00008import handler
Lars Gustäbelfc643c32000-09-24 10:53:31 +00009import xmlreader
Fred Drakea12adfe2000-09-18 17:40:22 +000010
Fred Drake95b4ec52000-12-16 01:45:11 +000011try:
12 _StringTypes = [types.StringType, types.UnicodeType]
13except AttributeError:
14 _StringTypes = [types.StringType]
15
# Pick the codec error handler used when XMLGenerator encodes text:
# "xmlcharrefreplace" if the codecs module supports it, else "strict".
try:
    from codecs import xmlcharrefreplace_errors
except ImportError:
    _error_handling = "strict"
else:
    _error_handling = "xmlcharrefreplace"
    # The import was only a feature probe; do not leave the name behind.
    del xmlcharrefreplace_errors
24
def __dict_replace(s, d):
    """Return s with each key of d replaced by its mapped value.

    The replacements are applied one after another in the dictionary's
    iteration order, so a later replacement operates on the result of
    the earlier ones.
    """
    for old in d:
        s = s.replace(old, d[old])
    return s
Martin v. Löwis58af43f2000-09-24 21:31:06 +000030
def escape(data, entities={}):
    """Escape &, <, and > in a string of data.

    Additional replacements can be requested through the optional
    entities dictionary: every key found in the (already escaped) data
    is replaced with its corresponding value.  Keys and values must all
    be strings.
    """
    # The ampersand must come first, otherwise the "&" introduced by the
    # other two replacements would be escaped a second time.
    for raw, reference in (("&", "&amp;"), (">", "&gt;"), ("<", "&lt;")):
        data = data.replace(raw, reference)
    if not entities:
        return data
    return __dict_replace(data, entities)
Martin v. Löwis74b51ac2002-10-26 14:50:45 +000046
def unescape(data, entities={}):
    """Unescape &amp;, &lt;, and &gt; in a string of data.

    Additional replacements can be requested through the optional
    entities dictionary: every key found in the data is replaced with
    its corresponding value.  Keys and values must all be strings.
    """
    text = data.replace("&lt;", "<").replace("&gt;", ">")
    if entities:
        text = __dict_replace(text, entities)
    # The ampersand must come last so that an escaped reference such as
    # "&amp;lt;" turns into "&lt;" rather than "<".
    text = text.replace("&amp;", "&")
    return text
Fred Drake45cd9de2000-06-29 19:34:54 +000060
def quoteattr(data, entities={}):
    """Escape and quote an attribute value.

    The data is escaped like escape() does (with newline, carriage
    return and tab additionally turned into character references) and
    then wrapped in quotes.  Double quotes are used unless the value
    contains a double quote but no single quote, in which case single
    quotes are used.  If it contains both, double quotes inside the
    value are written as &quot;.

    Other strings can be escaped by passing a dictionary as the optional
    entities parameter.  The keys and values must all be strings; each
    key will be replaced with its corresponding value.
    """
    # Work on a copy so the caller's dictionary (and the shared default)
    # is never modified.
    table = entities.copy()
    table.update({'\n': '&#10;', '\r': '&#13;', '\t': '&#9;'})
    escaped = escape(data, table)
    if '"' not in escaped:
        return '"%s"' % escaped
    if "'" not in escaped:
        return "'%s'" % escaped
    return '"%s"' % escaped.replace('"', "&quot;")
83
Fred Drakea12adfe2000-09-18 17:40:22 +000084
class XMLGenerator(handler.ContentHandler):
    """Content handler that writes the SAX events it receives as XML.

    All output goes through _write(), which sends str objects to the
    output stream unchanged and encodes anything else with the
    configured encoding first.
    """

    def __init__(self, out=None, encoding="iso-8859-1"):
        """Create a generator writing to out (sys.stdout if omitted).

        encoding is named in the XML declaration emitted by
        startDocument() and is used by _write() to encode non-str text.
        """
        if out is None:
            import sys
            out = sys.stdout
        handler.ContentHandler.__init__(self)
        self._out = out
        self._ns_contexts = [{}] # contains uri -> prefix dicts
        self._current_context = self._ns_contexts[-1]
        # (prefix, uri) pairs announced via startPrefixMapping() that
        # still have to be declared on the next element start tag.
        self._undeclared_ns_maps = []
        self._encoding = encoding

    def _write(self, text):
        """Write text to the output stream, encoding it if it is not a str."""
        if isinstance(text, str):
            self._out.write(text)
        else:
            self._out.write(text.encode(self._encoding, _error_handling))

    def _qname(self, name):
        """Builds a qualified name from a (ns_url, localname) pair"""
        if name[0]:
            # Per http://www.w3.org/XML/1998/namespace, The 'xml' prefix is
            # bound by definition to http://www.w3.org/XML/1998/namespace.  It
            # does not need to be declared and will not usually be found in
            # self._current_context.
            if 'http://www.w3.org/XML/1998/namespace' == name[0]:
                return 'xml:' + name[1]
            # The name is in a non-empty namespace
            prefix = self._current_context[name[0]]
            if prefix:
                # If it is not the default namespace, prepend the prefix
                return prefix + ":" + name[1]
        # Return the unqualified name
        return name[1]

    # ContentHandler methods

    def startDocument(self):
        """Write the XML declaration, naming the configured encoding."""
        self._write('<?xml version="1.0" encoding="%s"?>\n' %
                        self._encoding)

    def startPrefixMapping(self, prefix, uri):
        """Bind prefix to uri and queue the binding for declaration.

        The declaration itself is written by the next startElementNS().
        """
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix
        self._undeclared_ns_maps.append((prefix, uri))

    def endPrefixMapping(self, prefix):
        """Restore the namespace context saved by startPrefixMapping()."""
        self._current_context = self._ns_contexts[-1]
        del self._ns_contexts[-1]

    def startElement(self, name, attrs):
        """Write the start tag for name with its quoted attributes."""
        self._write('<' + name)
        for (attr_name, value) in attrs.items():
            self._write(' %s=%s' % (attr_name, quoteattr(value)))
        self._write('>')

    def endElement(self, name):
        """Write the end tag for name."""
        self._write('</%s>' % name)

    def startElementNS(self, name, qname, attrs):
        """Write a namespace-aware start tag.

        name is a (uri, localname) pair; the tag name is rebuilt from
        the current prefix mappings, the qname argument is not used.
        Pending namespace declarations are written before the regular
        attributes.
        """
        self._write('<' + self._qname(name))

        # Route the declarations through _write() like every other piece
        # of output, so that they get encoded consistently instead of
        # being handed to the stream as-is.  The URI is written verbatim.
        for prefix, uri in self._undeclared_ns_maps:
            if prefix:
                self._write(' xmlns:%s="%s"' % (prefix, uri))
            else:
                self._write(' xmlns="%s"' % uri)
        self._undeclared_ns_maps = []

        for (attr_name, value) in attrs.items():
            self._write(' %s=%s' % (self._qname(attr_name), quoteattr(value)))
        self._write('>')

    def endElementNS(self, name, qname):
        """Write the end tag for the (uri, localname) pair name."""
        self._write('</%s>' % self._qname(name))

    def characters(self, content):
        """Write character data with &, < and > escaped."""
        self._write(escape(content))

    def ignorableWhitespace(self, content):
        """Write ignorable whitespace unchanged."""
        self._write(content)

    def processingInstruction(self, target, data):
        """Write a processing instruction."""
        self._write('<?%s %s?>' % (target, data))
Fred Drake45cd9de2000-06-29 19:34:54 +0000170
Fred Drakea12adfe2000-09-18 17:40:22 +0000171
class XMLFilterBase(xmlreader.XMLReader):
    """Pass-through filter between an XMLReader and the application.

    An instance sits between a parent reader and the client's event
    handlers.  Out of the box every configuration request is handed to
    the parent and every event is handed to the registered handlers
    without modification; subclasses override individual methods to
    change the event stream or the requests on their way through.
    """

    def __init__(self, parent=None):
        """Create a filter, optionally attached to the reader parent."""
        xmlreader.XMLReader.__init__(self)
        self._parent = parent

    # ErrorHandler methods

    def error(self, exception):
        """Forward a recoverable error to the error handler."""
        self._err_handler.error(exception)

    def fatalError(self, exception):
        """Forward a fatal error to the error handler."""
        self._err_handler.fatalError(exception)

    def warning(self, exception):
        """Forward a warning to the error handler."""
        self._err_handler.warning(exception)

    # ContentHandler methods

    def setDocumentLocator(self, locator):
        """Forward the document locator to the content handler."""
        self._cont_handler.setDocumentLocator(locator)

    def startDocument(self):
        """Forward the start-of-document event."""
        self._cont_handler.startDocument()

    def endDocument(self):
        """Forward the end-of-document event."""
        self._cont_handler.endDocument()

    def startPrefixMapping(self, prefix, uri):
        """Forward the start of a prefix mapping."""
        self._cont_handler.startPrefixMapping(prefix, uri)

    def endPrefixMapping(self, prefix):
        """Forward the end of a prefix mapping."""
        self._cont_handler.endPrefixMapping(prefix)

    def startElement(self, name, attrs):
        """Forward an element start event."""
        self._cont_handler.startElement(name, attrs)

    def endElement(self, name):
        """Forward an element end event."""
        self._cont_handler.endElement(name)

    def startElementNS(self, name, qname, attrs):
        """Forward a namespace-aware element start event."""
        self._cont_handler.startElementNS(name, qname, attrs)

    def endElementNS(self, name, qname):
        """Forward a namespace-aware element end event."""
        self._cont_handler.endElementNS(name, qname)

    def characters(self, content):
        """Forward character data."""
        self._cont_handler.characters(content)

    def ignorableWhitespace(self, chars):
        """Forward ignorable whitespace."""
        self._cont_handler.ignorableWhitespace(chars)

    def processingInstruction(self, target, data):
        """Forward a processing instruction."""
        self._cont_handler.processingInstruction(target, data)

    def skippedEntity(self, name):
        """Forward a skipped-entity notification."""
        self._cont_handler.skippedEntity(name)

    # DTDHandler methods

    def notationDecl(self, name, publicId, systemId):
        """Forward a notation declaration to the DTD handler."""
        self._dtd_handler.notationDecl(name, publicId, systemId)

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        """Forward an unparsed entity declaration to the DTD handler."""
        self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata)

    # EntityResolver methods

    def resolveEntity(self, publicId, systemId):
        """Ask the entity resolver and return its answer."""
        return self._ent_handler.resolveEntity(publicId, systemId)

    # XMLReader methods

    def parse(self, source):
        """Register this filter for all four handler roles on the parent
        reader, then let the parent parse source."""
        reader = self._parent
        reader.setContentHandler(self)
        reader.setErrorHandler(self)
        reader.setEntityResolver(self)
        reader.setDTDHandler(self)
        reader.parse(source)

    def setLocale(self, locale):
        """Pass the locale setting on to the parent reader."""
        self._parent.setLocale(locale)

    def getFeature(self, name):
        """Return the parent reader's state for feature name."""
        return self._parent.getFeature(name)

    def setFeature(self, name, state):
        """Set feature name on the parent reader."""
        self._parent.setFeature(name, state)

    def getProperty(self, name):
        """Return the parent reader's value for property name."""
        return self._parent.getProperty(name)

    def setProperty(self, name, value):
        """Set property name on the parent reader."""
        self._parent.setProperty(name, value)

    # XMLFilter methods

    def getParent(self):
        """Return the parent reader."""
        return self._parent

    def setParent(self, parent):
        """Replace the parent reader."""
        self._parent = parent
280
Lars Gustäbel523b0a62000-09-24 18:54:49 +0000281# --- Utility functions
282
def prepare_input_source(source, base = ""):
    """Return a fully resolved InputSource object ready for reading.

    source may be:
      * a string (any of _StringTypes, subclasses included), used as
        the system identifier of a new InputSource;
      * an object with a read() method, used as the byte stream of a
        new InputSource (its name attribute, if any, becomes the
        system identifier);
      * anything else, which is used as-is and must provide the
        InputSource methods called below.

    If the resulting source has no byte stream yet, its system
    identifier is first tried as a file name relative to the directory
    part of base; if no such file exists it is joined with base as a
    URL and opened with urllib.  The opened stream is stored on the
    source, and the system identifier is updated to the resolved name.
    """

    # isinstance() rather than an exact type comparison, so that
    # subclasses of the string types are recognised as strings too.
    if isinstance(source, tuple(_StringTypes)):
        source = xmlreader.InputSource(source)
    elif hasattr(source, "read"):
        f = source
        source = xmlreader.InputSource()
        source.setByteStream(f)
        if hasattr(f, "name"):
            source.setSystemId(f.name)

    if source.getByteStream() is None:
        try:
            sysid = source.getSystemId()
            basehead = os.path.dirname(os.path.normpath(base))
            encoding = sys.getfilesystemencoding()
            # Bring sysid and basehead to the same string type before
            # joining them: prefer unicode, and fall back to byte
            # strings when decoding with the filesystem encoding fails.
            if isinstance(sysid, unicode):
                if not isinstance(basehead, unicode):
                    try:
                        basehead = basehead.decode(encoding)
                    except UnicodeDecodeError:
                        sysid = sysid.encode(encoding)
            else:
                if isinstance(basehead, unicode):
                    try:
                        sysid = sysid.decode(encoding)
                    except UnicodeDecodeError:
                        basehead = basehead.encode(encoding)
            sysidfilename = os.path.join(basehead, sysid)
            isfile = os.path.isfile(sysidfilename)
        except UnicodeError:
            # The name cannot be expressed as a local path; treat it as
            # a URL below.
            isfile = False
        if isfile:
            source.setSystemId(sysidfilename)
            # The open file is handed over to the InputSource.
            f = open(sysidfilename, "rb")
        else:
            source.setSystemId(urlparse.urljoin(base, source.getSystemId()))
            f = urllib.urlopen(source.getSystemId())

        source.setByteStream(f)

    return source