blob: 97d65d8fd33f803f808d6358ab2773835ceedca6 [file] [log] [blame]
"""\
A library of useful helper classes to the SAX classes, for the
convenience of application and driver writers.
"""
5
Martin v. Löwis58af43f2000-09-24 21:31:06 +00006import os, urlparse, urllib, types
Fred Drake45cd9de2000-06-29 19:34:54 +00007import handler
Lars Gustäbelfc643c32000-09-24 10:53:31 +00008import xmlreader
Fred Drakea12adfe2000-09-18 17:40:22 +00009
# Types that prepare_input_source() treats as "a system identifier given
# as a string".  The byte-string type is always listed; the unicode type
# is added only when this interpreter's ``types`` module provides it.
_StringTypes = [types.StringType]
if hasattr(types, "UnicodeType"):
    _StringTypes.append(types.UnicodeType)
14
# Pick the error policy handed to ``encode()`` when output is written:
# use "xmlcharrefreplace" when the codecs module supports it, otherwise
# fall back to "strict".  The imported name is only a probe and is
# removed again so that it does not linger in the module namespace.
try:
    from codecs import xmlcharrefreplace_errors as _xmlcharrefreplace_probe
except ImportError:
    _error_handling = "strict"
else:
    del _xmlcharrefreplace_probe
    _error_handling = "xmlcharrefreplace"
23
def __dict_replace(s, d):
    """Return s with every key of d replaced by its mapped value.

    The substitutions are applied one after another, in the order in
    which d.items() yields them, each one operating on the result of
    the previous substitution.
    """
    result = s
    for pair in d.items():
        old, new = pair
        result = result.replace(old, new)
    return result
Martin v. Löwis58af43f2000-09-24 21:31:06 +000029
def escape(data, entities={}):
    """Return data with &, < and > replaced by XML entity references.

    Further replacements can be requested through the optional
    entities dictionary: keys and values must all be strings, and each
    key found in the data is replaced with its corresponding value.
    These extra replacements run after the three built-in ones.
    """
    # The ampersand has to be handled first: the replacement texts
    # themselves contain "&" and must not be escaped a second time.
    for raw, reference in (("&", "&amp;"), (">", "&gt;"), ("<", "&lt;")):
        data = data.replace(raw, reference)
    if entities:
        data = __dict_replace(data, entities)
    return data
Martin v. Löwis74b51ac2002-10-26 14:50:45 +000045
def unescape(data, entities={}):
    """Return data with &amp;, &lt; and &gt; turned back into &, < and >.

    Further replacements can be requested through the optional
    entities dictionary: keys and values must all be strings, and each
    key found in the data is replaced with its corresponding value.
    """
    text = data.replace("&lt;", "<").replace("&gt;", ">")
    if entities:
        text = __dict_replace(text, entities)
    # The ampersand has to be handled last; doing it earlier would let
    # an escaped reference such as "&amp;lt;" be unescaped twice.
    return text.replace("&amp;", "&")
Fred Drake45cd9de2000-06-29 19:34:54 +000059
def quoteattr(data, entities={}):
    """Escape data and wrap it in quotes for use as an attribute value.

    &, < and > are escaped, and newline, carriage return and tab are
    written as character references.  Double quotes are used as the
    delimiter unless the value itself contains a double quote; in that
    case single quotes are used, or, when the value contains both kinds
    of quote, double quotes with every \" written as &quot;.

    Further replacements can be requested through the optional
    entities dictionary: keys and values must all be strings, and each
    key found in the data is replaced with its corresponding value.
    The dictionary passed in is copied, never modified.
    """
    table = entities.copy()
    table['\n'] = '&#10;'
    table['\r'] = '&#13;'
    table['\t'] = '&#9;'
    escaped = escape(data, table)
    if '"' not in escaped:
        return '"%s"' % escaped
    if "'" not in escaped:
        return "'%s'" % escaped
    # Both quote characters occur: keep double quotes as the delimiter
    # and escape the embedded ones.
    return '"%s"' % escaped.replace('"', "&quot;")
82
Fred Drakea12adfe2000-09-18 17:40:22 +000083
class XMLGenerator(handler.ContentHandler):
    """ContentHandler that writes the SAX events it receives as XML text.

    All output goes through _write(), which passes str objects to the
    output object unchanged and encodes anything else with the
    configured encoding first.
    """

    def __init__(self, out=None, encoding="iso-8859-1"):
        """Set up the generator.

        out -- object with a write() method that receives the output;
               sys.stdout is used when it is None.
        encoding -- name placed in the XML declaration and used to
               encode non-str text before it is written.
        """
        if out is None:
            import sys
            out = sys.stdout
        handler.ContentHandler.__init__(self)
        self._out = out
        self._ns_contexts = [{}] # contains uri -> prefix dicts
        self._current_context = self._ns_contexts[-1]
        # (prefix, uri) pairs announced by startPrefixMapping() that
        # have not been written out as xmlns attributes yet.
        self._undeclared_ns_maps = []
        self._encoding = encoding

    def _write(self, text):
        """Write text to the output, encoding it unless it is a str."""
        if isinstance(text, str):
            self._out.write(text)
        else:
            self._out.write(text.encode(self._encoding, _error_handling))

    def _qname(self, name):
        """Builds a qualified name from a (ns_url, localname) pair.

        Raises KeyError when the namespace URL is non-empty, is not the
        predefined XML namespace, and has no entry in the current
        namespace context.
        """
        if name[0]:
            # Per http://www.w3.org/XML/1998/namespace, The 'xml' prefix is
            # bound by definition to http://www.w3.org/XML/1998/namespace.  It
            # does not need to be declared and will not usually be found in
            # self._current_context.
            if 'http://www.w3.org/XML/1998/namespace' == name[0]:
                return 'xml:' + name[1]
            # The name is in a non-empty namespace
            prefix = self._current_context[name[0]]
            if prefix:
                # If it is not the default namespace, prepend the prefix
                return prefix + ":" + name[1]
        # Return the unqualified name
        return name[1]

    # ContentHandler methods

    def startDocument(self):
        """Write the XML declaration, naming the configured encoding."""
        self._write('<?xml version="1.0" encoding="%s"?>\n' %
                    self._encoding)

    def startPrefixMapping(self, prefix, uri):
        """Record a new prefix -> uri mapping.

        A copy of the mappings as they were before this call is pushed
        on the context stack so that endPrefixMapping() can restore it.
        The xmlns declaration itself is written by the next
        startElementNS() call.
        """
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix
        self._undeclared_ns_maps.append((prefix, uri))

    def endPrefixMapping(self, prefix):
        """Restore the mappings saved by the latest startPrefixMapping()."""
        self._current_context = self._ns_contexts[-1]
        del self._ns_contexts[-1]

    def startElement(self, name, attrs):
        """Write a start tag with its attributes (no namespace handling)."""
        self._write('<' + name)
        for (attr_name, value) in attrs.items():
            self._write(' %s=%s' % (attr_name, quoteattr(value)))
        self._write('>')

    def endElement(self, name):
        """Write the end tag for name."""
        self._write('</%s>' % name)

    def startElementNS(self, name, qname, attrs):
        """Write a namespace-aware start tag.

        name is a (ns_url, localname) pair and the keys of attrs are
        pairs of the same form; both are turned into qualified names
        with _qname().  The qname argument is not used.  Every prefix
        mapping recorded since the previous start tag is written as an
        xmlns attribute of this element.
        """
        self._write('<' + self._qname(name))

        # Declarations go through _write() like all other output, so
        # that non-str prefixes and URIs are encoded consistently
        # instead of being handed to the output object unencoded.
        for prefix, uri in self._undeclared_ns_maps:
            if prefix:
                self._write(' xmlns:%s="%s"' % (prefix, uri))
            else:
                self._write(' xmlns="%s"' % uri)
        self._undeclared_ns_maps = []

        for (attr_name, value) in attrs.items():
            self._write(' %s=%s' % (self._qname(attr_name), quoteattr(value)))
        self._write('>')

    def endElementNS(self, name, qname):
        """Write the end tag for the (ns_url, localname) pair name."""
        self._write('</%s>' % self._qname(name))

    def characters(self, content):
        """Write character data with &, < and > escaped."""
        self._write(escape(content))

    def ignorableWhitespace(self, content):
        """Write the whitespace unchanged."""
        self._write(content)

    def processingInstruction(self, target, data):
        """Write a processing instruction; target and data are not escaped."""
        self._write('<?%s %s?>' % (target, data))
Fred Drake45cd9de2000-06-29 19:34:54 +0000169
Fred Drakea12adfe2000-09-18 17:40:22 +0000170
class XMLFilterBase(xmlreader.XMLReader):
    """Pass-through filter between an XMLReader and the application.

    The filter sits between a parent reader and the client
    application's event handlers.  As written it changes nothing:
    configuration requests are handed up to the parent reader and
    events are handed on to the handlers unmodified.  Subclasses
    override individual methods to alter the event stream or the
    configuration requests on their way through.
    """

    def __init__(self, parent=None):
        """Create a filter whose requests are forwarded to parent."""
        xmlreader.XMLReader.__init__(self)
        self._parent = parent

    # ErrorHandler methods -- forwarded to self._err_handler

    def error(self, exception):
        """Hand the exception to the error handler's error()."""
        self._err_handler.error(exception)

    def fatalError(self, exception):
        """Hand the exception to the error handler's fatalError()."""
        self._err_handler.fatalError(exception)

    def warning(self, exception):
        """Hand the exception to the error handler's warning()."""
        self._err_handler.warning(exception)

    # ContentHandler methods -- forwarded to self._cont_handler

    def setDocumentLocator(self, locator):
        """Forward the locator to the content handler."""
        self._cont_handler.setDocumentLocator(locator)

    def startDocument(self):
        """Forward the start-of-document event."""
        self._cont_handler.startDocument()

    def endDocument(self):
        """Forward the end-of-document event."""
        self._cont_handler.endDocument()

    def startPrefixMapping(self, prefix, uri):
        """Forward the start of a prefix mapping."""
        self._cont_handler.startPrefixMapping(prefix, uri)

    def endPrefixMapping(self, prefix):
        """Forward the end of a prefix mapping."""
        self._cont_handler.endPrefixMapping(prefix)

    def startElement(self, name, attrs):
        """Forward a start-element event."""
        self._cont_handler.startElement(name, attrs)

    def endElement(self, name):
        """Forward an end-element event."""
        self._cont_handler.endElement(name)

    def startElementNS(self, name, qname, attrs):
        """Forward a namespace-aware start-element event."""
        self._cont_handler.startElementNS(name, qname, attrs)

    def endElementNS(self, name, qname):
        """Forward a namespace-aware end-element event."""
        self._cont_handler.endElementNS(name, qname)

    def characters(self, content):
        """Forward character data."""
        self._cont_handler.characters(content)

    def ignorableWhitespace(self, chars):
        """Forward ignorable whitespace."""
        self._cont_handler.ignorableWhitespace(chars)

    def processingInstruction(self, target, data):
        """Forward a processing instruction."""
        self._cont_handler.processingInstruction(target, data)

    def skippedEntity(self, name):
        """Forward a skipped-entity notification."""
        self._cont_handler.skippedEntity(name)

    # DTDHandler methods -- forwarded to self._dtd_handler

    def notationDecl(self, name, publicId, systemId):
        """Forward a notation declaration."""
        self._dtd_handler.notationDecl(name, publicId, systemId)

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        """Forward an unparsed entity declaration."""
        self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata)

    # EntityResolver methods -- forwarded to self._ent_handler

    def resolveEntity(self, publicId, systemId):
        """Return whatever the entity resolver returns for this entity."""
        resolved = self._ent_handler.resolveEntity(publicId, systemId)
        return resolved

    # XMLReader methods -- forwarded to the parent reader

    def parse(self, source):
        """Install this filter as all four of the parent's handlers,
        then let the parent parse source."""
        reader = self._parent
        me = self
        reader.setContentHandler(me)
        reader.setErrorHandler(me)
        reader.setEntityResolver(me)
        reader.setDTDHandler(me)
        reader.parse(source)

    def setLocale(self, locale):
        """Forward the locale to the parent reader."""
        self._parent.setLocale(locale)

    def getFeature(self, name):
        """Return the parent reader's state for the named feature."""
        state = self._parent.getFeature(name)
        return state

    def setFeature(self, name, state):
        """Set the named feature on the parent reader."""
        self._parent.setFeature(name, state)

    def getProperty(self, name):
        """Return the parent reader's value for the named property."""
        value = self._parent.getProperty(name)
        return value

    def setProperty(self, name, value):
        """Set the named property on the parent reader."""
        self._parent.setProperty(name, value)

    # XMLFilter methods

    def getParent(self):
        """Return the reader this filter forwards requests to."""
        return self._parent

    def setParent(self, parent):
        """Replace the reader this filter forwards requests to."""
        self._parent = parent
279
Lars Gustäbel523b0a62000-09-24 18:54:49 +0000280# --- Utility functions
281
def prepare_input_source(source, base=""):
    """Return a fully resolved InputSource object ready for reading.

    source -- one of:
        * a string (an instance of any type in _StringTypes, including
          subclasses), used as the system identifier of a new
          InputSource;
        * an object with a "read" attribute, wrapped in a new
          InputSource as its byte stream; its "name" attribute, when
          present, becomes the system identifier;
        * any other object, which is used as the InputSource as is.
    base -- optional base against which the system identifier is
        resolved when the source has no byte stream yet.

    When the source has no byte stream, the system identifier is first
    tried as a file name relative to the directory part of base; if no
    such file exists it is joined with base as a URL and opened with
    urllib.urlopen().  The opened stream is attached to the source and
    is not closed here.
    """

    # isinstance() rather than an exact type() comparison, so that
    # subclasses of the string types are also treated as system
    # identifiers instead of being mistaken for InputSource objects.
    if isinstance(source, tuple(_StringTypes)):
        source = xmlreader.InputSource(source)
    elif hasattr(source, "read"):
        f = source
        source = xmlreader.InputSource()
        source.setByteStream(f)
        if hasattr(f, "name"):
            source.setSystemId(f.name)

    if source.getByteStream() is None:
        sysid = source.getSystemId()
        basehead = os.path.dirname(os.path.normpath(base))
        sysidfilename = os.path.join(basehead, sysid)
        if os.path.isfile(sysidfilename):
            # A local file: record the resolved path and read it raw.
            source.setSystemId(sysidfilename)
            f = open(sysidfilename, "rb")
        else:
            # Not a local file: treat the identifier as a URL.
            source.setSystemId(urlparse.urljoin(base, sysid))
            f = urllib.urlopen(source.getSystemId())

        source.setByteStream(f)

    return source