blob: 3402a02dadcc52b45284fb5ecfe84b6f5e35a486 [file] [log] [blame]
"""\
A library of useful helper classes to the SAX classes, for the
convenience of application and driver writers.
"""
5
Martin v. Löwis58af43f2000-09-24 21:31:06 +00006import os, urlparse, urllib, types
Fred Drake45cd9de2000-06-29 19:34:54 +00007import handler
Lars Gustäbelfc643c32000-09-24 10:53:31 +00008import xmlreader
Fred Drakea12adfe2000-09-18 17:40:22 +00009
Fred Drake95b4ec52000-12-16 01:45:11 +000010try:
11 _StringTypes = [types.StringType, types.UnicodeType]
12except AttributeError:
13 _StringTypes = [types.StringType]
14
# Choose the codec error policy used when output text is encoded:
# "xmlcharrefreplace" when the codecs module supports it, otherwise
# "strict".
try:
    from codecs import xmlcharrefreplace_errors
except ImportError:
    _error_handling = "strict"
else:
    _error_handling = "xmlcharrefreplace"
    # The import was only a feature probe; remove the name again.
    del xmlcharrefreplace_errors
23
def __dict_replace(s, d):
    """Return s with each key of d replaced by its mapped value.

    The replacements are applied one after another, in the iteration
    order of the dictionary, each working on the previous result.
    """
    result = s
    for pair in d.items():
        # pair is (old, new), exactly the arguments replace() expects
        result = result.replace(*pair)
    return result
Martin v. Löwis58af43f2000-09-24 21:31:06 +000029
def escape(data, entities={}):
    """Return data with &, < and > replaced by entity references.

    Further strings can be escaped by passing a dictionary as the
    optional entities parameter.  Its keys and values must all be
    strings; each key is replaced with its corresponding value after
    the three built-in replacements have been made.
    """
    # The ampersand is handled first so that the "&" introduced by the
    # other two references is not escaped a second time.
    for char, reference in (("&", "&amp;"), (">", "&gt;"), ("<", "&lt;")):
        data = data.replace(char, reference)
    if not entities:
        return data
    return __dict_replace(data, entities)
Martin v. Löwis74b51ac2002-10-26 14:50:45 +000045
def unescape(data, entities={}):
    """Return data with &amp;, &lt; and &gt; turned back into characters.

    Further strings can be unescaped by passing a dictionary as the
    optional entities parameter.  Its keys and values must all be
    strings; each key is replaced with its corresponding value.
    """
    for reference, char in (("&lt;", "<"), ("&gt;", ">")):
        data = data.replace(reference, char)
    if entities:
        data = __dict_replace(data, entities)
    # The ampersand is handled last, so that text such as "&amp;lt;"
    # comes out as "&lt;" instead of being unescaped twice.
    data = data.replace("&amp;", "&")
    return data
Fred Drake45cd9de2000-06-29 19:34:54 +000059
def quoteattr(data, entities={}):
    """Escape and quote an attribute value.

    Escape &, <, and > in a string of data, then quote it for use as
    an attribute value.  The \" character will be escaped as well, if
    necessary.  Newline, carriage return and tab characters are written
    as character references, because an XML parser normalizes the
    literal characters inside an attribute value to spaces.

    You can escape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.
    The dictionary passed in is never modified.
    """
    # Work on a copy so that neither the caller's dictionary nor the
    # shared default is changed; a false value means "no extras".
    replacements = dict(entities or {})
    replacements.update({'\n': '&#10;', '\r': '&#13;', '\t': '&#9;'})
    data = escape(data, replacements)
    # Prefer double quotes; use single quotes when that avoids having
    # to escape anything, and escape the double quotes only when the
    # value contains both kinds.
    if '"' not in data:
        return '"%s"' % data
    if "'" not in data:
        return "'%s'" % data
    return '"%s"' % data.replace('"', "&quot;")
80
Fred Drakea12adfe2000-09-18 17:40:22 +000081
class XMLGenerator(handler.ContentHandler):
    """ContentHandler that writes the SAX events it receives as XML text.

    out is the object whose write() method receives the output
    (sys.stdout when omitted).  encoding is the name put into the XML
    declaration and used to encode text that is not a str.
    """

    def __init__(self, out=None, encoding="iso-8859-1"):
        if out is None:
            import sys
            out = sys.stdout
        handler.ContentHandler.__init__(self)
        self._out = out
        self._ns_contexts = [{}] # contains uri -> prefix dicts
        self._current_context = self._ns_contexts[-1]
        # (prefix, uri) pairs announced by startPrefixMapping() that
        # still have to be declared on the next element start tag
        self._undeclared_ns_maps = []
        self._encoding = encoding

    def _write(self, text):
        """Write text to the output, encoding it first unless it is a str."""
        if isinstance(text, str):
            self._out.write(text)
        else:
            self._out.write(text.encode(self._encoding, _error_handling))

    def _qname(self, name):
        """Build the qualified name for a (namespace uri, localname) pair.

        Names without a namespace, and names in a namespace mapped to
        an empty prefix (the default namespace), are returned without a
        prefix.  Raises KeyError for a namespace that has no mapping in
        the current context.
        """
        uri, localname = name[0], name[1]
        if uri:
            # The "xml" prefix is bound to this namespace by definition;
            # it needs no declaration, so it is usually not found in
            # the current context.
            if uri == 'http://www.w3.org/XML/1998/namespace':
                return 'xml:' + localname
            prefix = self._current_context[uri]
            if prefix:
                return prefix + ":" + localname
        return localname

    # ContentHandler methods

    def startDocument(self):
        self._write('<?xml version="1.0" encoding="%s"?>\n' %
                    self._encoding)

    def startPrefixMapping(self, prefix, uri):
        # Save a copy of the context as it was before this mapping, so
        # that endPrefixMapping() can restore it.
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix
        self._undeclared_ns_maps.append((prefix, uri))

    def endPrefixMapping(self, prefix):
        self._current_context = self._ns_contexts[-1]
        del self._ns_contexts[-1]

    def startElement(self, name, attrs):
        self._write('<' + name)
        for (attr_name, value) in attrs.items():
            self._write(' %s=%s' % (attr_name, quoteattr(value)))
        self._write('>')

    def endElement(self, name):
        self._write('</%s>' % name)

    def startElementNS(self, name, qname, attrs):
        self._write('<' + self._qname(name))

        # Declare every mapping started since the previous start tag.
        for (prefix, uri) in self._undeclared_ns_maps:
            if prefix:
                self._write(' xmlns:%s=%s' % (prefix, quoteattr(uri)))
            else:
                # an empty prefix declares the default namespace
                self._write(' xmlns=%s' % quoteattr(uri))
        self._undeclared_ns_maps = []

        for (attr_name, value) in attrs.items():
            self._write(' %s=%s' % (self._qname(attr_name),
                                    quoteattr(value)))
        self._write('>')

    def endElementNS(self, name, qname):
        self._write('</%s>' % self._qname(name))

    def characters(self, content):
        self._write(escape(content))

    def ignorableWhitespace(self, content):
        self._write(content)

    def processingInstruction(self, target, data):
        self._write('<?%s %s?>' % (target, data))
Fred Drake45cd9de2000-06-29 19:34:54 +0000158
Fred Drakea12adfe2000-09-18 17:40:22 +0000159
class XMLFilterBase(xmlreader.XMLReader):
    """This class is designed to sit between an XMLReader and the
    client application's event handlers.  By default, it does nothing
    but pass requests up to the reader and events on to the handlers
    unmodified, but subclasses can override specific methods to modify
    the event stream or the configuration requests as they pass
    through."""

    def __init__(self, parent = None):
        xmlreader.XMLReader.__init__(self)
        # the reader that configuration requests and parse() are
        # forwarded to
        self._parent = parent

    # ErrorHandler methods: forwarded to the registered error handler

    def error(self, exception):
        self._err_handler.error(exception)

    def fatalError(self, exception):
        self._err_handler.fatalError(exception)

    def warning(self, exception):
        self._err_handler.warning(exception)

    # ContentHandler methods: forwarded to the registered content handler

    def setDocumentLocator(self, locator):
        self._cont_handler.setDocumentLocator(locator)

    def startDocument(self):
        self._cont_handler.startDocument()

    def endDocument(self):
        self._cont_handler.endDocument()

    def startPrefixMapping(self, prefix, uri):
        self._cont_handler.startPrefixMapping(prefix, uri)

    def endPrefixMapping(self, prefix):
        self._cont_handler.endPrefixMapping(prefix)

    def startElement(self, name, attrs):
        self._cont_handler.startElement(name, attrs)

    def endElement(self, name):
        self._cont_handler.endElement(name)

    def startElementNS(self, name, qname, attrs):
        self._cont_handler.startElementNS(name, qname, attrs)

    def endElementNS(self, name, qname):
        self._cont_handler.endElementNS(name, qname)

    def characters(self, content):
        self._cont_handler.characters(content)

    def ignorableWhitespace(self, chars):
        self._cont_handler.ignorableWhitespace(chars)

    def processingInstruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    def skippedEntity(self, name):
        self._cont_handler.skippedEntity(name)

    # DTDHandler methods: forwarded to the registered DTD handler

    def notationDecl(self, name, publicId, systemId):
        self._dtd_handler.notationDecl(name, publicId, systemId)

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata)

    # EntityResolver methods

    def resolveEntity(self, publicId, systemId):
        """Ask the registered entity resolver and return its answer."""
        # The result must be handed back to the caller; without the
        # return the caller would always receive None.
        return self._ent_handler.resolveEntity(publicId, systemId)

    # XMLReader methods

    def parse(self, source):
        """Install this filter as every handler of the parent reader,
        then let the parent parse source."""
        self._parent.setContentHandler(self)
        self._parent.setErrorHandler(self)
        self._parent.setEntityResolver(self)
        self._parent.setDTDHandler(self)
        self._parent.parse(source)

    def setLocale(self, locale):
        self._parent.setLocale(locale)

    def getFeature(self, name):
        return self._parent.getFeature(name)

    def setFeature(self, name, state):
        self._parent.setFeature(name, state)

    def getProperty(self, name):
        return self._parent.getProperty(name)

    def setProperty(self, name, value):
        self._parent.setProperty(name, value)

    # XMLFilter methods

    def getParent(self):
        return self._parent

    def setParent(self, parent):
        self._parent = parent
268
Lars Gustäbel523b0a62000-09-24 18:54:49 +0000269# --- Utility functions
270
def prepare_input_source(source, base = ""):
    """This function takes an InputSource and an optional base URL and
    returns a fully resolved InputSource object ready for reading.

    source may also be a string, which is used as the system identifier
    of a new InputSource, or an object with a read() method, which
    becomes the byte stream of a new InputSource (its name attribute,
    when present, becomes the system identifier).

    When the InputSource has no byte stream yet, its system identifier
    is resolved against base: if the result names an existing file in
    the directory of base, that file is opened; otherwise the
    identifier is joined with base as a URL and opened with urllib.
    """

    # isinstance() also accepts subclasses of the string types
    if isinstance(source, tuple(_StringTypes)):
        source = xmlreader.InputSource(source)
    elif hasattr(source, "read"):
        stream = source
        source = xmlreader.InputSource()
        source.setByteStream(stream)
        if hasattr(stream, "name"):
            source.setSystemId(stream.name)

    if source.getByteStream() is None:
        sysid = source.getSystemId()
        basehead = os.path.dirname(os.path.normpath(base))
        sysidfilename = os.path.join(basehead, sysid)
        # Test and open the path resolved against the base directory,
        # so that the file opened is the one recorded as system id.
        if os.path.isfile(sysidfilename):
            source.setSystemId(sysidfilename)
            f = open(sysidfilename, "rb")
        else:
            source.setSystemId(urlparse.urljoin(base, sysid))
            f = urllib.urlopen(source.getSystemId())

        source.setByteStream(f)

    return source
Lars Gustäbel523b0a62000-09-24 18:54:49 +0000296 return source