blob: 6544f171d9c8b1392ca1705f18c4c830042b458d [file] [log] [blame]
"""\
A library of useful helper classes to the SAX classes, for the
convenience of application and driver writers.
"""
5
Martin v. Löwis58af43f2000-09-24 21:31:06 +00006import os, urlparse, urllib, types
Fred Drake45cd9de2000-06-29 19:34:54 +00007import handler
Lars Gustäbelfc643c32000-09-24 10:53:31 +00008import xmlreader
Fred Drakea12adfe2000-09-18 17:40:22 +00009
# Types that prepare_input_source() treats as a bare system identifier.
# They are derived from a literal and the builtin namespace instead of
# attributes of the ``types`` module, so building the list cannot fail
# half-way: an interpreter without a separate ``unicode`` builtin simply
# ends up with its single native string type.
_StringTypes = [type("")]
try:
    _StringTypes.append(unicode)
except NameError:
    pass
14
def __dict_replace(s, d):
    """Return s with each key of d replaced by the corresponding value.

    The replacements are applied one key at a time, in the dictionary's
    iteration order, each one operating on the result of the previous.
    """
    result = s
    for old in d:
        result = result.replace(old, d[old])
    return result
Martin v. Löwis58af43f2000-09-24 21:31:06 +000020
def escape(data, entities={}):
    """Return data with &, < and > replaced by XML entity references.

    Further substitutions can be requested through the optional
    entities dictionary: keys and values must all be strings, and each
    key found in the (already escaped) data is replaced by its value.
    """
    # "&" has to come first; otherwise the ampersands introduced by the
    # other two replacements would themselves be escaped.
    for char, reference in (("&", "&amp;"), (">", "&gt;"), ("<", "&lt;")):
        data = data.replace(char, reference)
    if not entities:
        return data
    return __dict_replace(data, entities)
Martin v. Löwis74b51ac2002-10-26 14:50:45 +000036
def unescape(data, entities={}):
    """Return data with &amp;, &lt; and &gt; turned back into &, < and >.

    Further substitutions can be requested through the optional
    entities dictionary: keys and values must all be strings, and each
    key found in the data is replaced by its value.
    """
    text = data.replace("&lt;", "<").replace("&gt;", ">")
    if entities:
        text = __dict_replace(text, entities)
    # "&amp;" is expanded last, so that input such as "&amp;lt;" comes
    # out as the literal text "&lt;" rather than as "<".
    text = text.replace("&amp;", "&")
    return text
Fred Drake45cd9de2000-06-29 19:34:54 +000050
def quoteattr(data, entities={}):
    """Escape and quote an attribute value.

    Escape &, <, and > in a string of data, then quote it for use as
    an attribute value.  The \" character will be escaped as well, if
    necessary.

    Tab, newline and carriage return are written as the character
    references &#9;, &#10; and &#13;.  An XML parser replaces literal
    occurrences of these characters in an attribute value with spaces,
    so emitting them unescaped would lose information.

    You can escape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.
    A mapping supplied for tab, newline or carriage return takes
    precedence over the built-in one.

    Returns the escaped value including its surrounding quote characters.
    """
    # Build a fresh dictionary so that neither the caller's mapping nor
    # the shared default argument is ever modified.
    replacements = {'\n': '&#10;', '\r': '&#13;', '\t': '&#9;'}
    replacements.update(entities)
    data = escape(data, replacements)
    if '"' not in data:
        return '"%s"' % data
    if "'" not in data:
        # Only double quotes present: single-quote the value as-is.
        return "'%s'" % data
    # Both kinds of quote present: double-quote and escape the doubles.
    return '"%s"' % data.replace('"', "&quot;")
71
Fred Drakea12adfe2000-09-18 17:40:22 +000072
class XMLGenerator(handler.ContentHandler):
    """ContentHandler that writes the events it receives out as XML text.

    out is an object with a write() method; when omitted, sys.stdout is
    used.  encoding is the name placed in the XML declaration written by
    startDocument(); the text itself is handed to out.write() unchanged.
    """

    def __init__(self, out=None, encoding="iso-8859-1"):
        if out is None:
            import sys
            out = sys.stdout
        handler.ContentHandler.__init__(self)
        self._out = out
        self._ns_contexts = [{}] # contains uri -> prefix dicts
        self._current_context = self._ns_contexts[-1]
        # (prefix, uri) pairs announced by startPrefixMapping() that have
        # not yet been written out as xmlns attributes.
        self._undeclared_ns_maps = []
        self._encoding = encoding

    def _qname(self, name):
        """Build the qualified name for a (namespace URI, local name) pair.

        Names without a namespace, and names in the default namespace
        (a namespace mapped to an empty prefix), are returned
        unprefixed.  Raises KeyError if the namespace URI has no
        mapping in the current context.
        """
        uri, localname = name[0], name[1]
        if uri:
            # The "xml" prefix is bound to this namespace by definition;
            # it is normally never declared, so it would not be found in
            # the current context.
            if uri == "http://www.w3.org/XML/1998/namespace":
                return "xml:" + localname
            prefix = self._current_context[uri]
            if prefix:
                return prefix + ":" + localname
        return localname

    # ContentHandler methods

    def startDocument(self):
        self._out.write('<?xml version="1.0" encoding="%s"?>\n' %
                        self._encoding)

    def startPrefixMapping(self, prefix, uri):
        # Save a snapshot of the mappings in force so far, then extend
        # the live context; endPrefixMapping() restores the snapshot.
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix
        self._undeclared_ns_maps.append((prefix, uri))

    def endPrefixMapping(self, prefix):
        self._current_context = self._ns_contexts[-1]
        del self._ns_contexts[-1]

    def startElement(self, name, attrs):
        self._out.write('<' + name)
        for (attr_name, value) in attrs.items():
            self._out.write(' %s=%s' % (attr_name, quoteattr(value)))
        self._out.write('>')

    def endElement(self, name):
        self._out.write('</%s>' % name)

    def startElementNS(self, name, qname, attrs):
        self._out.write('<' + self._qname(name))

        # Declare every namespace mapping started since the last element.
        for prefix, uri in self._undeclared_ns_maps:
            if prefix:
                self._out.write(' xmlns:%s=%s' % (prefix, quoteattr(uri)))
            else:
                # An empty prefix denotes the default namespace.
                self._out.write(' xmlns=%s' % quoteattr(uri))
        self._undeclared_ns_maps = []

        for (attr_name, value) in attrs.items():
            self._out.write(' %s=%s' % (self._qname(attr_name),
                                        quoteattr(value)))
        self._out.write('>')

    def endElementNS(self, name, qname):
        self._out.write('</%s>' % self._qname(name))

    def characters(self, content):
        self._out.write(escape(content))

    def ignorableWhitespace(self, content):
        self._out.write(content)

    def processingInstruction(self, target, data):
        self._out.write('<?%s %s?>' % (target, data))
143
Fred Drakea12adfe2000-09-18 17:40:22 +0000144
class XMLFilterBase(xmlreader.XMLReader):
    """This class is designed to sit between an XMLReader and the
    client application's event handlers.  By default, it does nothing
    but pass requests up to the reader and events on to the handlers
    unmodified, but subclasses can override specific methods to modify
    the event stream or the configuration requests as they pass
    through."""

    def __init__(self, parent = None):
        xmlreader.XMLReader.__init__(self)
        self._parent = parent

    # ErrorHandler methods

    def error(self, exception):
        self._err_handler.error(exception)

    def fatalError(self, exception):
        self._err_handler.fatalError(exception)

    def warning(self, exception):
        self._err_handler.warning(exception)

    # ContentHandler methods

    def setDocumentLocator(self, locator):
        self._cont_handler.setDocumentLocator(locator)

    def startDocument(self):
        self._cont_handler.startDocument()

    def endDocument(self):
        self._cont_handler.endDocument()

    def startPrefixMapping(self, prefix, uri):
        self._cont_handler.startPrefixMapping(prefix, uri)

    def endPrefixMapping(self, prefix):
        self._cont_handler.endPrefixMapping(prefix)

    def startElement(self, name, attrs):
        self._cont_handler.startElement(name, attrs)

    def endElement(self, name):
        self._cont_handler.endElement(name)

    def startElementNS(self, name, qname, attrs):
        # Forward to the namespace-aware callback, keeping qname, so the
        # handler sees the same event this filter received.
        self._cont_handler.startElementNS(name, qname, attrs)

    def endElementNS(self, name, qname):
        self._cont_handler.endElementNS(name, qname)

    def characters(self, content):
        self._cont_handler.characters(content)

    def ignorableWhitespace(self, chars):
        self._cont_handler.ignorableWhitespace(chars)

    def processingInstruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    def skippedEntity(self, name):
        self._cont_handler.skippedEntity(name)

    # DTDHandler methods

    def notationDecl(self, name, publicId, systemId):
        self._dtd_handler.notationDecl(name, publicId, systemId)

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata)

    # EntityResolver methods

    def resolveEntity(self, publicId, systemId):
        # The resolver's answer has to travel back to the caller, which
        # needs it to know what to read for the entity.
        return self._ent_handler.resolveEntity(publicId, systemId)

    # XMLReader methods

    def parse(self, source):
        # Register this filter as every handler of the parent reader, so
        # that all of its events pass through the methods above.
        self._parent.setContentHandler(self)
        self._parent.setErrorHandler(self)
        self._parent.setEntityResolver(self)
        self._parent.setDTDHandler(self)
        self._parent.parse(source)

    def setLocale(self, locale):
        self._parent.setLocale(locale)

    def getFeature(self, name):
        return self._parent.getFeature(name)

    def setFeature(self, name, state):
        self._parent.setFeature(name, state)

    def getProperty(self, name):
        return self._parent.getProperty(name)

    def setProperty(self, name, value):
        self._parent.setProperty(name, value)

    # XMLFilter methods

    def getParent(self):
        return self._parent

    def setParent(self, parent):
        self._parent = parent
253
Lars Gustäbel523b0a62000-09-24 18:54:49 +0000254# --- Utility functions
255
def prepare_input_source(source, base = ""):
    """This function takes an InputSource and an optional base URL and
    returns a fully resolved InputSource object ready for reading.

    source may be a string (taken as a system identifier), an object
    with a read() method (used as the byte stream; its name attribute,
    if any, becomes the system identifier), or an InputSource.

    When the source has no byte stream yet, its system identifier is
    resolved against base.  If it names an existing file relative to
    the directory part of base, that file is opened; otherwise the
    identifier is joined with base as a URL and opened with
    urllib.urlopen().  In both cases the system identifier stored on
    the returned source is the location that was actually opened."""

    if isinstance(source, tuple(_StringTypes)):
        source = xmlreader.InputSource(source)
    elif hasattr(source, "read"):
        f = source
        source = xmlreader.InputSource()
        source.setByteStream(f)
        if hasattr(f, "name"):
            source.setSystemId(f.name)

    if source.getByteStream() is None:
        sysid = source.getSystemId()
        # Test for and open the very path that is recorded as the system
        # identifier, so the two can never refer to different files.
        basehead = os.path.dirname(os.path.normpath(base))
        sysidfilename = os.path.join(basehead, sysid)
        if os.path.isfile(sysidfilename):
            source.setSystemId(sysidfilename)
            f = open(sysidfilename, "rb")
        else:
            source.setSystemId(urlparse.urljoin(base, sysid))
            f = urllib.urlopen(source.getSystemId())

        source.setByteStream(f)

    return source
Lars Gustäbel523b0a62000-09-24 18:54:49 +0000281 return source