blob: bf1f5f317e928cd7f9368d57d5c6c55e39f66196 [file] [log] [blame]
"""\
A library of useful helper classes to the SAX classes, for the
convenience of application and driver writers.
"""
5
Martin v. Löwis58af43f2000-09-24 21:31:06 +00006import os, urlparse, urllib, types
Fred Drake45cd9de2000-06-29 19:34:54 +00007import handler
Lars Gustäbelfc643c32000-09-24 10:53:31 +00008import xmlreader
Fred Drakea12adfe2000-09-18 17:40:22 +00009
# Types that prepare_input_source() treats as a system identifier string.
# types.UnicodeType is not always available, so it is only included when
# the types module actually provides it.
_StringTypes = [types.StringType]
if hasattr(types, "UnicodeType"):
    _StringTypes.append(types.UnicodeType)
14
Martin v. Löwis58af43f2000-09-24 21:31:06 +000015
def escape(data, entities={}):
    """Return data with the markup characters &, < and > escaped.

    Additional substitutions can be requested through the optional
    entities dictionary.  Its keys and values must all be strings;
    every occurrence of a key is replaced by the matching value.
    """
    # The ampersand has to be handled first, otherwise the "&" of the
    # references produced for < and > would be escaped a second time.
    for char, reference in (("&", "&amp;"), ("<", "&lt;"), (">", "&gt;")):
        data = data.replace(char, reference)
    for text in entities.keys():
        data = data.replace(text, entities[text])
    return data
29
def quoteattr(data, entities={}):
    """Return data escaped and wrapped in quotes as an attribute value.

    &, < and > are escaped first, together with any extra replacements
    given in the optional entities dictionary (string keys mapped to
    string values).  The result is then enclosed in double quotes, or
    in single quotes when it contains double quotes only; if it holds
    both kinds of quote, the double quotes are written as &quot;.
    """
    escaped = escape(data, entities)
    if '"' not in escaped:
        return '"%s"' % escaped
    if "'" not in escaped:
        return "'%s'" % escaped
    # Both quote characters occur: keep the apostrophes as they are and
    # escape the double quotes instead.
    return '"%s"' % escaped.replace('"', "&quot;")
50
Fred Drakea12adfe2000-09-18 17:40:22 +000051
class XMLGenerator(handler.ContentHandler):
    """ContentHandler that writes the SAX events it receives as XML text.

    Output goes to out (sys.stdout when out is None) through its write()
    method.  encoding is only used as the name placed in the XML
    declaration written by startDocument(); the strings passed to
    out.write() are not encoded by this class.
    """

    def __init__(self, out=None, encoding="iso-8859-1"):
        if out is None:
            import sys
            out = sys.stdout
        handler.ContentHandler.__init__(self)
        self._out = out
        self._ns_contexts = [{}] # contains uri -> prefix dicts
        self._current_context = self._ns_contexts[-1]
        # (prefix, uri) pairs announced by startPrefixMapping() that still
        # have to be declared on the next element start tag.
        self._undeclared_ns_maps = []
        self._encoding = encoding

    def _qname(self, name):
        """Build the qualified name for a (namespace uri, localname) pair."""
        uri, localname = name
        if uri is None:
            # Not namespace-scoped (e.g. an unprefixed attribute): there is
            # no prefix to look up.
            return localname
        prefix = self._current_context[uri]
        if prefix:
            return prefix + ":" + localname
        # The uri is bound as the default namespace (prefix None), so the
        # name is written without a prefix.
        return localname

    # ContentHandler methods

    def startDocument(self):
        self._out.write('<?xml version="1.0" encoding="%s"?>\n' %
                        self._encoding)

    def startPrefixMapping(self, prefix, uri):
        # Save a snapshot of the mappings in effect before this one so that
        # endPrefixMapping() can restore it.
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix
        self._undeclared_ns_maps.append((prefix, uri))

    def endPrefixMapping(self, prefix):
        self._current_context = self._ns_contexts[-1]
        del self._ns_contexts[-1]

    def startElement(self, name, attrs):
        self._out.write('<' + name)
        for (attr_name, value) in attrs.items():
            # quoteattr() picks the quote character and escapes embedded
            # quotes, so a value containing '"' still yields well-formed XML.
            self._out.write(' %s=%s' % (attr_name, quoteattr(value)))
        self._out.write('>')

    def endElement(self, name):
        self._out.write('</%s>' % name)

    def startElementNS(self, name, qname, attrs):
        # The qname argument is ignored; the prefix is restored from the
        # namespace mappings currently in effect.
        self._out.write('<' + self._qname(name))

        for (prefix, uri) in self._undeclared_ns_maps:
            if prefix:
                self._out.write(' xmlns:%s="%s"' % (prefix, uri))
            else:
                # Default namespace declaration.
                self._out.write(' xmlns="%s"' % uri)
        self._undeclared_ns_maps = []

        for (attr_name, value) in attrs.items():
            self._out.write(' %s=%s' % (self._qname(attr_name),
                                        quoteattr(value)))
        self._out.write('>')

    def endElementNS(self, name, qname):
        self._out.write('</%s>' % self._qname(name))

    def characters(self, content):
        self._out.write(escape(content))

    def ignorableWhitespace(self, content):
        self._out.write(content)

    def processingInstruction(self, target, data):
        self._out.write('<?%s %s?>' % (target, data))
122
Fred Drakea12adfe2000-09-18 17:40:22 +0000123
class XMLFilterBase(xmlreader.XMLReader):
    """This class is designed to sit between an XMLReader and the
    client application's event handlers.  By default, it does nothing
    but pass requests up to the reader and events on to the handlers
    unmodified, but subclasses can override specific methods to modify
    the event stream or the configuration requests as they pass
    through."""

    def __init__(self, parent = None):
        xmlreader.XMLReader.__init__(self)
        # The reader this filter takes its events from; may be set later
        # with setParent().
        self._parent = parent

    # ErrorHandler methods

    def error(self, exception):
        self._err_handler.error(exception)

    def fatalError(self, exception):
        self._err_handler.fatalError(exception)

    def warning(self, exception):
        self._err_handler.warning(exception)

    # ContentHandler methods

    def setDocumentLocator(self, locator):
        self._cont_handler.setDocumentLocator(locator)

    def startDocument(self):
        self._cont_handler.startDocument()

    def endDocument(self):
        self._cont_handler.endDocument()

    def startPrefixMapping(self, prefix, uri):
        self._cont_handler.startPrefixMapping(prefix, uri)

    def endPrefixMapping(self, prefix):
        self._cont_handler.endPrefixMapping(prefix)

    def startElement(self, name, attrs):
        self._cont_handler.startElement(name, attrs)

    def endElement(self, name):
        self._cont_handler.endElement(name)

    def startElementNS(self, name, qname, attrs):
        # Forward to the namespace-aware callback, passing qname along, so
        # the handler sees the same event the filter received.
        self._cont_handler.startElementNS(name, qname, attrs)

    def endElementNS(self, name, qname):
        self._cont_handler.endElementNS(name, qname)

    def characters(self, content):
        self._cont_handler.characters(content)

    def ignorableWhitespace(self, chars):
        self._cont_handler.ignorableWhitespace(chars)

    def processingInstruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    def skippedEntity(self, name):
        self._cont_handler.skippedEntity(name)

    # DTDHandler methods

    def notationDecl(self, name, publicId, systemId):
        self._dtd_handler.notationDecl(name, publicId, systemId)

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata)

    # EntityResolver methods

    def resolveEntity(self, publicId, systemId):
        # The resolver's answer must be handed back to the caller;
        # without the return the caller would always receive None.
        return self._ent_handler.resolveEntity(publicId, systemId)

    # XMLReader methods

    def parse(self, source):
        # Register this filter as every handler of the parent reader so
        # that all of its events pass through the methods above.
        self._parent.setContentHandler(self)
        self._parent.setErrorHandler(self)
        self._parent.setEntityResolver(self)
        self._parent.setDTDHandler(self)
        self._parent.parse(source)

    def setLocale(self, locale):
        self._parent.setLocale(locale)

    def getFeature(self, name):
        return self._parent.getFeature(name)

    def setFeature(self, name, state):
        self._parent.setFeature(name, state)

    def getProperty(self, name):
        return self._parent.getProperty(name)

    def setProperty(self, name, value):
        self._parent.setProperty(name, value)

    # XMLFilter methods

    def getParent(self):
        return self._parent

    def setParent(self, parent):
        self._parent = parent
232
Lars Gustäbel523b0a62000-09-24 18:54:49 +0000233# --- Utility functions
234
def prepare_input_source(source, base = ""):
    """This function takes an InputSource and an optional base URL and
    returns a fully resolved InputSource object ready for reading.

    source may also be a system identifier string or an object with a
    read() method; both are wrapped in a new InputSource.  For an object
    with a read() method, its name attribute (when present) becomes the
    system identifier.

    When the source has neither a character stream nor a byte stream,
    its system identifier is resolved against base and opened: as a
    local file in binary mode if the resolved path is an existing file,
    otherwise as a URL.  The resolved identifier is stored back on the
    source together with the opened byte stream.
    """

    if type(source) in _StringTypes:
        source = xmlreader.InputSource(source)
    elif hasattr(source, "read"):
        f = source
        source = xmlreader.InputSource()
        source.setByteStream(f)
        if hasattr(f, "name"):
            source.setSystemId(f.name)

    # Only open something when the source brings no stream of its own.
    if source.getCharacterStream() is None and source.getByteStream() is None:
        sysid = source.getSystemId()
        basehead = os.path.dirname(os.path.normpath(base))
        # Test and open the very path that is recorded as the system id,
        # so a sysid given relative to base is found and the recorded id
        # always names the file that was actually opened.
        sysidfilename = os.path.join(basehead, sysid)
        if os.path.isfile(sysidfilename):
            source.setSystemId(sysidfilename)
            f = open(sysidfilename, "rb")
        else:
            source.setSystemId(urlparse.urljoin(base, sysid))
            f = urllib.urlopen(source.getSystemId())

        source.setByteStream(f)

    return source