blob: 1fb3743802d1967df15c8d58c965d98c93829d41 [file] [log] [blame]
Fred Drakea12adfe2000-09-18 17:40:22 +00001"""\
2A library of useful helper classes to the SAX classes, for the
Fred Drake45cd9de2000-06-29 19:34:54 +00003convenience of application and driver writers.
Fred Drake45cd9de2000-06-29 19:34:54 +00004"""
5
Martin v. Löwis58af43f2000-09-24 21:31:06 +00006import os, urlparse, urllib, types
Fred Drake45cd9de2000-06-29 19:34:54 +00007import handler
Lars Gustäbelfc643c32000-09-24 10:53:31 +00008import xmlreader
Fred Drakea12adfe2000-09-18 17:40:22 +00009
Martin v. Löwis58af43f2000-09-24 21:31:06 +000010_StringTypes = [types.StringType, types.UnicodeType]
11
def escape(data, entities=None):
    """Escape &, <, and > in a string of data.

    You can escape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.

    The ampersand is replaced first so that the entity references
    produced by the later replacements are not escaped a second time.
    Returns the escaped string; the input is not modified.
    """
    data = data.replace("&", "&amp;")
    data = data.replace("<", "&lt;")
    data = data.replace(">", "&gt;")
    # None (the default) and an empty dictionary both mean "no extra
    # replacements"; using None avoids a shared mutable default.
    if entities:
        for chars, entity in entities.items():
            data = data.replace(chars, entity)
    return data
25
Fred Drakea12adfe2000-09-18 17:40:22 +000026
class XMLGenerator(handler.ContentHandler):
    """ContentHandler that writes the events it receives as XML text.

    Output goes to the file-like object passed as out (sys.stdout when
    out is None) through its write() method.  The encoding argument is
    only used for the XML declaration written by startDocument(); the
    strings passed to write() are not encoded by this class.
    """

    def __init__(self, out=None, encoding="iso-8859-1"):
        if out is None:
            import sys
            out = sys.stdout
        handler.ContentHandler.__init__(self)
        self._out = out
        self._ns_contexts = [{}] # contains uri -> prefix dicts
        self._current_context = self._ns_contexts[-1]
        # (prefix, uri) pairs announced by startPrefixMapping() that have
        # not yet been written out as xmlns attributes.
        self._undeclared_ns_maps = []
        self._encoding = encoding

    # Internal helpers

    def _qname(self, name):
        """Return the name to write for a (uri, localname) pair."""
        uri, localname = name
        if uri is None:
            # not namespace-scoped: use the unqualified part
            return localname
        # else try to restore the original prefix from the namespace
        prefix = self._current_context[uri]
        if prefix:
            return prefix + ":" + localname
        # The uri is bound as the default namespace (no prefix).
        return localname

    def _write_attr(self, name, value):
        """Write one attribute, escaping the value for double quotes."""
        # escape() handles &, < and >; the quote character has to be
        # escaped as well because the value is delimited by it.
        self._out.write(' %s="%s"' % (name, escape(value, {'"': "&quot;"})))

    # ContentHandler methods

    def startDocument(self):
        self._out.write('<?xml version="1.0" encoding="%s"?>\n' %
                        self._encoding)

    def startPrefixMapping(self, prefix, uri):
        # Save a copy of the context as it was before this mapping;
        # endPrefixMapping() restores it.
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix
        self._undeclared_ns_maps.append((prefix, uri))

    def endPrefixMapping(self, prefix):
        self._current_context = self._ns_contexts[-1]
        del self._ns_contexts[-1]

    def startElement(self, name, attrs):
        self._out.write('<' + name)
        for (attr_name, value) in attrs.items():
            self._write_attr(attr_name, value)
        self._out.write('>')

    def endElement(self, name):
        self._out.write('</%s>' % name)

    def startElementNS(self, name, qname, attrs):
        self._out.write('<' + self._qname(name))

        # Declare every mapping started since the previous element.
        for (prefix, uri) in self._undeclared_ns_maps:
            if prefix:
                self._write_attr('xmlns:%s' % prefix, uri)
            else:
                # default namespace declaration
                self._write_attr('xmlns', uri)
        self._undeclared_ns_maps = []

        for (attr_name, value) in attrs.items():
            self._write_attr(self._qname(attr_name), value)
        self._out.write('>')

    def endElementNS(self, name, qname):
        self._out.write('</%s>' % self._qname(name))

    def characters(self, content):
        self._out.write(escape(content))

    def ignorableWhitespace(self, content):
        self._out.write(content)

    def processingInstruction(self, target, data):
        self._out.write('<?%s %s?>' % (target, data))
97
Fred Drakea12adfe2000-09-18 17:40:22 +000098
class XMLFilterBase(xmlreader.XMLReader):
    """This class is designed to sit between an XMLReader and the
    client application's event handlers.  By default, it does nothing
    but pass requests up to the reader and events on to the handlers
    unmodified, but subclasses can override specific methods to modify
    the event stream or the configuration requests as they pass
    through.

    The parent reader may be given to the constructor or set later with
    setParent(); it must be set before parse() or any of the
    feature/property/locale methods is called.
    """

    def __init__(self, parent = None):
        xmlreader.XMLReader.__init__(self)
        self._parent = parent

    # ErrorHandler methods

    def error(self, exception):
        self._err_handler.error(exception)

    def fatalError(self, exception):
        self._err_handler.fatalError(exception)

    def warning(self, exception):
        self._err_handler.warning(exception)

    # ContentHandler methods

    def setDocumentLocator(self, locator):
        self._cont_handler.setDocumentLocator(locator)

    def startDocument(self):
        self._cont_handler.startDocument()

    def endDocument(self):
        self._cont_handler.endDocument()

    def startPrefixMapping(self, prefix, uri):
        self._cont_handler.startPrefixMapping(prefix, uri)

    def endPrefixMapping(self, prefix):
        self._cont_handler.endPrefixMapping(prefix)

    def startElement(self, name, attrs):
        self._cont_handler.startElement(name, attrs)

    def endElement(self, name):
        self._cont_handler.endElement(name)

    def startElementNS(self, name, qname, attrs):
        # Forward to the namespace-aware callback, keeping qname, so the
        # handler sees the same event the filter received.
        self._cont_handler.startElementNS(name, qname, attrs)

    def endElementNS(self, name, qname):
        self._cont_handler.endElementNS(name, qname)

    def characters(self, content):
        self._cont_handler.characters(content)

    def ignorableWhitespace(self, chars):
        self._cont_handler.ignorableWhitespace(chars)

    def processingInstruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    def skippedEntity(self, name):
        self._cont_handler.skippedEntity(name)

    # DTDHandler methods

    def notationDecl(self, name, publicId, systemId):
        self._dtd_handler.notationDecl(name, publicId, systemId)

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata)

    # EntityResolver methods

    def resolveEntity(self, publicId, systemId):
        # The resolver's answer is the result of this call; it has to be
        # handed back to the parent reader that asked for it.
        return self._ent_handler.resolveEntity(publicId, systemId)

    # XMLReader methods

    def parse(self, source):
        # Register this filter as every kind of handler on the parent so
        # that all events pass through the methods above.
        self._parent.setContentHandler(self)
        self._parent.setErrorHandler(self)
        self._parent.setEntityResolver(self)
        self._parent.setDTDHandler(self)
        self._parent.parse(source)

    def setLocale(self, locale):
        self._parent.setLocale(locale)

    def getFeature(self, name):
        return self._parent.getFeature(name)

    def setFeature(self, name, state):
        self._parent.setFeature(name, state)

    def getProperty(self, name):
        return self._parent.getProperty(name)

    def setProperty(self, name, value):
        self._parent.setProperty(name, value)

    # XMLFilter methods

    def getParent(self):
        return self._parent

    def setParent(self, parent):
        self._parent = parent
207
Lars Gustäbel523b0a62000-09-24 18:54:49 +0000208# --- Utility functions
209
def prepare_input_source(source, base = ""):
    """This function takes an InputSource and an optional base URL and
    returns a fully resolved InputSource object ready for reading.

    source may also be a string, which is used as the system identifier
    of a new InputSource, or an object with a read attribute, which is
    installed as the byte stream of a new InputSource.

    If the resulting InputSource has no byte stream, its system
    identifier is resolved against base and opened: as a local file
    when the resolved path names an existing file, otherwise as a URL.
    The opened object becomes the byte stream and the resolved
    identifier replaces the original system identifier.
    """

    if type(source) in _StringTypes:
        source = xmlreader.InputSource(source)
    elif hasattr(source, "read"):
        f = source
        source = xmlreader.InputSource()
        source.setByteStream(f)
        if hasattr(f, "name"):
            # Record the name on the InputSource; the file-like object
            # itself has no setSystemId().
            source.setSystemId(f.name)

    if source.getByteStream() is None:
        sysid = source.getSystemId()
        # Resolve against the directory part of base once, and use that
        # same path for the existence check, the recorded system id and
        # the open() call so the three cannot disagree.
        basehead = os.path.split(os.path.normpath(base))[0]
        sysidfilename = os.path.join(basehead, sysid)
        if os.path.isfile(sysidfilename):
            source.setSystemId(sysidfilename)
            f = open(sysidfilename, "rb")
        else:
            source.setSystemId(urlparse.urljoin(base, sysid))
            f = urllib.urlopen(source.getSystemId())

        source.setByteStream(f)

    return source