blob: e592f2a1d9020967ec45b60228cc90f4027540a2 [file] [log] [blame]
"""\
A library of useful helper classes to the SAX classes, for the
convenience of application and driver writers.
"""
5
Martin v. Löwis58af43f2000-09-24 21:31:06 +00006import os, urlparse, urllib, types
Fred Drake45cd9de2000-06-29 19:34:54 +00007import handler
Lars Gustäbelfc643c32000-09-24 10:53:31 +00008import xmlreader
Fred Drakea12adfe2000-09-18 17:40:22 +00009
# Types that prepare_input_source() treats as "a system identifier given
# as a plain string" rather than as an InputSource or a file-like object.
try:
    _StringTypes = [types.StringType, types.UnicodeType]
except AttributeError:
    # The attribute lookup failed -- presumably an interpreter without
    # types.UnicodeType (TODO confirm) -- so fall back to byte strings only.
    _StringTypes = [types.StringType]
14
Martin v. Löwis58af43f2000-09-24 21:31:06 +000015
def escape(data, entities={}):
    """Return a copy of `data` with &, < and > replaced by XML entities.

    Additional replacements can be requested through the optional
    `entities` dictionary.  Its keys and values must all be strings;
    every occurrence of a key is replaced by the matching value, after
    the three standard replacements have been made.
    """
    # "&" has to be handled first, otherwise the ampersands introduced
    # by "&lt;" and "&gt;" would themselves be escaped again.
    replacements = [("&", "&amp;"), ("<", "&lt;"), (">", "&gt;")]
    replacements.extend(entities.items())
    for old, new in replacements:
        data = data.replace(old, new)
    return data
29
Fred Drakea12adfe2000-09-18 17:40:22 +000030
class XMLGenerator(handler.ContentHandler):
    """ContentHandler that writes the SAX events it receives out as
    XML text.

    out -- object with a write() method that receives the text;
           sys.stdout is used when it is omitted.
    encoding -- name placed in the XML declaration written by
           startDocument(); the generator itself performs no encoding.
    """

    def __init__(self, out=None, encoding="iso-8859-1"):
        if out is None:
            import sys
            out = sys.stdout
        handler.ContentHandler.__init__(self)
        self._out = out
        self._ns_contexts = [{}] # contains uri -> prefix dicts
        self._current_context = self._ns_contexts[-1]
        # (prefix, uri) pairs announced by startPrefixMapping() that have
        # not yet been written out as xmlns attributes.
        self._undeclared_ns_maps = []
        self._encoding = encoding

    # Internal helpers

    def _qname(self, name):
        """Build the name to write for a (namespace uri, localname) pair.

        Names without a namespace, and names whose namespace is mapped
        to the default (empty) prefix, are written as the bare local
        name.  Raises KeyError if the namespace has no active mapping.
        """
        uri, localname = name[0], name[1]
        if uri:
            prefix = self._current_context[uri]
            if prefix:
                return prefix + ":" + localname
        return localname

    def _write_attribute(self, name, value):
        """Write one attribute; `name` is the already qualified name."""
        # The value is always emitted inside double quotes, so a literal
        # double quote has to be escaped in addition to &, < and >.
        self._out.write(' %s="%s"' % (name, escape(value, {'"': "&quot;"})))

    # ContentHandler methods

    def startDocument(self):
        self._out.write('<?xml version="1.0" encoding="%s"?>\n' %
                        self._encoding)

    def startPrefixMapping(self, prefix, uri):
        # Save a snapshot of the current mappings so endPrefixMapping()
        # can restore it, then record the new mapping and remember that
        # the next start tag has to declare it.
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix
        self._undeclared_ns_maps.append((prefix, uri))

    def endPrefixMapping(self, prefix):
        # Restore the snapshot taken by the matching startPrefixMapping().
        self._current_context = self._ns_contexts[-1]
        del self._ns_contexts[-1]

    def startElement(self, name, attrs):
        self._out.write('<' + name)
        for (attr_name, value) in attrs.items():
            self._write_attribute(attr_name, value)
        self._out.write('>')

    def endElement(self, name):
        self._out.write('</%s>' % name)

    def startElementNS(self, name, qname, attrs):
        # The qname argument is ignored; the prefix is restored from the
        # namespace mappings that are currently in effect.
        self._out.write('<' + self._qname(name))

        for (prefix, uri) in self._undeclared_ns_maps:
            if prefix:
                self._out.write(' xmlns:%s="%s"' % (prefix, uri))
            else:
                # A false prefix denotes the default namespace.
                self._out.write(' xmlns="%s"' % uri)
        self._undeclared_ns_maps = []

        for (attr_name, value) in attrs.items():
            self._write_attribute(self._qname(attr_name), value)
        self._out.write('>')

    def endElementNS(self, name, qname):
        self._out.write('</%s>' % self._qname(name))

    def characters(self, content):
        self._out.write(escape(content))

    def ignorableWhitespace(self, content):
        self._out.write(content)

    def processingInstruction(self, target, data):
        self._out.write('<?%s %s?>' % (target, data))
101
Fred Drakea12adfe2000-09-18 17:40:22 +0000102
class XMLFilterBase(xmlreader.XMLReader):
    """This class is designed to sit between an XMLReader and the
    client application's event handlers.  By default, it does nothing
    but pass requests up to the reader and events on to the handlers
    unmodified, but subclasses can override specific methods to modify
    the event stream or the configuration requests as they pass
    through."""

    def __init__(self, parent = None):
        xmlreader.XMLReader.__init__(self)
        # The reader this filter wraps; it may also be supplied later
        # through setParent().
        self._parent = parent

    # ErrorHandler methods

    def error(self, exception):
        self._err_handler.error(exception)

    def fatalError(self, exception):
        self._err_handler.fatalError(exception)

    def warning(self, exception):
        self._err_handler.warning(exception)

    # ContentHandler methods

    def setDocumentLocator(self, locator):
        self._cont_handler.setDocumentLocator(locator)

    def startDocument(self):
        self._cont_handler.startDocument()

    def endDocument(self):
        self._cont_handler.endDocument()

    def startPrefixMapping(self, prefix, uri):
        self._cont_handler.startPrefixMapping(prefix, uri)

    def endPrefixMapping(self, prefix):
        self._cont_handler.endPrefixMapping(prefix)

    def startElement(self, name, attrs):
        self._cont_handler.startElement(name, attrs)

    def endElement(self, name):
        self._cont_handler.endElement(name)

    def startElementNS(self, name, qname, attrs):
        # Forward to the namespace-aware callback, including qname, so
        # that it pairs up with endElementNS() below.
        self._cont_handler.startElementNS(name, qname, attrs)

    def endElementNS(self, name, qname):
        self._cont_handler.endElementNS(name, qname)

    def characters(self, content):
        self._cont_handler.characters(content)

    def ignorableWhitespace(self, chars):
        self._cont_handler.ignorableWhitespace(chars)

    def processingInstruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    def skippedEntity(self, name):
        self._cont_handler.skippedEntity(name)

    # DTDHandler methods

    def notationDecl(self, name, publicId, systemId):
        self._dtd_handler.notationDecl(name, publicId, systemId)

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata)

    # EntityResolver methods

    def resolveEntity(self, publicId, systemId):
        # The resolver's answer must be handed back to the caller;
        # without the return the caller would always receive None.
        return self._ent_handler.resolveEntity(publicId, systemId)

    # XMLReader methods

    def parse(self, source):
        # Register this filter as every handler of the parent reader so
        # that all of its events pass through the methods above.
        self._parent.setContentHandler(self)
        self._parent.setErrorHandler(self)
        self._parent.setEntityResolver(self)
        self._parent.setDTDHandler(self)
        self._parent.parse(source)

    def setLocale(self, locale):
        self._parent.setLocale(locale)

    def getFeature(self, name):
        return self._parent.getFeature(name)

    def setFeature(self, name, state):
        self._parent.setFeature(name, state)

    def getProperty(self, name):
        return self._parent.getProperty(name)

    def setProperty(self, name, value):
        self._parent.setProperty(name, value)

    # XMLFilter methods

    def getParent(self):
        return self._parent

    def setParent(self, parent):
        self._parent = parent
211
Lars Gustäbel523b0a62000-09-24 18:54:49 +0000212# --- Utility functions
213
def prepare_input_source(source, base = ""):
    """This function takes an InputSource and an optional base URL and
    returns a fully resolved InputSource object ready for reading.

    source -- an InputSource, a string holding a system identifier, or
              an object with a read() method (used as the byte stream;
              its `name` attribute, if any, becomes the system id).
    base   -- path or URL that a relative system identifier is resolved
              against.

    If the resulting InputSource has no byte stream yet, one is opened
    from its system identifier: as a local file when the resolved path
    names an existing file, otherwise as a URL.
    """

    if isinstance(source, tuple(_StringTypes)):
        source = xmlreader.InputSource(source)
    elif hasattr(source, "read"):
        f = source
        source = xmlreader.InputSource()
        source.setByteStream(f)
        if hasattr(f, "name"):
            source.setSystemId(f.name)

    if source.getByteStream() is None:
        sysid = source.getSystemId()
        # Resolve the identifier against the directory part of base
        # first, and use that one resolved path for the existence check,
        # the recorded system id and the open() call alike, so the file
        # that is read is always the file that is reported.
        basehead = os.path.dirname(os.path.normpath(base))
        sysidfilename = os.path.join(basehead, sysid)
        if os.path.isfile(sysidfilename):
            source.setSystemId(sysidfilename)
            f = open(sysidfilename, "rb")
        else:
            source.setSystemId(urlparse.urljoin(base, sysid))
            f = urllib.urlopen(source.getSystemId())

        source.setByteStream(f)

    return source