blob: 892f34dc1cb1db4494c13b61104922da62438096 [file] [log] [blame]
Fred Drakea12adfe2000-09-18 17:40:22 +00001"""\
2A library of useful helper classes to the SAX classes, for the
Fred Drake45cd9de2000-06-29 19:34:54 +00003convenience of application and driver writers.
Fred Drake45cd9de2000-06-29 19:34:54 +00004"""
5
Martin v. Löwis58af43f2000-09-24 21:31:06 +00006import os, urlparse, urllib, types
Fred Drake45cd9de2000-06-29 19:34:54 +00007import handler
Lars Gustäbelfc643c32000-09-24 10:53:31 +00008import xmlreader
Fred Drakea12adfe2000-09-18 17:40:22 +00009
# String types that prepare_input_source() treats as a system identifier
# (it wraps such a value in an InputSource instead of reading from it).
_StringTypes = [types.StringType, types.UnicodeType]
11
def escape(data, entities={}):
    """Return a copy of data with &, < and > replaced by entity references.

    Further replacements can be requested through the optional entities
    dictionary: every key found in the data is replaced by its value.
    Keys and values must all be strings.  The dictionary is only read,
    never modified.
    """
    # The ampersand has to be handled first; otherwise the "&" introduced
    # by the "&lt;" and "&gt;" replacements would be escaped a second time.
    for raw, reference in (("&", "&amp;"), ("<", "&lt;"), (">", "&gt;")):
        data = data.replace(raw, reference)
    for raw in entities.keys():
        data = data.replace(raw, entities[raw])
    return data
25
Fred Drakea12adfe2000-09-18 17:40:22 +000026
class XMLGenerator(handler.ContentHandler):
    """ContentHandler that writes the SAX events it receives as XML text.

    Markup is written to the file-like object out (sys.stdout when it is
    omitted).  encoding is the name placed in the XML declaration by
    startDocument(); this class does not itself encode what it writes.
    """

    def __init__(self, out=None, encoding="iso-8859-1"):
        if out is None:
            import sys
            out = sys.stdout
        handler.ContentHandler.__init__(self)
        self._out = out
        self._ns_contexts = [{}] # contains uri -> prefix dicts
        self._current_context = self._ns_contexts[-1]
        # (prefix, uri) pairs announced by startPrefixMapping() that have
        # not yet been written as xmlns attributes on a start tag
        self._undeclared_ns_maps = []
        self._encoding = encoding

    # Internal helpers

    def _qname(self, name):
        """Return the name to write for a (uri, localname) pair.

        Names without a namespace, and names whose namespace is mapped to
        an empty or None prefix (the default namespace), are written
        unprefixed; otherwise the prefix recorded for the URI is restored.
        """
        if name[0] is None:
            return name[1]
        prefix = self._current_context[name[0]]
        if not prefix:
            return name[1]
        return prefix + ":" + name[1]

    def _write_attribute(self, name, value):
        """Write one name="value" pair, escaping the value for that context."""
        # The value is delimited by double quotes, so besides &, < and >
        # any double quote inside it has to be escaped as well.
        self._out.write(' %s="%s"' % (name, escape(value, {'"': "&quot;"})))

    # ContentHandler methods

    def startDocument(self):
        self._out.write('<?xml version="1.0" encoding="%s"?>\n' %
                        self._encoding)

    def startPrefixMapping(self, prefix, uri):
        # Save a snapshot of the mapping as it was before this declaration;
        # endPrefixMapping() restores it.
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix
        self._undeclared_ns_maps.append((prefix, uri))

    def endPrefixMapping(self, prefix):
        self._current_context = self._ns_contexts[-1]
        del self._ns_contexts[-1]

    def startElement(self, name, attrs):
        self._out.write('<' + name)
        for (attr_name, value) in attrs.items():
            self._write_attribute(attr_name, value)
        self._out.write('>')

    def endElement(self, name):
        self._out.write('</%s>' % name)

    def startElementNS(self, name, qname, attrs):
        self._out.write('<' + self._qname(name))

        # Declare every mapping started since the previous start tag.
        for (prefix, uri) in self._undeclared_ns_maps:
            if prefix:
                self._out.write(' xmlns:%s="%s"' % (prefix, uri))
            else:
                # no prefix: this is a default namespace declaration
                self._out.write(' xmlns="%s"' % uri)
        self._undeclared_ns_maps = []

        for (attr_name, value) in attrs.items():
            self._write_attribute(self._qname(attr_name), value)
        self._out.write('>')

    def endElementNS(self, name, qname):
        self._out.write('</%s>' % self._qname(name))

    def characters(self, content):
        self._out.write(escape(content))

    def ignorableWhitespace(self, content):
        self._out.write(content)

    def processingInstruction(self, target, data):
        self._out.write('<?%s %s?>' % (target, data))
97
Fred Drakea12adfe2000-09-18 17:40:22 +000098
class XMLFilterBase(xmlreader.XMLReader):
    """This class is designed to sit between an XMLReader and the
    client application's event handlers.  By default, it does nothing
    but pass requests up to the reader and events on to the handlers
    unmodified, but subclasses can override specific methods to modify
    the event stream or the configuration requests as they pass
    through.

    The reader to filter is given as the optional parent argument or
    set later with setParent(); it must be set before parse() or any of
    the feature/property/locale methods are called."""

    def __init__(self, parent = None):
        xmlreader.XMLReader.__init__(self)
        self._parent = parent

    # ErrorHandler methods

    def error(self, exception):
        self._err_handler.error(exception)

    def fatalError(self, exception):
        self._err_handler.fatalError(exception)

    def warning(self, exception):
        self._err_handler.warning(exception)

    # ContentHandler methods

    def setDocumentLocator(self, locator):
        self._cont_handler.setDocumentLocator(locator)

    def startDocument(self):
        self._cont_handler.startDocument()

    def endDocument(self):
        self._cont_handler.endDocument()

    def startPrefixMapping(self, prefix, uri):
        self._cont_handler.startPrefixMapping(prefix, uri)

    def endPrefixMapping(self, prefix):
        self._cont_handler.endPrefixMapping(prefix)

    def startElement(self, name, attrs):
        self._cont_handler.startElement(name, attrs)

    def endElement(self, name):
        self._cont_handler.endElement(name)

    def startElementNS(self, name, qname, attrs):
        # Forward to the namespace-aware handler method, keeping qname.
        self._cont_handler.startElementNS(name, qname, attrs)

    def endElementNS(self, name, qname):
        self._cont_handler.endElementNS(name, qname)

    def characters(self, content):
        self._cont_handler.characters(content)

    def ignorableWhitespace(self, chars):
        self._cont_handler.ignorableWhitespace(chars)

    def processingInstruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    def skippedEntity(self, name):
        self._cont_handler.skippedEntity(name)

    # DTDHandler methods

    def notationDecl(self, name, publicId, systemId):
        self._dtd_handler.notationDecl(name, publicId, systemId)

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata)

    # EntityResolver methods

    def resolveEntity(self, publicId, systemId):
        # The resolver's result must be handed back to the calling reader.
        return self._ent_handler.resolveEntity(publicId, systemId)

    # XMLReader methods

    def parse(self, source):
        # Register this filter as every handler of the parent reader so
        # that all of its events pass through the methods above.
        self._parent.setContentHandler(self)
        self._parent.setErrorHandler(self)
        self._parent.setEntityResolver(self)
        self._parent.setDTDHandler(self)
        self._parent.parse(source)

    def setLocale(self, locale):
        self._parent.setLocale(locale)

    def getFeature(self, name):
        return self._parent.getFeature(name)

    def setFeature(self, name, state):
        self._parent.setFeature(name, state)

    def getProperty(self, name):
        return self._parent.getProperty(name)

    def setProperty(self, name, value):
        self._parent.setProperty(name, value)

    # XMLFilter methods

    def getParent(self):
        "Return the reader this filter forwards requests to, or None."
        return self._parent

    def setParent(self, parent):
        "Set the reader this filter forwards requests to."
        self._parent = parent
Lars Gustäbel523b0a62000-09-24 18:54:49 +0000195
196# --- Utility functions
197
def prepare_input_source(source, base = ""):
    """This function takes an InputSource and an optional base URL and
    returns a fully resolved InputSource object ready for reading.

    source may also be a string, which is used as the system identifier
    of a new InputSource, or an object with a read attribute, which
    becomes the byte stream of a new InputSource.

    When the resulting InputSource has no byte stream, its system
    identifier is resolved against base: if the result names an existing
    file, that file is opened in binary mode; otherwise the identifier
    is joined with base as a URL and opened with urllib.  In both cases
    the resolved identifier is stored back on the InputSource."""

    if type(source) in _StringTypes:
        source = xmlreader.InputSource(source)
    elif hasattr(source, "read"):
        f = source
        # A file-like object is a byte stream, not a system identifier,
        # so it must not be handed to the InputSource constructor.
        source = xmlreader.InputSource()
        source.setByteStream(f)

    if source.getByteStream() is None:
        sysid = source.getSystemId()
        # Resolve relative to the directory part of base, and use that
        # same resolved path for the existence check, for the recorded
        # system identifier and for opening the file.
        basehead = os.path.split(os.path.normpath(base))[0]
        sysidfilename = os.path.join(basehead, sysid)
        if os.path.isfile(sysidfilename):
            source.setSystemId(sysidfilename)
            f = open(sysidfilename, "rb")
        else:
            source.setSystemId(urlparse.urljoin(base, sysid))
            f = urllib.urlopen(source.getSystemId())

        source.setByteStream(f)

    return source