blob: d79862470910ebf351280a3e2c59935daaab4d51 [file] [log] [blame]
Martin v. Löwis9ea67882003-01-25 15:29:56 +00001"""Implementation of the DOM Level 3 'LS-Load' feature."""
2
3import copy
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004import xml.dom
Martin v. Löwis9ea67882003-01-25 15:29:56 +00005
Thomas Wouters0e3f5912006-08-11 14:57:12 +00006from xml.dom.NodeFilter import NodeFilter
Martin v. Löwis9ea67882003-01-25 15:29:56 +00007
8
9__all__ = ["DOMBuilder", "DOMEntityResolver", "DOMInputSource"]
10
11
class Options:
    """Container of parser settings, one attribute per DOMBuilder feature.

    DOMBuilder owns an instance of this class and hands a copy of it to
    the ExpatBuilder machinery for every parse.  The class attributes
    below are the default values.
    """

    # DOMBuilder (the DOM Level 3 LoadSave front end) restricts which of
    # these values a caller is actually allowed to change.

    # -- namespace handling
    namespaces = 1
    namespace_declarations = True

    # -- validation and external entity handling
    validation = False
    external_parameter_entities = True
    external_general_entities = True
    external_dtd_subset = True
    validate_if_schema = False
    validate = False
    datatype_normalization = False

    # -- which constructs are kept in the resulting tree
    create_entity_ref_nodes = True
    entities = True
    whitespace_in_element_content = True
    cdata_sections = True
    comments = True

    # -- miscellaneous switches
    charset_overrides_xml_encoding = True
    infoset = False
    supported_mediatypes_only = False

    # -- collaborators; DOMBuilder.parse() fills these in on its copy
    errorHandler = None
    filter = None
42
43
class DOMBuilder:
    """Synchronous DOM Level 3 Load/Save builder.

    Feature settings are stored on a private Options instance; each parse
    works on a copy of it, decorated with the builder's current filter
    and error handler, and delegates to xml.dom.expatbuilder.
    """

    entityResolver = None
    errorHandler = None
    filter = None

    ACTION_REPLACE = 1
    ACTION_APPEND_AS_CHILDREN = 2
    ACTION_INSERT_AFTER = 3
    ACTION_INSERT_BEFORE = 4

    _legal_actions = (ACTION_REPLACE, ACTION_APPEND_AS_CHILDREN,
                      ACTION_INSERT_AFTER, ACTION_INSERT_BEFORE)

    def __init__(self):
        self._options = Options()

    def _get_entityResolver(self):
        return self.entityResolver
    def _set_entityResolver(self, entityResolver):
        self.entityResolver = entityResolver

    def _get_errorHandler(self):
        return self.errorHandler
    def _set_errorHandler(self, errorHandler):
        self.errorHandler = errorHandler

    def _get_filter(self):
        return self.filter
    def _set_filter(self, filter):
        self.filter = filter

    def setFeature(self, name, state):
        """Switch the feature *name* on or off according to *state*.

        Raises xml.dom.NotFoundErr when the feature is unknown and
        xml.dom.NotSupportedErr when the feature is known but cannot be
        given the requested state.
        """
        if not self.supportsFeature(name):
            raise xml.dom.NotFoundErr("unknown feature: " + repr(name))
        key = (_name_xform(name), 1 if state else 0)
        try:
            settings = self._settings[key]
        except KeyError:
            raise xml.dom.NotSupportedErr(
                "unsupported feature: %r" % (name,)) from None
        # A distinct loop variable keeps the *name* argument intact.
        for option, value in settings:
            setattr(self._options, option, value)

    def supportsFeature(self, name):
        """Return true if *name* corresponds to an Options attribute."""
        return hasattr(self._options, _name_xform(name))

    def canSetFeature(self, name, state):
        """Return true if setFeature(name, state) would be accepted."""
        key = (_name_xform(name), 1 if state else 0)
        return key in self._settings

    # This dictionary maps from (feature,value) to a list of
    # (option,value) pairs that should be set on the Options object.
    # If a (feature,value) setting is not in this dictionary, it is
    # not supported by the DOMBuilder.
    #
    _settings = {
        ("namespace_declarations", 0): [
            ("namespace_declarations", 0)],
        ("namespace_declarations", 1): [
            ("namespace_declarations", 1)],
        ("validation", 0): [
            ("validation", 0)],
        ("external_general_entities", 0): [
            ("external_general_entities", 0)],
        ("external_general_entities", 1): [
            ("external_general_entities", 1)],
        ("external_parameter_entities", 0): [
            ("external_parameter_entities", 0)],
        ("external_parameter_entities", 1): [
            ("external_parameter_entities", 1)],
        ("validate_if_schema", 0): [
            ("validate_if_schema", 0)],
        ("create_entity_ref_nodes", 0): [
            ("create_entity_ref_nodes", 0)],
        ("create_entity_ref_nodes", 1): [
            ("create_entity_ref_nodes", 1)],
        ("entities", 0): [
            ("create_entity_ref_nodes", 0),
            ("entities", 0)],
        ("entities", 1): [
            ("entities", 1)],
        ("whitespace_in_element_content", 0): [
            ("whitespace_in_element_content", 0)],
        ("whitespace_in_element_content", 1): [
            ("whitespace_in_element_content", 1)],
        ("cdata_sections", 0): [
            ("cdata_sections", 0)],
        ("cdata_sections", 1): [
            ("cdata_sections", 1)],
        ("comments", 0): [
            ("comments", 0)],
        ("comments", 1): [
            ("comments", 1)],
        ("charset_overrides_xml_encoding", 0): [
            ("charset_overrides_xml_encoding", 0)],
        ("charset_overrides_xml_encoding", 1): [
            ("charset_overrides_xml_encoding", 1)],
        ("infoset", 0): [],
        ("infoset", 1): [
            ("namespace_declarations", 0),
            ("validate_if_schema", 0),
            ("create_entity_ref_nodes", 0),
            ("entities", 0),
            ("cdata_sections", 0),
            ("datatype_normalization", 1),
            ("whitespace_in_element_content", 1),
            ("comments", 1),
            ("charset_overrides_xml_encoding", 1)],
        ("supported_mediatypes_only", 0): [
            ("supported_mediatypes_only", 0)],
        ("namespaces", 0): [
            ("namespaces", 0)],
        ("namespaces", 1): [
            ("namespaces", 1)],
    }

    def getFeature(self, name):
        """Return the current state of the feature *name*.

        "infoset" is derived from the options that setFeature("infoset",
        True) assigns, so it reads as true exactly when all of them hold.
        Raises xml.dom.NotFoundErr for an unknown feature.
        """
        xname = _name_xform(name)
        if xname == "infoset":
            # Must be checked before the attribute lookup: Options has an
            # "infoset" attribute that no setting ever updates, so reading
            # it would always report the default.
            options = self._options
            return bool(options.datatype_normalization
                        and options.whitespace_in_element_content
                        and options.comments
                        and options.charset_overrides_xml_encoding
                        and not (options.namespace_declarations
                                 or options.validate_if_schema
                                 or options.create_entity_ref_nodes
                                 or options.entities
                                 or options.cdata_sections))
        try:
            return getattr(self._options, xname)
        except AttributeError:
            raise xml.dom.NotFoundErr(
                "feature %s not known" % repr(name)) from None

    def parseURI(self, uri):
        """Resolve *uri* to an input source and parse it.

        Uses the configured entityResolver if there is one, otherwise a
        fresh DOMEntityResolver.
        """
        if self.entityResolver:
            input = self.entityResolver.resolveEntity(None, uri)
        else:
            input = DOMEntityResolver().resolveEntity(None, uri)
        return self.parse(input)

    def parse(self, input):
        """Parse the document described by the input source *input*.

        The byte stream of *input* is used when present; otherwise the
        resource named by its systemId is opened with urllib and closed
        again once parsing has finished.
        """
        options = copy.copy(self._options)
        options.filter = self.filter
        options.errorHandler = self.errorHandler
        fp = input.byteStream
        # The system identifier lives on the input source, not on the
        # options object.
        if fp is None and input.systemId:
            import urllib.request
            with urllib.request.urlopen(input.systemId) as fp:
                return self._parse_bytestream(fp, options)
        return self._parse_bytestream(fp, options)

    def parseWithContext(self, input, cnode, action):
        """Not implemented; only validates *action* before giving up."""
        if action not in self._legal_actions:
            raise ValueError("not a legal action")
        raise NotImplementedError("Haven't written this yet...")

    def _parse_bytestream(self, stream, options):
        """Build a document from *stream* using an expat-based builder."""
        import xml.dom.expatbuilder
        builder = xml.dom.expatbuilder.makeBuilder(options)
        return builder.parseFile(stream)
206
207
208def _name_xform(name):
209 return name.lower().replace('-', '_')
210
211
class DOMEntityResolver(object):
    """Default entity resolver: fetches system identifiers with urllib."""

    __slots__ = '_opener',

    def resolveEntity(self, publicId, systemId):
        """Open *systemId* and return a populated DOMInputSource.

        The returned source carries the open byte stream, the encoding
        announced by the transport (if any) and a base URI derived from
        *systemId*.
        """
        assert systemId is not None
        source = DOMInputSource()
        source.publicId = publicId
        source.systemId = systemId
        source.byteStream = self._get_opener().open(systemId)

        # determine the encoding if the transport provided it
        source.encoding = self._guess_media_encoding(source)

        # determine the base URI if we can
        import posixpath, urllib.parse
        parts = urllib.parse.urlparse(systemId)
        scheme, netloc, path, params, query, fragment = parts
        # XXX should we check the scheme here as well?
        if path and not path.endswith("/"):
            path = posixpath.dirname(path) + "/"
            parts = scheme, netloc, path, params, query, fragment
            source.baseURI = urllib.parse.urlunparse(parts)

        return source

    def _get_opener(self):
        """Return the opener, creating it on first use."""
        try:
            return self._opener
        except AttributeError:
            self._opener = self._create_opener()
            return self._opener

    def _create_opener(self):
        """Build the urllib opener used to fetch entities."""
        import urllib.request
        return urllib.request.build_opener()

    def _guess_media_encoding(self, source):
        """Return the lower-cased charset from the Content-Type header.

        Returns None when the response has no Content-Type header or the
        header carries no charset parameter.
        """
        info = source.byteStream.info()
        if "Content-Type" not in info:
            return None
        # urllib.request hands back email.message.Message objects, which
        # expose the charset through get_content_charset(); the old
        # mimetools getplist() API does not exist on them.
        get_charset = getattr(info, "get_content_charset", None)
        if get_charset is not None:
            return get_charset()
        # Fall back to the legacy API for custom openers that still
        # return mimetools-style message objects.
        for param in info.getplist():
            if param.startswith("charset="):
                return param.split("=", 1)[1].lower()
        return None
254
255
class DOMInputSource(object):
    """Record describing where the input of a parse comes from.

    Every field starts out as None.  The _get_*/_set_* methods are plain
    accessors for the attribute of the same name.
    """

    __slots__ = ('byteStream', 'characterStream', 'stringData',
                 'encoding', 'publicId', 'systemId', 'baseURI')

    def __init__(self):
        # The slot tuple lists the fields in initialisation order.
        for field in DOMInputSource.__slots__:
            setattr(self, field, None)

    # -- byteStream
    def _get_byteStream(self):
        return self.byteStream

    def _set_byteStream(self, byteStream):
        self.byteStream = byteStream

    # -- characterStream
    def _get_characterStream(self):
        return self.characterStream

    def _set_characterStream(self, characterStream):
        self.characterStream = characterStream

    # -- stringData
    def _get_stringData(self):
        return self.stringData

    def _set_stringData(self, data):
        self.stringData = data

    # -- encoding
    def _get_encoding(self):
        return self.encoding

    def _set_encoding(self, encoding):
        self.encoding = encoding

    # -- publicId
    def _get_publicId(self):
        return self.publicId

    def _set_publicId(self, publicId):
        self.publicId = publicId

    # -- systemId
    def _get_systemId(self):
        return self.systemId

    def _set_systemId(self, systemId):
        self.systemId = systemId

    # -- baseURI
    def _get_baseURI(self):
        return self.baseURI

    def _set_baseURI(self, uri):
        self.baseURI = uri
303
304
class DOMBuilderFilter:
    """Base class for filters that shape a DOM tree while it is built.

    Both callbacks accept every node by default, so a subclass only has
    to override the one it cares about.
    """

    # Strictly speaking this class is optional: a concrete filter can
    # simply provide acceptNode() and startContainer() itself.  Deriving
    # from it just saves writing the method that is not needed.

    FILTER_ACCEPT = 1
    FILTER_REJECT = 2
    FILTER_SKIP = 3
    FILTER_INTERRUPT = 4

    whatToShow = NodeFilter.SHOW_ALL

    def _get_whatToShow(self):
        """Accessor for the whatToShow attribute."""
        return self.whatToShow

    def acceptNode(self, element):
        """Accept *element* unconditionally."""
        return self.FILTER_ACCEPT

    def startContainer(self, element):
        """Accept the container *element* unconditionally."""
        return self.FILTER_ACCEPT

# NodeFilter was only needed for the class attribute above.
del NodeFilter
332
333
class DocumentLS:
    """Mixin to create documents that conform to the load/save spec."""

    # "async" has been a reserved keyword since Python 3.7 and can no
    # longer be used as an identifier, hence the trailing underscore.
    async_ = False

    def _get_async(self):
        """Return False: asynchronous loading is never enabled."""
        return False

    def _set_async(self, flag):
        """Reject any attempt to switch asynchronous loading on.

        Raises xml.dom.NotSupportedErr when *flag* is true; a false
        value is accepted and ignored.
        """
        if flag:
            raise xml.dom.NotSupportedErr(
                "asynchronous document loading is not supported")

    def abort(self):
        # What does it mean to "clear" a document?  Does the
        # documentElement disappear?
        raise NotImplementedError(
            "haven't figured out what this means yet")

    def load(self, uri):
        raise NotImplementedError("haven't written this yet")

    def loadXML(self, source):
        raise NotImplementedError("haven't written this yet")

    def saveXML(self, snode):
        """Serialize *snode*, or the whole document when it is None.

        Raises xml.dom.WrongDocumentErr if *snode* belongs to another
        document.
        """
        if snode is None:
            snode = self
        elif snode.ownerDocument is not self:
            raise xml.dom.WrongDocumentErr()
        return snode.toxml()
364
365
class DOMImplementationLS:
    """Factory mixin for the Load/Save objects of a DOM implementation."""

    MODE_SYNCHRONOUS = 1
    MODE_ASYNCHRONOUS = 2

    def createDOMBuilder(self, mode, schemaType):
        """Return a new DOMBuilder for the synchronous *mode*.

        Raises xml.dom.NotSupportedErr when a schema type is given or the
        asynchronous mode is requested, and ValueError for any other
        unrecognised *mode*.
        """
        if schemaType is not None:
            raise xml.dom.NotSupportedErr(
                "schemaType not yet supported")
        if mode == self.MODE_SYNCHRONOUS:
            builder = DOMBuilder()
        elif mode == self.MODE_ASYNCHRONOUS:
            raise xml.dom.NotSupportedErr(
                "asynchronous builders are not supported")
        else:
            raise ValueError("unknown value for mode")
        return builder

    def createDOMWriter(self):
        """Not implemented; always raises NotImplementedError."""
        raise NotImplementedError(
            "the writer interface hasn't been written yet!")

    def createDOMInputSource(self):
        """Return a fresh, empty DOMInputSource."""
        return DOMInputSource()