blob: 444f0b2a57d3df183c3c19153be6173fa0436721 [file] [log] [blame]
Martin v. Löwis9ea67882003-01-25 15:29:56 +00001"""Implementation of the DOM Level 3 'LS-Load' feature."""
2
3import copy
Yury Selivanov75445082015-05-11 22:57:16 -04004import warnings
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005import xml.dom
Martin v. Löwis9ea67882003-01-25 15:29:56 +00006
Thomas Wouters0e3f5912006-08-11 14:57:12 +00007from xml.dom.NodeFilter import NodeFilter
Martin v. Löwis9ea67882003-01-25 15:29:56 +00008
9
10__all__ = ["DOMBuilder", "DOMEntityResolver", "DOMInputSource"]
11
12
class Options:
    """Features object that has variables set for each DOMBuilder feature.

    The DOMBuilder class uses an instance of this class to pass settings to
    the ExpatBuilder class.
    """

    # Note that the DOMBuilder class in LoadSave constrains which of these
    # values can be set using the DOM Level 3 LoadSave feature.

    # --- namespace handling ---
    namespaces = 1
    namespace_declarations = True

    # --- validation and external entity handling ---
    validation = False
    external_parameter_entities = True
    external_general_entities = True
    external_dtd_subset = True
    validate_if_schema = False
    validate = False
    datatype_normalization = False

    # --- which constructs are kept in the resulting tree ---
    create_entity_ref_nodes = True
    entities = True
    whitespace_in_element_content = True
    cdata_sections = True
    comments = True

    # --- miscellaneous feature flags ---
    charset_overrides_xml_encoding = True
    infoset = False
    supported_mediatypes_only = False

    # --- collaborators; None means "not configured" ---
    errorHandler = None
    filter = None
43
44
class DOMBuilder:
    """Builder that parses XML input into a DOM tree.

    Feature settings are kept on an ``Options`` instance
    (``self._options``); ``setFeature()`` translates the hyphenated
    feature names into attribute assignments on that object using the
    ``_settings`` table below.  Parsing itself is delegated to
    ``xml.dom.expatbuilder``.
    """

    entityResolver = None
    errorHandler = None
    filter = None

    ACTION_REPLACE = 1
    ACTION_APPEND_AS_CHILDREN = 2
    ACTION_INSERT_AFTER = 3
    ACTION_INSERT_BEFORE = 4

    _legal_actions = (ACTION_REPLACE, ACTION_APPEND_AS_CHILDREN,
                      ACTION_INSERT_AFTER, ACTION_INSERT_BEFORE)

    def __init__(self):
        self._options = Options()

    def _get_entityResolver(self):
        return self.entityResolver

    def _set_entityResolver(self, entityResolver):
        self.entityResolver = entityResolver

    def _get_errorHandler(self):
        return self.errorHandler

    def _set_errorHandler(self, errorHandler):
        self.errorHandler = errorHandler

    def _get_filter(self):
        return self.filter

    def _set_filter(self, filter):
        self.filter = filter

    def setFeature(self, name, state):
        """Turn the feature *name* on or off according to *state*.

        Raises xml.dom.NotFoundErr when the feature name is unknown and
        xml.dom.NotSupportedErr when the feature is known but the
        requested state is not listed in ``_settings``.
        """
        if not self.supportsFeature(name):
            raise xml.dom.NotFoundErr("unknown feature: " + repr(name))
        key = (_name_xform(name), 1 if state else 0)
        try:
            settings = self._settings[key]
        except KeyError:
            raise xml.dom.NotSupportedErr(
                "unsupported feature: %r" % (name,))
        # Separate loop names so the *name* argument is not rebound.
        for option, value in settings:
            setattr(self._options, option, value)

    def supportsFeature(self, name):
        """Return true if *name* maps onto an attribute of the options."""
        return hasattr(self._options, _name_xform(name))

    def canSetFeature(self, name, state):
        """Return true if setFeature(name, state) would be accepted."""
        key = (_name_xform(name), 1 if state else 0)
        return key in self._settings

    # This dictionary maps from (feature,value) to a list of
    # (option,value) pairs that should be set on the Options object.
    # If a (feature,value) setting is not in this dictionary, it is
    # not supported by the DOMBuilder.
    #
    _settings = {
        ("namespace_declarations", 0): [
            ("namespace_declarations", 0)],
        ("namespace_declarations", 1): [
            ("namespace_declarations", 1)],
        ("validation", 0): [
            ("validation", 0)],
        ("external_general_entities", 0): [
            ("external_general_entities", 0)],
        ("external_general_entities", 1): [
            ("external_general_entities", 1)],
        ("external_parameter_entities", 0): [
            ("external_parameter_entities", 0)],
        ("external_parameter_entities", 1): [
            ("external_parameter_entities", 1)],
        ("validate_if_schema", 0): [
            ("validate_if_schema", 0)],
        ("create_entity_ref_nodes", 0): [
            ("create_entity_ref_nodes", 0)],
        ("create_entity_ref_nodes", 1): [
            ("create_entity_ref_nodes", 1)],
        ("entities", 0): [
            ("create_entity_ref_nodes", 0),
            ("entities", 0)],
        ("entities", 1): [
            ("entities", 1)],
        ("whitespace_in_element_content", 0): [
            ("whitespace_in_element_content", 0)],
        ("whitespace_in_element_content", 1): [
            ("whitespace_in_element_content", 1)],
        ("cdata_sections", 0): [
            ("cdata_sections", 0)],
        ("cdata_sections", 1): [
            ("cdata_sections", 1)],
        ("comments", 0): [
            ("comments", 0)],
        ("comments", 1): [
            ("comments", 1)],
        ("charset_overrides_xml_encoding", 0): [
            ("charset_overrides_xml_encoding", 0)],
        ("charset_overrides_xml_encoding", 1): [
            ("charset_overrides_xml_encoding", 1)],
        ("infoset", 0): [],
        ("infoset", 1): [
            ("namespace_declarations", 0),
            ("validate_if_schema", 0),
            ("create_entity_ref_nodes", 0),
            ("entities", 0),
            ("cdata_sections", 0),
            ("datatype_normalization", 1),
            ("whitespace_in_element_content", 1),
            ("comments", 1),
            ("charset_overrides_xml_encoding", 1)],
        ("supported_mediatypes_only", 0): [
            ("supported_mediatypes_only", 0)],
        ("namespaces", 0): [
            ("namespaces", 0)],
        ("namespaces", 1): [
            ("namespaces", 1)],
    }

    def getFeature(self, name):
        """Return the current value of the feature *name*.

        Raises xml.dom.NotFoundErr if the feature is not known.
        """
        xname = _name_xform(name)
        try:
            return getattr(self._options, xname)
        except AttributeError:
            # Only reached when the options object has no attribute of
            # that name; "infoset" is then derived from the options it
            # is defined in terms of.
            if name == "infoset":
                options = self._options
                return (options.datatype_normalization
                        and options.whitespace_in_element_content
                        and options.comments
                        and options.charset_overrides_xml_encoding
                        and not (options.namespace_declarations
                                 or options.validate_if_schema
                                 or options.create_entity_ref_nodes
                                 or options.entities
                                 or options.cdata_sections))
            raise xml.dom.NotFoundErr("feature %s not known" % repr(name))

    def parseURI(self, uri):
        """Resolve *uri* to an input source and parse it.

        Uses self.entityResolver when one is set, otherwise a fresh
        DOMEntityResolver.
        """
        if self.entityResolver:
            input = self.entityResolver.resolveEntity(None, uri)
        else:
            input = DOMEntityResolver().resolveEntity(None, uri)
        return self.parse(input)

    def parse(self, input):
        """Parse the document described by the input source *input*.

        *input* must provide ``byteStream`` and ``systemId`` attributes.
        When no byte stream is attached but a system identifier is
        present, the identifier is opened with urllib.request.urlopen().
        """
        # Work on a copy so the per-parse filter/errorHandler do not leak
        # into the builder's persistent options.
        options = copy.copy(self._options)
        options.filter = self.filter
        options.errorHandler = self.errorHandler
        fp = input.byteStream
        # The system identifier lives on the input source, not on the
        # options object (which has no such attribute).
        if fp is None and input.systemId:
            import urllib.request
            fp = urllib.request.urlopen(input.systemId)
        return self._parse_bytestream(fp, options)

    def parseWithContext(self, input, cnode, action):
        """Not implemented; validates *action* and then raises."""
        if action not in self._legal_actions:
            raise ValueError("not a legal action")
        raise NotImplementedError("Haven't written this yet...")

    def _parse_bytestream(self, stream, options):
        """Build a document from *stream* using an expatbuilder builder."""
        import xml.dom.expatbuilder
        builder = xml.dom.expatbuilder.makeBuilder(options)
        return builder.parseFile(stream)
207
208
209def _name_xform(name):
210 return name.lower().replace('-', '_')
211
212
class DOMEntityResolver(object):
    """Resolve a system identifier into a populated DOMInputSource.

    The URL opener is created lazily on first use and cached on the
    instance in ``_opener``.
    """

    __slots__ = '_opener',

    def resolveEntity(self, publicId, systemId):
        """Open *systemId* and return a DOMInputSource describing it.

        The returned source carries the given identifiers, the opened
        byte stream, the charset reported by the transport (if any) and
        a base URI derived from *systemId*.
        """
        assert systemId is not None
        source = DOMInputSource()
        source.publicId = publicId
        source.systemId = systemId
        source.byteStream = self._get_opener().open(systemId)

        # determine the encoding if the transport provided it
        source.encoding = self._guess_media_encoding(source)

        # determine the base URI if we can
        import posixpath
        import urllib.parse
        parts = urllib.parse.urlparse(systemId)
        scheme, netloc, path, params, query, fragment = parts
        # XXX should we check the scheme here as well?
        if path and not path.endswith("/"):
            # Strip the final path segment so the base names a directory.
            path = posixpath.dirname(path) + "/"
            parts = scheme, netloc, path, params, query, fragment
            source.baseURI = urllib.parse.urlunparse(parts)

        return source

    def _get_opener(self):
        """Return the cached opener, creating it on first use."""
        try:
            return self._opener
        except AttributeError:
            self._opener = self._create_opener()
            return self._opener

    def _create_opener(self):
        """Create the opener used to fetch entities."""
        import urllib.request
        return urllib.request.build_opener()

    def _guess_media_encoding(self, source):
        """Return the lower-cased charset of the response, or None.

        ``source.byteStream.info()`` is expected to return an
        email.message.Message-style object, as urllib responses do.
        None is returned when there is no Content-Type header or it
        carries no charset parameter.
        """
        info = source.byteStream.info()
        # Message objects have no getplist() (that was the Python 2
        # mimetools API); get_content_charset() does the header parsing.
        return info.get_content_charset()
255
256
class DOMInputSource(object):
    """Record describing where the bytes of an XML document come from.

    Every field starts out as None; the ``_get_*``/``_set_*`` methods
    are plain accessors for the attribute of the same name.
    """

    __slots__ = ('byteStream', 'characterStream', 'stringData',
                 'encoding', 'publicId', 'systemId', 'baseURI')

    def __init__(self):
        # Nothing is known about the source until the caller fills it in.
        self.byteStream = self.characterStream = self.stringData = None
        self.encoding = None
        self.publicId = self.systemId = self.baseURI = None

    # -- byteStream --------------------------------------------------

    def _get_byteStream(self):
        return self.byteStream

    def _set_byteStream(self, byteStream):
        self.byteStream = byteStream

    # -- characterStream ---------------------------------------------

    def _get_characterStream(self):
        return self.characterStream

    def _set_characterStream(self, characterStream):
        self.characterStream = characterStream

    # -- stringData --------------------------------------------------

    def _get_stringData(self):
        return self.stringData

    def _set_stringData(self, data):
        self.stringData = data

    # -- encoding ----------------------------------------------------

    def _get_encoding(self):
        return self.encoding

    def _set_encoding(self, encoding):
        self.encoding = encoding

    # -- publicId ----------------------------------------------------

    def _get_publicId(self):
        return self.publicId

    def _set_publicId(self, publicId):
        self.publicId = publicId

    # -- systemId ----------------------------------------------------

    def _get_systemId(self):
        return self.systemId

    def _set_systemId(self, systemId):
        self.systemId = systemId

    # -- baseURI -----------------------------------------------------

    def _get_baseURI(self):
        return self.baseURI

    def _set_baseURI(self, uri):
        self.baseURI = uri
304
305
class DOMBuilderFilter:
    """Element filter which can be used to tailor construction of
    a DOM instance.
    """

    # There's really no need for this class; concrete implementations
    # should just implement the endElement() and startElement()
    # methods as appropriate.  Using this makes it easy to only
    # implement one of them.

    # Verdicts a filter method may return.
    FILTER_ACCEPT, FILTER_REJECT, FILTER_SKIP, FILTER_INTERRUPT = 1, 2, 3, 4

    # By default every node type is shown to the filter.
    whatToShow = NodeFilter.SHOW_ALL

    def _get_whatToShow(self):
        return self.whatToShow

    def acceptNode(self, element):
        """Accept every node; override to be selective."""
        verdict = self.FILTER_ACCEPT
        return verdict

    def startContainer(self, element):
        """Accept every container; override to be selective."""
        verdict = self.FILTER_ACCEPT
        return verdict


# NodeFilter was only needed while the class body above was evaluated.
del NodeFilter
333
334
class _AsyncDeprecatedProperty:
    """Descriptor behind the deprecated ``async`` attribute of DocumentLS.

    Every access emits a DeprecationWarning and is forwarded to the
    ``async_`` attribute.
    """

    def warn(self, cls):
        clsname = cls.__name__
        warnings.warn(
            "{cls}.async is deprecated; use {cls}.async_".format(cls=clsname),
            DeprecationWarning)

    def __get__(self, instance, cls):
        self.warn(cls)
        if instance is not None:
            return instance.async_
        # Class-level access: report the default.
        return False

    def __set__(self, instance, value):
        self.warn(type(instance))
        setattr(instance, 'async_', value)


class DocumentLS:
    """Mixin to create documents that conform to the load/save spec."""

    async_ = False

    def _get_async(self):
        return False

    def _set_async(self, flag):
        """Reject any attempt to enable asynchronous loading.

        Raises xml.dom.NotSupportedErr when *flag* is true.
        """
        if flag:
            raise xml.dom.NotSupportedErr(
                "asynchronous document loading is not supported")

    def abort(self):
        # What does it mean to "clear" a document?  Does the
        # documentElement disappear?
        raise NotImplementedError(
            "haven't figured out what this means yet")

    def load(self, uri):
        raise NotImplementedError("haven't written this yet")

    def loadXML(self, source):
        raise NotImplementedError("haven't written this yet")

    def saveXML(self, snode):
        """Serialize *snode* (or this document when it is None) via toxml().

        Raises xml.dom.WrongDocumentErr if *snode* belongs to another
        document.
        """
        if snode is None:
            snode = self
        elif snode.ownerDocument is not self:
            raise xml.dom.WrongDocumentErr()
        return snode.toxml()


# ``async`` is a reserved word from Python 3.7 on, so it can no longer be
# written as an identifier in the class body or as a parameter name.  The
# deprecated attribute is attached by string name instead; it remains
# reachable through getattr(doc, "async").
setattr(DocumentLS, "async", _AsyncDeprecatedProperty())

del _AsyncDeprecatedProperty
388
389
class DOMImplementationLS:
    """Factory for the load/save helper objects defined in this module."""

    MODE_SYNCHRONOUS = 1
    MODE_ASYNCHRONOUS = 2

    def createDOMBuilder(self, mode, schemaType):
        """Return a DOMBuilder for synchronous mode.

        Raises xml.dom.NotSupportedErr when a schema type is given or
        asynchronous mode is requested, and ValueError for any other
        mode value.
        """
        # The schema type is checked before the mode is looked at.
        if schemaType is not None:
            raise xml.dom.NotSupportedErr(
                "schemaType not yet supported")
        if mode == self.MODE_SYNCHRONOUS:
            return DOMBuilder()
        elif mode == self.MODE_ASYNCHRONOUS:
            raise xml.dom.NotSupportedErr(
                "asynchronous builders are not supported")
        else:
            raise ValueError("unknown value for mode")

    def createDOMWriter(self):
        """Not implemented; always raises NotImplementedError."""
        raise NotImplementedError(
            "the writer interface hasn't been written yet!")

    def createDOMInputSource(self):
        """Return a new, empty DOMInputSource."""
        return DOMInputSource()
410 return DOMInputSource()