#
# ElementTree
# $Id: ElementInclude.py 3375 2008-02-13 08:05:08Z fredrik $
#
# limited xinclude support for element trees
#
# history:
# 2003-08-15 fl   created
# 2003-11-14 fl   fixed default loader
#
# Copyright (c) 2003-2004 by Fredrik Lundh.  All rights reserved.
#
# fredrik@pythonware.com
# http://www.pythonware.com
#
# --------------------------------------------------------------------
# The ElementTree toolkit is
#
# Copyright (c) 1999-2008 by Fredrik Lundh
#
# By obtaining, using, and/or copying this software and/or its
# associated documentation, you agree that you have read, understood,
# and will comply with the following terms and conditions:
#
# Permission to use, copy, modify, and distribute this software and
# its associated documentation for any purpose and without fee is
# hereby granted, provided that the above copyright notice appears in
# all copies, and that both that copyright notice and this permission
# notice appear in supporting documentation, and that the name of
# Secret Labs AB or the author not be used in advertising or publicity
# pertaining to distribution of the software without specific, written
# prior permission.
#
# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
# ABILITY AND FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
# OF THIS SOFTWARE.
# --------------------------------------------------------------------

# Licensed to PSF under a Contributor Agreement.
# See http://www.python.org/psf/license for licensing details.

##
# Limited XInclude support for the ElementTree package.
##
50
import copy
from urllib.parse import urljoin

from . import ElementTree

# XInclude namespace, written in the "{uri}" form that prefixes element tags.
XINCLUDE = "{http://www.w3.org/2001/XInclude}"

# Fully qualified tags of the two XInclude elements this module looks for.
XINCLUDE_INCLUDE = XINCLUDE + "include"
XINCLUDE_FALLBACK = XINCLUDE + "fallback"

# For security reasons, the inclusion depth is limited to this read-only value by default.
DEFAULT_MAX_INCLUSION_DEPTH = 6
62
63
##
# Fatal include error.
66
class FatalIncludeError(SyntaxError):
    """Raised when an XInclude directive cannot be processed."""


class LimitedRecursiveIncludeError(FatalIncludeError):
    """Raised when nested inclusions exceed the permitted depth."""
73
74
##
# Default loader. This loader reads an included resource from disk.
#
# @param href Resource reference.
# @param parse Parse mode. Either "xml" or "text".
# @param encoding Optional text encoding (UTF-8 by default for "text").
# @return The expanded resource. If the parse mode is "xml", this
#    is the root Element of the parsed document. If the parse mode
#    is "text", this is a Unicode string. If the loader fails, it
#    can return None or raise an OSError exception.
# @throws OSError If the loader fails to load the resource.

def default_loader(href, parse, encoding=None):
    """Load the resource named by *href* from the local file system.

    href -- path of the file to read; it is passed straight to open().
    parse -- "xml" to parse the file as XML; any other value reads the
        file as text.
    encoding -- text encoding used when reading as text; a false value
        (None or "") selects UTF-8.  Ignored for "xml".

    Returns the root element of the parsed document for "xml", or the
    file contents as a string otherwise.  Errors raised by open() or by
    the XML parser are propagated to the caller.
    """
    if parse == "xml":
        # Binary mode lets the XML parser work out the encoding itself.
        with open(href, 'rb') as file:
            data = ElementTree.parse(file).getroot()
    else:
        if not encoding:
            encoding = 'UTF-8'
        with open(href, 'r', encoding=encoding) as file:
            data = file.read()
    return data
97
##
# Expand XInclude directives.
#
# @param elem Root element, or a tree object providing getroot().
# @param loader Optional resource loader. If omitted, it defaults
#     to {@link default_loader}. If given, it should be a callable
#     that implements the same interface as <b>default_loader</b>.
# @param base_url The base URL of the original file, to resolve
#     relative include file references.
# @param max_depth The maximum number of recursive inclusions.
#     Limited to reduce the risk of malicious content explosion.
#     Pass None to disable the limitation.
# @throws ValueError If {@link max_depth} is negative.
# @throws LimitedRecursiveIncludeError If the {@link max_depth} was exceeded.
# @throws FatalIncludeError If the function fails to include a given
#     resource, or if the tree contains malformed XInclude elements.
# @throws IOError If the function fails to load a given resource.
# @returns None. The tree is expanded in place.

def include(elem, loader=None, base_url=None,
            max_depth=DEFAULT_MAX_INCLUSION_DEPTH):
    """Expand the XInclude directives found below *elem*, in place.

    elem -- root element to process; an object with a getroot() method
        (such as a tree) is accepted and its root is used instead.
    loader -- callable with the same interface as default_loader();
        defaults to default_loader.
    base_url -- base URL of the original document, used to resolve
        relative include references.
    max_depth -- maximum number of nested "xml" inclusions.  Pass None
        to disable the limit; negative values are rejected.

    Raises ValueError for a negative max_depth.  Errors raised while
    expanding the tree (FatalIncludeError and its subclasses, or
    whatever the loader raises) are propagated.  Returns None.
    """
    if max_depth is None:
        # A negative counter only moves further from the 0 that
        # triggers the depth error, so the limit is never hit.
        max_depth = -1
    elif max_depth < 0:
        raise ValueError("expected non-negative depth or None for 'max_depth', got %r" % max_depth)

    if hasattr(elem, 'getroot'):
        elem = elem.getroot()
    if loader is None:
        loader = default_loader

    # The fresh set tracks the hrefs currently being expanded, so that
    # include cycles can be detected.
    _include(elem, loader, base_url, max_depth, set())
129
130
def _include(elem, loader, base_url, max_depth, _parent_hrefs):
    """Recursively expand the XInclude directives among *elem*'s descendants.

    elem -- element whose children are scanned; it is modified in place.
    loader -- callable invoked as loader(href, "xml") or
        loader(href, "text", encoding) to fetch an included resource.
    base_url -- URL that relative href values are joined against; a
        false value leaves href untouched.
    max_depth -- remaining number of nested "xml" inclusions allowed;
        a negative value never reaches 0 and so disables the limit.
    _parent_hrefs -- set of hrefs currently being expanded further up
        the call chain, used to detect include cycles.

    Raises FatalIncludeError for malformed directives, include cycles
    and resources the loader reports as None, and
    LimitedRecursiveIncludeError when max_depth is exhausted.
    """
    # look for xinclude elements
    i = 0
    while i < len(elem):
        e = elem[i]
        if e.tag == XINCLUDE_INCLUDE:
            # process xinclude directive
            href = e.get("href")
            if href is None:
                # Without this check a missing href reaches urljoin()
                # or the loader as None and fails in an unrelated way.
                raise FatalIncludeError(
                    "xi:include tag is missing the 'href' attribute"
                    )
            if base_url:
                href = urljoin(base_url, href)
            parse = e.get("parse", "xml")
            if parse == "xml":
                if href in _parent_hrefs:
                    raise FatalIncludeError("recursive include of %s" % href)
                if max_depth == 0:
                    raise LimitedRecursiveIncludeError(
                        "maximum xinclude depth reached when including file %s" % href)
                _parent_hrefs.add(href)
                try:
                    node = loader(href, parse)
                    if node is None:
                        raise FatalIncludeError(
                            "cannot load %r as %r" % (href, parse)
                            )
                    node = copy.copy(node)  # FIXME: this makes little sense with recursive includes
                    # The included document becomes the base for its
                    # own nested includes.
                    _include(node, loader, href, max_depth - 1, _parent_hrefs)
                finally:
                    # Keep the cycle-tracking set consistent even when
                    # loading or nested expansion fails.
                    _parent_hrefs.remove(href)
                if e.tail:
                    node.tail = (node.tail or "") + e.tail
                elem[i] = node
            elif parse == "text":
                text = loader(href, parse, e.get("encoding"))
                if text is None:
                    raise FatalIncludeError(
                        "cannot load %r as %r" % (href, parse)
                        )
                if e.tail:
                    text += e.tail
                # The included text is attached to whatever precedes the
                # directive: the previous sibling's tail, or the
                # parent's text when the directive is the first child.
                if i:
                    node = elem[i-1]
                    node.tail = (node.tail or "") + text
                else:
                    elem.text = (elem.text or "") + text
                # Removing the directive shifts the next child into
                # slot i, so the index must not advance.
                del elem[i]
                continue
            else:
                raise FatalIncludeError(
                    "unknown parse type in xi:include tag (%r)" % parse
                )
        elif e.tag == XINCLUDE_FALLBACK:
            raise FatalIncludeError(
                "xi:fallback tag must be child of xi:include (%r)" % e.tag
                )
        else:
            _include(e, loader, base_url, max_depth, _parent_hrefs)
        i += 1