Armin Rigo | 9ed7306 | 2005-12-14 18:10:45 +0000 | [diff] [blame] | 1 | # |
| 2 | # ElementTree |
Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 3 | # $Id: ElementInclude.py 3375 2008-02-13 08:05:08Z fredrik $ |
Armin Rigo | 9ed7306 | 2005-12-14 18:10:45 +0000 | [diff] [blame] | 4 | # |
| 5 | # limited xinclude support for element trees |
| 6 | # |
| 7 | # history: |
| 8 | # 2003-08-15 fl created |
| 9 | # 2003-11-14 fl fixed default loader |
| 10 | # |
| 11 | # Copyright (c) 2003-2004 by Fredrik Lundh. All rights reserved. |
| 12 | # |
| 13 | # fredrik@pythonware.com |
| 14 | # http://www.pythonware.com |
| 15 | # |
| 16 | # -------------------------------------------------------------------- |
| 17 | # The ElementTree toolkit is |
| 18 | # |
Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 19 | # Copyright (c) 1999-2008 by Fredrik Lundh |
Armin Rigo | 9ed7306 | 2005-12-14 18:10:45 +0000 | [diff] [blame] | 20 | # |
| 21 | # By obtaining, using, and/or copying this software and/or its |
| 22 | # associated documentation, you agree that you have read, understood, |
| 23 | # and will comply with the following terms and conditions: |
| 24 | # |
| 25 | # Permission to use, copy, modify, and distribute this software and |
| 26 | # its associated documentation for any purpose and without fee is |
| 27 | # hereby granted, provided that the above copyright notice appears in |
| 28 | # all copies, and that both that copyright notice and this permission |
| 29 | # notice appear in supporting documentation, and that the name of |
| 30 | # Secret Labs AB or the author not be used in advertising or publicity |
| 31 | # pertaining to distribution of the software without specific, written |
| 32 | # prior permission. |
| 33 | # |
| 34 | # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD |
| 35 | # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- |
| 36 | # ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR |
| 37 | # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY |
| 38 | # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, |
| 39 | # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS |
| 40 | # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE |
| 41 | # OF THIS SOFTWARE. |
| 42 | # -------------------------------------------------------------------- |
| 43 | |
Fredrik Lundh | 63168a5 | 2005-12-14 22:29:34 +0000 | [diff] [blame] | 44 | # Licensed to PSF under a Contributor Agreement. |
Miss Islington (bot) | f7f1c26 | 2021-07-30 07:25:28 -0700 | [diff] [blame] | 45 | # See https://www.python.org/psf/license for licensing details. |
Fredrik Lundh | 63168a5 | 2005-12-14 22:29:34 +0000 | [diff] [blame] | 46 | |
Armin Rigo | 9ed7306 | 2005-12-14 18:10:45 +0000 | [diff] [blame] | 47 | ## |
| 48 | # Limited XInclude support for the ElementTree package. |
| 49 | ## |
| 50 | |
| 51 | import copy |
Alex Martelli | c5c45ba | 2006-08-21 20:54:38 +0000 | [diff] [blame] | 52 | from . import ElementTree |
Stefan Behnel | c6a7bdb | 2019-11-25 16:36:25 +0100 | [diff] [blame] | 53 | from urllib.parse import urljoin |
Armin Rigo | 9ed7306 | 2005-12-14 18:10:45 +0000 | [diff] [blame] | 54 | |
# Namespace prefix, in the "{uri}" form that element tags are compared
# against, shared by every XInclude element name below.
XINCLUDE = "{http://www.w3.org/2001/XInclude}"

# Fully qualified tags of the two XInclude elements this module looks for.
XINCLUDE_INCLUDE = f"{XINCLUDE}include"
XINCLUDE_FALLBACK = f"{XINCLUDE}fallback"

# Security measure: unless the caller says otherwise, nested inclusions
# are followed at most this many levels deep.
DEFAULT_MAX_INCLUSION_DEPTH = 6
| 62 | |
| 63 | |
Armin Rigo | 9ed7306 | 2005-12-14 18:10:45 +0000 | [diff] [blame] | 64 | ## |
| 65 | # Fatal include error. |
| 66 | |
class FatalIncludeError(SyntaxError):
    """Raised when an XInclude directive cannot be processed.

    Used for resources that cannot be loaded, unknown ``parse`` modes,
    misplaced ``xi:fallback`` elements and self-referencing includes.
    """
| 69 | |
Stefan Behnel | c6a7bdb | 2019-11-25 16:36:25 +0100 | [diff] [blame] | 70 | |
class LimitedRecursiveIncludeError(FatalIncludeError):
    """Raised when nested inclusions reach the permitted maximum depth."""
| 73 | |
| 74 | |
##
# Default loader.  This loader reads an included resource from disk.
#
# @param href Resource reference.
# @param parse Parse mode.  Either "xml" or "text".
# @param encoding Optional text encoding (UTF-8 by default for "text").
# @return The expanded resource.  If the parse mode is "xml", this
#    is the root Element of the parsed document.  If the parse mode is
#    "text", this is a Unicode string.  If the loader fails, it can
#    return None or raise an OSError exception.
# @throws OSError If the loader fails to load the resource.
Armin Rigo | 9ed7306 | 2005-12-14 18:10:45 +0000 | [diff] [blame] | 86 | |
def default_loader(href, parse, encoding=None):
    """Load an included resource from the local file system.

    Args:
        href: Path of the resource; handed to ``open()`` unchanged.
        parse: ``"xml"`` to parse the file as XML.  Any other value is
            treated as a request for the decoded text of the file.
        encoding: Text encoding used for non-XML resources.  A missing
            or empty value means UTF-8.  Ignored when parsing XML.

    Returns:
        The root element of the parsed document for ``"xml"``, otherwise
        the complete file contents as a string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    if parse != "xml":
        with open(href, 'r', encoding=encoding or 'UTF-8') as stream:
            return stream.read()

    # XML is read as bytes so that the parser handles the document's
    # own encoding.
    with open(href, 'rb') as stream:
        return ElementTree.parse(stream).getroot()
| 97 | |
##
# Expand XInclude directives.
#
# @param elem Root element, or a tree object providing getroot().
# @param loader Optional resource loader.  If omitted, it defaults
#     to {@link default_loader}.  If given, it should be a callable
#     that implements the same interface as <b>default_loader</b>.
# @param base_url The base URL of the original file, to resolve
#     relative include file references.
# @param max_depth The maximum number of recursive inclusions.
#     Limited to reduce the risk of malicious content explosion.
#     Pass None to disable the limitation.  Negative values are rejected.
# @throws ValueError If {@link max_depth} is negative.
# @throws LimitedRecursiveIncludeError If the {@link max_depth} was exceeded.
# @throws FatalIncludeError If the function fails to include a given
#     resource, or if the tree contains malformed XInclude elements.
# @throws OSError If the function fails to load a given resource.
# @returns None.  The tree is expanded in place.
Armin Rigo | 9ed7306 | 2005-12-14 18:10:45 +0000 | [diff] [blame] | 115 | |
def include(elem, loader=None, base_url=None,
            max_depth=DEFAULT_MAX_INCLUSION_DEPTH):
    """Expand all XInclude directives below *elem*, modifying it in place.

    Args:
        elem: The root element to process.  An object exposing
            ``getroot()`` is accepted too; its root is used.
        loader: Callable with the same interface as ``default_loader``.
            ``default_loader`` is used when omitted.
        base_url: Base URL against which ``href`` values are resolved.
        max_depth: Maximum number of nested inclusions, or ``None`` for
            no limit.

    Raises:
        ValueError: If *max_depth* is negative.
        LimitedRecursiveIncludeError: If *max_depth* is reached.
        FatalIncludeError: If a resource cannot be included or the tree
            contains an invalid XInclude element.
    """
    if max_depth is None:
        # -1 only ever counts further down, so it never equals the 0 that
        # triggers the depth error: effectively "unlimited".
        depth_budget = -1
    else:
        if max_depth < 0:
            raise ValueError("expected non-negative depth or None for 'max_depth', got %r" % max_depth)
        depth_budget = max_depth

    root = elem.getroot() if hasattr(elem, 'getroot') else elem
    resolve = default_loader if loader is None else loader

    # The fresh set records the hrefs currently being expanded so that a
    # document including itself is detected.
    _include(root, resolve, base_url, depth_budget, set())
| 129 | |
| 130 | |
def _include(elem, loader, base_url, max_depth, _parent_hrefs):
    """Recursively expand the XInclude directives among *elem*'s descendants.

    Args:
        elem: Element whose children are scanned and rewritten in place.
        loader: Callable invoked as ``loader(href, "xml")`` or
            ``loader(href, "text", encoding)``.
        base_url: If truthy, every ``href`` is joined onto it first.
        max_depth: Remaining inclusion depth.  An XML include found while
            this is 0 is an error; negative values never reach 0.
        _parent_hrefs: Set of hrefs whose expansion is in progress; used
            to detect a document that includes itself.

    Raises:
        LimitedRecursiveIncludeError: If the depth budget is exhausted.
        FatalIncludeError: On a recursive include, a loader returning
            None, an unknown ``parse`` value or a stray ``xi:fallback``.
    """
    index = 0
    while index < len(elem):
        child = elem[index]

        if child.tag != XINCLUDE_INCLUDE:
            if child.tag == XINCLUDE_FALLBACK:
                raise FatalIncludeError(
                    "xi:fallback tag must be child of xi:include (%r)" % child.tag
                )
            # Ordinary element: directives may still be nested inside it.
            _include(child, loader, base_url, max_depth, _parent_hrefs)
            index += 1
            continue

        # From here on, child is an xi:include directive.
        target = child.get("href")
        if base_url:
            target = urljoin(base_url, target)
        mode = child.get("parse", "xml")

        if mode == "xml":
            if target in _parent_hrefs:
                raise FatalIncludeError("recursive include of %s" % target)
            if max_depth == 0:
                raise LimitedRecursiveIncludeError(
                    "maximum xinclude depth reached when including file %s" % target)
            _parent_hrefs.add(target)
            loaded = loader(target, mode)
            if loaded is None:
                raise FatalIncludeError(
                    "cannot load %r as %r" % (target, mode)
                )
            # FIXME (kept from the original): a shallow copy makes little
            # sense with recursive includes.
            replacement = copy.copy(loaded)
            # The included document becomes the base for its own includes.
            _include(replacement, loader, target, max_depth - 1, _parent_hrefs)
            _parent_hrefs.remove(target)
            if child.tail:
                replacement.tail = (replacement.tail or "") + child.tail
            elem[index] = replacement
            index += 1
        elif mode == "text":
            content = loader(target, mode, child.get("encoding"))
            if content is None:
                raise FatalIncludeError(
                    "cannot load %r as %r" % (target, mode)
                )
            if child.tail:
                content += child.tail
            # Text has no node of its own: attach it to whatever precedes
            # the directive, i.e. the previous sibling's tail or the
            # parent's leading text.
            if index:
                previous = elem[index - 1]
                previous.tail = (previous.tail or "") + content
            else:
                elem.text = (elem.text or "") + content
            # Removing the directive shifts the next sibling into this
            # slot, so the index must not advance.
            del elem[index]
        else:
            raise FatalIncludeError(
                "unknown parse type in xi:include tag (%r)" % mode
            )