bpo-20928: support base-URL and recursive includes in etree.ElementInclude (#5723)

* bpo-20928: bring elementtree's XInclude support en-par with the implementation in lxml by adding support for recursive includes and a base-URL.

* bpo-20928: Support xincluding the same file multiple times, just not recursively.

* bpo-20928: Add 'max_depth' parameter to xinclude that limits the maximum recursion depth to 6 by default.

* Add news entry for updated ElementInclude support
diff --git a/Lib/xml/etree/ElementInclude.py b/Lib/xml/etree/ElementInclude.py
index 963470e..5303062 100644
--- a/Lib/xml/etree/ElementInclude.py
+++ b/Lib/xml/etree/ElementInclude.py
@@ -50,18 +50,28 @@
 
 import copy
 from . import ElementTree
+from urllib.parse import urljoin
 
 XINCLUDE = "{http://www.w3.org/2001/XInclude}"
 
 XINCLUDE_INCLUDE = XINCLUDE + "include"
 XINCLUDE_FALLBACK = XINCLUDE + "fallback"
 
+# For security reasons, the inclusion depth is limited to this read-only value by default.
+DEFAULT_MAX_INCLUSION_DEPTH = 6
+
+
 ##
 # Fatal include error.
 
 class FatalIncludeError(SyntaxError):
     pass
 
+
+class LimitedRecursiveIncludeError(FatalIncludeError):
+    pass
+
+
 ##
 # Default loader.  This loader reads an included resource from disk.
 #
@@ -92,13 +102,33 @@
 # @param loader Optional resource loader.  If omitted, it defaults
 #     to {@link default_loader}.  If given, it should be a callable
 #     that implements the same interface as <b>default_loader</b>.
+# @param base_url The base URL of the original file, to resolve
+#     relative include file references.
+# @param max_depth The maximum number of recursive inclusions.
+#     Limited to reduce the risk of malicious content explosion.
+#     Pass a negative value to disable the limitation.
+# @throws LimitedRecursiveIncludeError If the {@link max_depth} was exceeded.
 # @throws FatalIncludeError If the function fails to include a given
 #     resource, or if the tree contains malformed XInclude elements.
-# @throws OSError If the function fails to load a given resource.
+# @throws IOError If the function fails to load a given resource.
+# @returns the node or its replacement if it was an XInclude node
 
-def include(elem, loader=None):
+def include(elem, loader=None, base_url=None,
+            max_depth=DEFAULT_MAX_INCLUSION_DEPTH):
+    if max_depth is None:
+        max_depth = -1
+    elif max_depth < 0:
+        raise ValueError("expected non-negative depth or None for 'max_depth', got %r" % max_depth)
+
+    if hasattr(elem, 'getroot'):
+        elem = elem.getroot()
     if loader is None:
         loader = default_loader
+
+    _include(elem, loader, base_url, max_depth, set())
+
+
+def _include(elem, loader, base_url, max_depth, _parent_hrefs):
     # look for xinclude elements
     i = 0
     while i < len(elem):
@@ -106,14 +136,24 @@
         if e.tag == XINCLUDE_INCLUDE:
             # process xinclude directive
             href = e.get("href")
+            if base_url:
+                href = urljoin(base_url, href)
             parse = e.get("parse", "xml")
             if parse == "xml":
+                if href in _parent_hrefs:
+                    raise FatalIncludeError("recursive include of %s" % href)
+                if max_depth == 0:
+                    raise LimitedRecursiveIncludeError(
+                        "maximum xinclude depth reached when including file %s" % href)
+                _parent_hrefs.add(href)
                 node = loader(href, parse)
                 if node is None:
                     raise FatalIncludeError(
                         "cannot load %r as %r" % (href, parse)
                         )
-                node = copy.copy(node)
+                node = copy.copy(node)  # FIXME: this makes little sense with recursive includes
+                _include(node, loader, href, max_depth - 1, _parent_hrefs)
+                _parent_hrefs.remove(href)
                 if e.tail:
                     node.tail = (node.tail or "") + e.tail
                 elem[i] = node
@@ -123,11 +163,13 @@
                     raise FatalIncludeError(
                         "cannot load %r as %r" % (href, parse)
                         )
+                if e.tail:
+                    text += e.tail
                 if i:
                     node = elem[i-1]
-                    node.tail = (node.tail or "") + text + (e.tail or "")
+                    node.tail = (node.tail or "") + text
                 else:
-                    elem.text = (elem.text or "") + text + (e.tail or "")
+                    elem.text = (elem.text or "") + text
                 del elem[i]
                 continue
             else:
@@ -139,5 +181,5 @@
                 "xi:fallback tag must be child of xi:include (%r)" % e.tag
                 )
         else:
-            include(e, loader)
-        i = i + 1
+            _include(e, loader, base_url, max_depth, _parent_hrefs)
+        i += 1