bpo-36676: Namespace prefix aware parsing support for the ET.XMLParser target (GH-12885)

* bpo-36676: Implement namespace prefix aware parsing support for the XMLParser target in ElementTree.
diff --git a/Doc/library/xml.etree.elementtree.rst b/Doc/library/xml.etree.elementtree.rst
index c9e04c2..66090af 100644
--- a/Doc/library/xml.etree.elementtree.rst
+++ b/Doc/library/xml.etree.elementtree.rst
@@ -1086,7 +1086,7 @@
 
 
    In addition, a custom :class:`TreeBuilder` object can provide the
-   following method:
+   following methods:
 
    .. method:: doctype(name, pubid, system)
 
@@ -1096,6 +1096,23 @@
 
       .. versionadded:: 3.2
 
+   .. method:: start_ns(prefix, uri)
+
+      Is called whenever the parser encounters a new namespace declaration,
+      before the ``start()`` callback for the opening element that defines it.
+      *prefix* is ``''`` for the default namespace and the declared
+      namespace prefix name otherwise.  *uri* is the namespace URI.
+
+      .. versionadded:: 3.8
+
+   .. method:: end_ns(prefix)
+
+      Is called after the ``end()`` callback of an element that declared
+      a namespace prefix mapping, with the name of the *prefix* that went
+      out of scope (``''`` for the default namespace).
+
+      .. versionadded:: 3.8
+
 
 .. _elementtree-xmlparser-objects:
 
@@ -1131,7 +1148,8 @@
 
    :meth:`XMLParser.feed` calls *target*\'s ``start(tag, attrs_dict)`` method
    for each opening tag, its ``end(tag)`` method for each closing tag, and data
-   is processed by method ``data(data)``.  :meth:`XMLParser.close` calls
+   is processed by method ``data(data)``.  For further supported callback
+   methods, see the :class:`TreeBuilder` class.  :meth:`XMLParser.close` calls
    *target*\'s method ``close()``. :class:`XMLParser` can be used not only for
    building a tree structure. This is an example of counting the maximum depth
    of an XML file::
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index 8a228b8..0abc42a 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -14,12 +14,13 @@
 import operator
 import pickle
 import sys
+import textwrap
 import types
 import unittest
 import warnings
 import weakref
 
-from itertools import product
+from itertools import product, islice
 from test import support
 from test.support import TESTFN, findfile, import_fresh_module, gc_collect, swap_attr
 
@@ -694,12 +695,17 @@
                 self.append(("pi", target, data))
             def comment(self, data):
                 self.append(("comment", data))
+            def start_ns(self, prefix, uri):
+                self.append(("start-ns", prefix, uri))
+            def end_ns(self, prefix):
+                self.append(("end-ns", prefix))
         builder = Builder()
         parser = ET.XMLParser(target=builder)
         parser.feed(data)
         self.assertEqual(builder, [
                 ('pi', 'pi', 'data'),
                 ('comment', ' comment '),
+                ('start-ns', '', 'namespace'),
                 ('start', '{namespace}root'),
                 ('start', '{namespace}element'),
                 ('end', '{namespace}element'),
@@ -708,8 +714,30 @@
                 ('start', '{namespace}empty-element'),
                 ('end', '{namespace}empty-element'),
                 ('end', '{namespace}root'),
+                ('end-ns', ''),
             ])
 
+    def test_custom_builder_only_end_ns(self):
+        class Builder(list):
+            def end_ns(self, prefix):
+                self.append(("end-ns", prefix))
+
+        builder = Builder()
+        parser = ET.XMLParser(target=builder)
+        parser.feed(textwrap.dedent("""\
+            <?pi data?>
+            <!-- comment -->
+            <root xmlns='namespace' xmlns:p='pns' xmlns:a='ans'>
+               <a:element key='value'>text</a:element>
+               <p:element>text</p:element>tail
+               <empty-element/>
+            </root>
+            """))
+        self.assertEqual(builder, [
+                ('end-ns', 'a'),
+                ('end-ns', 'p'),
+                ('end-ns', ''),
+            ])
 
     # Element.getchildren() and ElementTree.getiterator() are deprecated.
     @checkwarnings(("This method will be removed in future versions.  "
@@ -1194,14 +1222,19 @@
             for i in range(0, len(data), chunk_size):
                 parser.feed(data[i:i+chunk_size])
 
-    def assert_events(self, parser, expected):
+    def assert_events(self, parser, expected, max_events=None):
         self.assertEqual(
             [(event, (elem.tag, elem.text))
-             for event, elem in parser.read_events()],
+             for event, elem in islice(parser.read_events(), max_events)],
             expected)
 
-    def assert_event_tags(self, parser, expected):
-        events = parser.read_events()
+    def assert_event_tuples(self, parser, expected, max_events=None):
+        self.assertEqual(
+            list(islice(parser.read_events(), max_events)),
+            expected)
+
+    def assert_event_tags(self, parser, expected, max_events=None):
+        events = islice(parser.read_events(), max_events)
         self.assertEqual([(action, elem.tag) for action, elem in events],
                          expected)
 
@@ -1276,6 +1309,56 @@
         self.assertEqual(list(parser.read_events()), [('end-ns', None)])
         self.assertIsNone(parser.close())
 
+    def test_ns_events_start(self):
+        parser = ET.XMLPullParser(events=('start-ns', 'start', 'end'))
+        self._feed(parser, "<tag xmlns='abc' xmlns:p='xyz'>\n")
+        self.assert_event_tuples(parser, [
+            ('start-ns', ('', 'abc')),
+            ('start-ns', ('p', 'xyz')),
+        ], max_events=2)
+        self.assert_event_tags(parser, [
+            ('start', '{abc}tag'),
+        ], max_events=1)
+
+        self._feed(parser, "<child />\n")
+        self.assert_event_tags(parser, [
+            ('start', '{abc}child'),
+            ('end', '{abc}child'),
+        ])
+
+        self._feed(parser, "</tag>\n")
+        parser.close()
+        self.assert_event_tags(parser, [
+            ('end', '{abc}tag'),
+        ])
+
+    def test_ns_events_start_end(self):
+        parser = ET.XMLPullParser(events=('start-ns', 'start', 'end', 'end-ns'))
+        self._feed(parser, "<tag xmlns='abc' xmlns:p='xyz'>\n")
+        self.assert_event_tuples(parser, [
+            ('start-ns', ('', 'abc')),
+            ('start-ns', ('p', 'xyz')),
+        ], max_events=2)
+        self.assert_event_tags(parser, [
+            ('start', '{abc}tag'),
+        ], max_events=1)
+
+        self._feed(parser, "<child />\n")
+        self.assert_event_tags(parser, [
+            ('start', '{abc}child'),
+            ('end', '{abc}child'),
+        ])
+
+        self._feed(parser, "</tag>\n")
+        parser.close()
+        self.assert_event_tags(parser, [
+            ('end', '{abc}tag'),
+        ], max_events=1)
+        self.assert_event_tuples(parser, [
+            ('end-ns', None),
+            ('end-ns', None),
+        ])
+
     def test_events(self):
         parser = ET.XMLPullParser(events=())
         self._feed(parser, "<root/>\n")
diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py
index c640048..5b26ac7 100644
--- a/Lib/xml/etree/ElementTree.py
+++ b/Lib/xml/etree/ElementTree.py
@@ -1518,6 +1518,10 @@
             parser.StartElementHandler = self._start
         if hasattr(target, 'end'):
             parser.EndElementHandler = self._end
+        if hasattr(target, 'start_ns'):
+            parser.StartNamespaceDeclHandler = self._start_ns
+        if hasattr(target, 'end_ns'):
+            parser.EndNamespaceDeclHandler = self._end_ns
         if hasattr(target, 'data'):
             parser.CharacterDataHandler = target.data
         # miscellaneous callbacks
@@ -1559,12 +1563,24 @@
                     append((event, end(tag)))
                 parser.EndElementHandler = handler
             elif event_name == "start-ns":
-                def handler(prefix, uri, event=event_name, append=append):
-                    append((event, (prefix or "", uri or "")))
+                # TreeBuilder does not implement .start_ns()
+                if hasattr(self.target, "start_ns"):
+                    def handler(prefix, uri, event=event_name, append=append,
+                                start_ns=self._start_ns):
+                        append((event, start_ns(prefix, uri)))
+                else:
+                    def handler(prefix, uri, event=event_name, append=append):
+                        append((event, (prefix or '', uri or '')))
                 parser.StartNamespaceDeclHandler = handler
             elif event_name == "end-ns":
-                def handler(prefix, event=event_name, append=append):
-                    append((event, None))
+                # TreeBuilder does not implement .end_ns()
+                if hasattr(self.target, "end_ns"):
+                    def handler(prefix, event=event_name, append=append,
+                                end_ns=self._end_ns):
+                        append((event, end_ns(prefix)))
+                else:
+                    def handler(prefix, event=event_name, append=append):
+                        append((event, None))
                 parser.EndNamespaceDeclHandler = handler
             elif event_name == 'comment':
                 def handler(text, event=event_name, append=append, self=self):
@@ -1595,6 +1611,12 @@
             self._names[key] = name
         return name
 
+    def _start_ns(self, prefix, uri):
+        return self.target.start_ns(prefix or '', uri or '')
+
+    def _end_ns(self, prefix):
+        return self.target.end_ns(prefix or '')
+
     def _start(self, tag, attr_list):
         # Handler for expat's StartElementHandler. Since ordered_attributes
         # is set, the attributes are reported as a list of alternating
diff --git a/Misc/NEWS.d/next/Library/2019-04-20-13-10-34.bpo-36676.XF4Egb.rst b/Misc/NEWS.d/next/Library/2019-04-20-13-10-34.bpo-36676.XF4Egb.rst
new file mode 100644
index 0000000..e0bede8
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2019-04-20-13-10-34.bpo-36676.XF4Egb.rst
@@ -0,0 +1,3 @@
+The XMLParser() in xml.etree.ElementTree now provides namespace prefix context
+to the parser target if it defines the callback methods "start_ns()" and/or
+"end_ns()".  Patch by Stefan Behnel.
diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c
index 5481c61..b69e3a4 100644
--- a/Modules/_elementtree.c
+++ b/Modules/_elementtree.c
@@ -2911,6 +2911,39 @@
     return NULL;
 }
 
+LOCAL(PyObject*)
+treebuilder_handle_start_ns(TreeBuilderObject* self, PyObject* prefix, PyObject* uri)
+{
+    PyObject* parcel;
+
+    if (self->events_append && self->start_ns_event_obj) {
+        parcel = PyTuple_Pack(2, prefix, uri);
+        if (!parcel) {
+            return NULL;
+        }
+
+        if (treebuilder_append_event(self, self->start_ns_event_obj, parcel) < 0) {
+            Py_DECREF(parcel);
+            return NULL;
+        }
+        Py_DECREF(parcel);
+    }
+
+    Py_RETURN_NONE;
+}
+
+LOCAL(PyObject*)
+treebuilder_handle_end_ns(TreeBuilderObject* self, PyObject* prefix)
+{
+    if (self->events_append && self->end_ns_event_obj) {
+        if (treebuilder_append_event(self, self->end_ns_event_obj, prefix) < 0) {
+            return NULL;
+        }
+    }
+
+    Py_RETURN_NONE;
+}
+
 /* -------------------------------------------------------------------- */
 /* methods (in alphabetical order) */
 
@@ -3046,6 +3079,8 @@
 
     PyObject *names;
 
+    PyObject *handle_start_ns;
+    PyObject *handle_end_ns;
     PyObject *handle_start;
     PyObject *handle_data;
     PyObject *handle_end;
@@ -3357,42 +3392,89 @@
 }
 
 static void
-expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
-                       const XML_Char *uri)
+expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix_in,
+                       const XML_Char *uri_in)
 {
-    TreeBuilderObject *target = (TreeBuilderObject*) self->target;
-    PyObject *parcel;
+    PyObject* res = NULL;
+    PyObject* uri;
+    PyObject* prefix;
+    PyObject* stack[2];
 
     if (PyErr_Occurred())
         return;
 
-    if (!target->events_append || !target->start_ns_event_obj)
-        return;
+    if (!uri_in)
+        uri_in = "";
+    if (!prefix_in)
+        prefix_in = "";
 
-    if (!uri)
-        uri = "";
-    if (!prefix)
-        prefix = "";
+    if (TreeBuilder_CheckExact(self->target)) {
+        /* shortcut - TreeBuilder does not actually implement .start_ns() */
+        TreeBuilderObject *target = (TreeBuilderObject*) self->target;
 
-    parcel = Py_BuildValue("ss", prefix, uri);
-    if (!parcel)
-        return;
-    treebuilder_append_event(target, target->start_ns_event_obj, parcel);
-    Py_DECREF(parcel);
+        if (target->events_append && target->start_ns_event_obj) {
+            prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
+            if (!prefix)
+                return;
+            uri = PyUnicode_DecodeUTF8(uri_in, strlen(uri_in), "strict");
+            if (!uri) {
+                Py_DECREF(prefix);
+                return;
+            }
+
+            res = treebuilder_handle_start_ns(target, prefix, uri);
+            Py_DECREF(uri);
+            Py_DECREF(prefix);
+        }
+    } else if (self->handle_start_ns) {
+        prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
+        if (!prefix)
+            return;
+        uri = PyUnicode_DecodeUTF8(uri_in, strlen(uri_in), "strict");
+        if (!uri) {
+            Py_DECREF(prefix);
+            return;
+        }
+
+        stack[0] = prefix;
+        stack[1] = uri;
+        res = _PyObject_FastCall(self->handle_start_ns, stack, 2);
+        Py_DECREF(uri);
+        Py_DECREF(prefix);
+    }
+
+    Py_XDECREF(res);
 }
 
 static void
 expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
 {
-    TreeBuilderObject *target = (TreeBuilderObject*) self->target;
+    PyObject *res = NULL;
+    PyObject* prefix;
 
     if (PyErr_Occurred())
         return;
 
-    if (!target->events_append)
-        return;
+    if (!prefix_in)
+        prefix_in = "";
 
-    treebuilder_append_event(target, target->end_ns_event_obj, Py_None);
+    if (TreeBuilder_CheckExact(self->target)) {
+        /* shortcut - TreeBuilder does not actually implement .end_ns() */
+        TreeBuilderObject *target = (TreeBuilderObject*) self->target;
+
+        if (target->events_append && target->end_ns_event_obj) {
+            res = treebuilder_handle_end_ns(target, Py_None);
+        }
+    } else if (self->handle_end_ns) {
+        prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
+        if (!prefix)
+            return;
+
+        res = _PyObject_FastCall(self->handle_end_ns, &prefix, 1);
+        Py_DECREF(prefix);
+    }
+
+    Py_XDECREF(res);
 }
 
 static void
@@ -3546,6 +3628,7 @@
     if (self) {
         self->parser = NULL;
         self->target = self->entity = self->names = NULL;
+        self->handle_start_ns = self->handle_end_ns = NULL;
         self->handle_start = self->handle_data = self->handle_end = NULL;
         self->handle_comment = self->handle_pi = self->handle_close = NULL;
         self->handle_doctype = NULL;
@@ -3614,6 +3697,14 @@
     }
     self->target = target;
 
+    self->handle_start_ns = PyObject_GetAttrString(target, "start_ns");
+    if (ignore_attribute_error(self->handle_start_ns)) {
+        return -1;
+    }
+    self->handle_end_ns = PyObject_GetAttrString(target, "end_ns");
+    if (ignore_attribute_error(self->handle_end_ns)) {
+        return -1;
+    }
     self->handle_start = PyObject_GetAttrString(target, "start");
     if (ignore_attribute_error(self->handle_start)) {
         return -1;
@@ -3645,6 +3736,12 @@
 
     /* configure parser */
     EXPAT(SetUserData)(self->parser, self);
+    if (self->handle_start_ns || self->handle_end_ns)
+        EXPAT(SetNamespaceDeclHandler)(
+            self->parser,
+            (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
+            (XML_EndNamespaceDeclHandler) expat_end_ns_handler
+            );
     EXPAT(SetElementHandler)(
         self->parser,
         (XML_StartElementHandler) expat_start_handler,
@@ -3689,6 +3786,9 @@
     Py_VISIT(self->handle_end);
     Py_VISIT(self->handle_data);
     Py_VISIT(self->handle_start);
+    Py_VISIT(self->handle_start_ns);
+    Py_VISIT(self->handle_end_ns);
+    Py_VISIT(self->handle_doctype);
 
     Py_VISIT(self->target);
     Py_VISIT(self->entity);
@@ -3712,6 +3812,8 @@
     Py_CLEAR(self->handle_end);
     Py_CLEAR(self->handle_data);
     Py_CLEAR(self->handle_start);
+    Py_CLEAR(self->handle_start_ns);
+    Py_CLEAR(self->handle_end_ns);
     Py_CLEAR(self->handle_doctype);
 
     Py_CLEAR(self->target);