bpo-36673: Implement comment/PI parsing support for the TreeBuilder in ElementTree. (#12883)

* bpo-36673: Implement comment/PI parsing support for the TreeBuilder in ElementTree.

* bpo-36673: Rewrite the comment/PI factory handling for the TreeBuilder in "_elementtree" to make it use the same factories as the ElementTree module, and to make it explicit when the comments/PIs are inserted into the tree and when they are not (which is the default).
diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c
index 1e58cd0..5481c61 100644
--- a/Modules/_elementtree.c
+++ b/Modules/_elementtree.c
@@ -92,6 +92,8 @@
     PyObject *parseerror_obj;
     PyObject *deepcopy_obj;
     PyObject *elementpath_obj;
+    PyObject *comment_factory;
+    PyObject *pi_factory;
 } elementtreestate;
 
 static struct PyModuleDef elementtreemodule;
@@ -114,6 +116,8 @@
     Py_CLEAR(st->parseerror_obj);
     Py_CLEAR(st->deepcopy_obj);
     Py_CLEAR(st->elementpath_obj);
+    Py_CLEAR(st->comment_factory);
+    Py_CLEAR(st->pi_factory);
     return 0;
 }
 
@@ -124,6 +128,8 @@
     Py_VISIT(st->parseerror_obj);
     Py_VISIT(st->deepcopy_obj);
     Py_VISIT(st->elementpath_obj);
+    Py_VISIT(st->comment_factory);
+    Py_VISIT(st->pi_factory);
     return 0;
 }
 
@@ -2385,6 +2391,8 @@
     Py_ssize_t index; /* current stack size (0 means empty) */
 
     PyObject *element_factory;
+    PyObject *comment_factory;
+    PyObject *pi_factory;
 
     /* element tracing */
     PyObject *events_append; /* the append method of the list of events, or NULL */
@@ -2392,6 +2400,11 @@
     PyObject *end_event_obj;
     PyObject *start_ns_event_obj;
     PyObject *end_ns_event_obj;
+    PyObject *comment_event_obj;
+    PyObject *pi_event_obj;
+
+    char insert_comments;
+    char insert_pis;
 } TreeBuilderObject;
 
 #define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
@@ -2413,6 +2426,8 @@
 
         t->data = NULL;
         t->element_factory = NULL;
+        t->comment_factory = NULL;
+        t->pi_factory = NULL;
         t->stack = PyList_New(20);
         if (!t->stack) {
             Py_DECREF(t->this);
@@ -2425,6 +2440,8 @@
         t->events_append = NULL;
         t->start_event_obj = t->end_event_obj = NULL;
         t->start_ns_event_obj = t->end_ns_event_obj = NULL;
+        t->comment_event_obj = t->pi_event_obj = NULL;
+        t->insert_comments = t->insert_pis = 0;
     }
     return (PyObject *)t;
 }
@@ -2433,17 +2450,53 @@
 _elementtree.TreeBuilder.__init__
 
     element_factory: object = NULL
+    *
+    comment_factory: object = NULL
+    pi_factory: object = NULL
+    insert_comments: bool = False
+    insert_pis: bool = False
 
 [clinic start generated code]*/
 
 static int
 _elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
-                                       PyObject *element_factory)
-/*[clinic end generated code: output=91cfa7558970ee96 input=1b424eeefc35249c]*/
+                                       PyObject *element_factory,
+                                       PyObject *comment_factory,
+                                       PyObject *pi_factory,
+                                       int insert_comments, int insert_pis)
+/*[clinic end generated code: output=8571d4dcadfdf952 input=1f967b5c245e0a71]*/
 {
-    if (element_factory) {
+    if (element_factory && element_factory != Py_None) {
         Py_INCREF(element_factory);
         Py_XSETREF(self->element_factory, element_factory);
+    } else {
+        Py_CLEAR(self->element_factory);
+    }
+
+    if (!comment_factory || comment_factory == Py_None) {
+        elementtreestate *st = ET_STATE_GLOBAL;
+        comment_factory = st->comment_factory;
+    }
+    if (comment_factory) {
+        Py_INCREF(comment_factory);
+        Py_XSETREF(self->comment_factory, comment_factory);
+        self->insert_comments = insert_comments;
+    } else {
+        Py_CLEAR(self->comment_factory);
+        self->insert_comments = 0;
+    }
+
+    if (!pi_factory || pi_factory == Py_None) {
+        elementtreestate *st = ET_STATE_GLOBAL;
+        pi_factory = st->pi_factory;
+    }
+    if (pi_factory) {
+        Py_INCREF(pi_factory);
+        Py_XSETREF(self->pi_factory, pi_factory);
+        self->insert_pis = insert_pis;
+    } else {
+        Py_CLEAR(self->pi_factory);
+        self->insert_pis = 0;
     }
 
     return 0;
@@ -2452,6 +2505,8 @@
 static int
 treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
 {
+    Py_VISIT(self->pi_event_obj);
+    Py_VISIT(self->comment_event_obj);
     Py_VISIT(self->end_ns_event_obj);
     Py_VISIT(self->start_ns_event_obj);
     Py_VISIT(self->end_event_obj);
@@ -2462,6 +2517,8 @@
     Py_VISIT(self->last);
     Py_VISIT(self->data);
     Py_VISIT(self->stack);
+    Py_VISIT(self->pi_factory);
+    Py_VISIT(self->comment_factory);
     Py_VISIT(self->element_factory);
     return 0;
 }
@@ -2469,6 +2526,8 @@
 static int
 treebuilder_gc_clear(TreeBuilderObject *self)
 {
+    Py_CLEAR(self->pi_event_obj);
+    Py_CLEAR(self->comment_event_obj);
     Py_CLEAR(self->end_ns_event_obj);
     Py_CLEAR(self->start_ns_event_obj);
     Py_CLEAR(self->end_event_obj);
@@ -2478,6 +2537,8 @@
     Py_CLEAR(self->data);
     Py_CLEAR(self->last);
     Py_CLEAR(self->this);
+    Py_CLEAR(self->pi_factory);
+    Py_CLEAR(self->comment_factory);
     Py_CLEAR(self->element_factory);
     Py_CLEAR(self->root);
     return 0;
@@ -2494,6 +2555,57 @@
 /* -------------------------------------------------------------------- */
 /* helpers for handling of arbitrary element-like objects */
 
+/*[clinic input]
+_elementtree._set_factories
+
+    comment_factory: object
+    pi_factory: object
+    /
+
+Change the factories used to create comments and processing instructions.
+
+For internal use only.
+[clinic start generated code]*/
+
+static PyObject *
+_elementtree__set_factories_impl(PyObject *module, PyObject *comment_factory,
+                                 PyObject *pi_factory)
+/*[clinic end generated code: output=813b408adee26535 input=99d17627aea7fb3b]*/
+{
+    elementtreestate *st = ET_STATE_GLOBAL;
+    PyObject *old;
+
+    if (!PyCallable_Check(comment_factory) && comment_factory != Py_None) {
+        PyErr_Format(PyExc_TypeError, "Comment factory must be callable, not %.100s",
+                     Py_TYPE(comment_factory)->tp_name);
+        return NULL;
+    }
+    if (!PyCallable_Check(pi_factory) && pi_factory != Py_None) {
+        PyErr_Format(PyExc_TypeError, "PI factory must be callable, not %.100s",
+                     Py_TYPE(pi_factory)->tp_name);
+        return NULL;
+    }
+
+    old = PyTuple_Pack(2,
+        st->comment_factory ? st->comment_factory : Py_None,
+        st->pi_factory ? st->pi_factory : Py_None);
+
+    if (comment_factory == Py_None) {
+        Py_CLEAR(st->comment_factory);
+    } else {
+        Py_INCREF(comment_factory);
+        Py_XSETREF(st->comment_factory, comment_factory);
+    }
+    if (pi_factory == Py_None) {
+        Py_CLEAR(st->pi_factory);
+    } else {
+        Py_INCREF(pi_factory);
+        Py_XSETREF(st->pi_factory, pi_factory);
+    }
+
+    return old;
+}
+
 static int
 treebuilder_set_element_text_or_tail(PyObject *element, PyObject **data,
                                      PyObject **dest, _Py_Identifier *name)
@@ -2569,7 +2681,7 @@
         PyObject *event = PyTuple_Pack(2, action, node);
         if (event == NULL)
             return -1;
-        res = PyObject_CallFunctionObjArgs(self->events_append, event, NULL);
+        res = _PyObject_FastCall(self->events_append, &event, 1);
         Py_DECREF(event);
         if (res == NULL)
             return -1;
@@ -2593,7 +2705,7 @@
         return NULL;
     }
 
-    if (!self->element_factory || self->element_factory == Py_None) {
+    if (!self->element_factory) {
         node = create_new_element(tag, attrib);
     } else if (attrib == Py_None) {
         attrib = PyDict_New();
@@ -2721,6 +2833,84 @@
     return (PyObject*) self->last;
 }
 
+LOCAL(PyObject*)
+treebuilder_handle_comment(TreeBuilderObject* self, PyObject* text)
+{
+    PyObject* comment = NULL;
+    PyObject* this;
+
+    if (treebuilder_flush_data(self) < 0) {
+        return NULL;
+    }
+
+    if (self->comment_factory) {
+        comment = _PyObject_FastCall(self->comment_factory, &text, 1);
+        if (!comment)
+            return NULL;
+
+        this = self->this;
+        if (self->insert_comments && this != Py_None) {
+            if (treebuilder_add_subelement(this, comment) < 0)
+                goto error;
+        }
+    } else {
+        Py_INCREF(text);
+        comment = text;
+    }
+
+    if (self->events_append && self->comment_event_obj) {
+        if (treebuilder_append_event(self, self->comment_event_obj, comment) < 0)
+            goto error;
+    }
+
+    return comment;
+
+  error:
+    Py_DECREF(comment);
+    return NULL;
+}
+
+LOCAL(PyObject*)
+treebuilder_handle_pi(TreeBuilderObject* self, PyObject* target, PyObject* text)
+{
+    PyObject* pi = NULL;
+    PyObject* this;
+    PyObject* stack[2] = {target, text};
+
+    if (treebuilder_flush_data(self) < 0) {
+        return NULL;
+    }
+
+    if (self->pi_factory) {
+        pi = _PyObject_FastCall(self->pi_factory, stack, 2);
+        if (!pi) {
+            return NULL;
+        }
+
+        this = self->this;
+        if (self->insert_pis && this != Py_None) {
+            if (treebuilder_add_subelement(this, pi) < 0)
+                goto error;
+        }
+    } else {
+        pi = PyTuple_Pack(2, target, text);
+        if (!pi) {
+            return NULL;
+        }
+    }
+
+    if (self->events_append && self->pi_event_obj) {
+        if (treebuilder_append_event(self, self->pi_event_obj, pi) < 0)
+            goto error;
+    }
+
+    return pi;
+
+  error:
+    Py_DECREF(pi);
+    return NULL;
+}
+
 /* -------------------------------------------------------------------- */
 /* methods (in alphabetical order) */
 
@@ -2754,6 +2944,38 @@
     return treebuilder_handle_end(self, tag);
 }
 
+/*[clinic input]
+_elementtree.TreeBuilder.comment
+
+    text: object
+    /
+
+[clinic start generated code]*/
+
+static PyObject *
+_elementtree_TreeBuilder_comment(TreeBuilderObject *self, PyObject *text)
+/*[clinic end generated code: output=22835be41deeaa27 input=47e7ebc48ed01dfa]*/
+{
+    return treebuilder_handle_comment(self, text);
+}
+
+/*[clinic input]
+_elementtree.TreeBuilder.pi
+
+    target: object
+    text: object = None
+    /
+
+[clinic start generated code]*/
+
+static PyObject *
+_elementtree_TreeBuilder_pi_impl(TreeBuilderObject *self, PyObject *target,
+                                 PyObject *text)
+/*[clinic end generated code: output=21eb95ec9d04d1d9 input=349342bd79c35570]*/
+{
+    return treebuilder_handle_pi(self, target, text);
+}
+
 LOCAL(PyObject*)
 treebuilder_done(TreeBuilderObject* self)
 {
@@ -2925,7 +3147,7 @@
     if (errmsg == NULL)
         return;
 
-    error = PyObject_CallFunctionObjArgs(st->parseerror_obj, errmsg, NULL);
+    error = _PyObject_FastCall(st->parseerror_obj, &errmsg, 1);
     Py_DECREF(errmsg);
     if (!error)
         return;
@@ -2988,7 +3210,7 @@
                 (TreeBuilderObject*) self->target, value
                 );
         else if (self->handle_data)
-            res = PyObject_CallFunctionObjArgs(self->handle_data, value, NULL);
+            res = _PyObject_FastCall(self->handle_data, &value, 1);
         else
             res = NULL;
         Py_XDECREF(res);
@@ -3099,7 +3321,7 @@
         /* shortcut */
         res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
     else if (self->handle_data)
-        res = PyObject_CallFunctionObjArgs(self->handle_data, data, NULL);
+        res = _PyObject_FastCall(self->handle_data, &data, 1);
     else
         res = NULL;
 
@@ -3126,7 +3348,7 @@
     else if (self->handle_end) {
         tag = makeuniversal(self, tag_in);
         if (tag) {
-            res = PyObject_CallFunctionObjArgs(self->handle_end, tag, NULL);
+            res = _PyObject_FastCall(self->handle_end, &tag, 1);
             Py_DECREF(tag);
         }
     }
@@ -3176,21 +3398,31 @@
 static void
 expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
 {
-    PyObject* comment;
-    PyObject* res;
+    PyObject* comment = NULL;
+    PyObject* res = NULL;
 
     if (PyErr_Occurred())
         return;
 
-    if (self->handle_comment) {
+    if (TreeBuilder_CheckExact(self->target)) {
+        /* shortcut */
+        TreeBuilderObject *target = (TreeBuilderObject*) self->target;
+
         comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
-        if (comment) {
-            res = PyObject_CallFunctionObjArgs(self->handle_comment,
-                                               comment, NULL);
-            Py_XDECREF(res);
-            Py_DECREF(comment);
-        }
+        if (!comment)
+            return; /* parser will look for errors */
+
+        res = treebuilder_handle_comment(target,  comment);
+    } else if (self->handle_comment) {
+        comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
+        if (!comment)
+            return;
+
+        res = _PyObject_FastCall(self->handle_comment, &comment, 1);
     }
+
+    Py_XDECREF(res);
+    Py_DECREF(comment);
 }
 
 static void
@@ -3258,27 +3490,51 @@
 expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
                  const XML_Char* data_in)
 {
-    PyObject* target;
+    PyObject* pi_target = NULL;
     PyObject* data;
     PyObject* res;
+    PyObject* stack[2];
 
     if (PyErr_Occurred())
         return;
 
-    if (self->handle_pi) {
-        target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
-        data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
-        if (target && data) {
-            res = PyObject_CallFunctionObjArgs(self->handle_pi,
-                                               target, data, NULL);
+    if (TreeBuilder_CheckExact(self->target)) {
+        /* shortcut */
+        TreeBuilderObject *target = (TreeBuilderObject*) self->target;
+
+        if (target->events_append && target->pi_event_obj) {
+            pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
+            if (!pi_target)
+                goto error;
+            data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
+            if (!data)
+                goto error;
+            res = treebuilder_handle_pi(target, pi_target, data);
             Py_XDECREF(res);
             Py_DECREF(data);
-            Py_DECREF(target);
-        } else {
-            Py_XDECREF(data);
-            Py_XDECREF(target);
+            Py_DECREF(pi_target);
         }
+    } else if (self->handle_pi) {
+        pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
+        if (!pi_target)
+            goto error;
+        data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
+        if (!data)
+            goto error;
+
+        stack[0] = pi_target;
+        stack[1] = data;
+        res = _PyObject_FastCall(self->handle_pi, stack, 2);
+        Py_XDECREF(res);
+        Py_DECREF(data);
+        Py_DECREF(pi_target);
     }
+
+    return;
+
+  error:
+    Py_XDECREF(pi_target);
+    return;
 }
 
 /* -------------------------------------------------------------------- */
@@ -3695,6 +3951,8 @@
     Py_CLEAR(target->end_event_obj);
     Py_CLEAR(target->start_ns_event_obj);
     Py_CLEAR(target->end_ns_event_obj);
+    Py_CLEAR(target->comment_event_obj);
+    Py_CLEAR(target->pi_event_obj);
 
     if (events_to_report == Py_None) {
         /* default is "end" only */
@@ -3740,6 +3998,18 @@
                 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
                 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
                 );
+        } else if (strcmp(event_name, "comment") == 0) {
+            Py_XSETREF(target->comment_event_obj, event_name_obj);
+            EXPAT(SetCommentHandler)(
+                self->parser,
+                (XML_CommentHandler) expat_comment_handler
+                );
+        } else if (strcmp(event_name, "pi") == 0) {
+            Py_XSETREF(target->pi_event_obj, event_name_obj);
+            EXPAT(SetProcessingInstructionHandler)(
+                self->parser,
+                (XML_ProcessingInstructionHandler) expat_pi_handler
+                );
         } else {
             Py_DECREF(event_name_obj);
             Py_DECREF(events_seq);
@@ -3882,6 +4152,8 @@
     _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
     _ELEMENTTREE_TREEBUILDER_START_METHODDEF
     _ELEMENTTREE_TREEBUILDER_END_METHODDEF
+    _ELEMENTTREE_TREEBUILDER_COMMENT_METHODDEF
+    _ELEMENTTREE_TREEBUILDER_PI_METHODDEF
     _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
     {NULL, NULL}
 };
@@ -3983,6 +4255,7 @@
 
 static PyMethodDef _functions[] = {
     {"SubElement", (PyCFunction)(void(*)(void)) subelement, METH_VARARGS | METH_KEYWORDS},
+    _ELEMENTTREE__SET_FACTORIES_METHODDEF
     {NULL, NULL}
 };