Integrate the changes from PyXML's version of pyexpat.c revisions
1.47, 1.48, 1.49 (name interning support).
diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c
index ae8bf1b..7c70f1b 100644
--- a/Modules/pyexpat.c
+++ b/Modules/pyexpat.c
@@ -60,6 +60,7 @@
     int ordered_attributes;     /* Return attributes as a list. */
     int specified_attributes;   /* Report only specified attributes. */
     int in_callback;            /* Is a callback active? */
+    PyObject *intern;           /* Dictionary to intern strings */
     PyObject **handlers;
 } xmlparseobject;
 
@@ -123,7 +124,7 @@
    Returns None if str is a null pointer. */
 
 static PyObject *
-conv_string_to_unicode(XML_Char *str)
+conv_string_to_unicode(const XML_Char *str)
 {
     /* XXX currently this code assumes that XML_Char is 8-bit, 
        and hence in UTF-8.  */
@@ -132,8 +133,7 @@
         Py_INCREF(Py_None);
         return Py_None;
     }
-    return PyUnicode_DecodeUTF8((const char *)str, 
-                                strlen((const char *)str), 
+    return PyUnicode_DecodeUTF8(str, strlen(str), 
                                 "strict");
 }
 
@@ -155,7 +155,7 @@
    Returns None if str is a null pointer. */
 
 static PyObject *
-conv_string_to_utf8(XML_Char *str)
+conv_string_to_utf8(const XML_Char *str)
 {
     /* XXX currently this code assumes that XML_Char is 8-bit, 
        and hence in UTF-8.  */
@@ -164,7 +164,7 @@
         Py_INCREF(Py_None);
         return Py_None;
     }
-    return PyString_FromString((const char *)str);
+    return PyString_FromString(str);
 }
 
 static PyObject *
@@ -275,6 +275,25 @@
                           ? conv_string_to_unicode : conv_string_to_utf8)
 #endif
 
+static PyObject*  /* Convert str; return the copy shared via self->intern. */
+string_intern(xmlparseobject *self, const char* str)
+{   /* Returns a new reference, or NULL on failure. */
+    PyObject *result = STRING_CONV_FUNC(str);
+    PyObject *value;
+    if (!result || !self->intern)  /* conversion failed, or interning is off */
+        return result;
+    value = PyDict_GetItem(self->intern, result);  /* borrowed reference */
+    if (!value) {
+        if (PyDict_SetItem(self->intern, result, result) == 0)
+            return result;  /* first occurrence: result becomes the shared copy */
+        Py_DECREF(result);  /* insertion failed: release our reference */
+        return NULL;
+    }
+    Py_INCREF(value);
+    Py_DECREF(result);
+    return value;
+}
+
 static void
 my_StartElementHandler(void *userData,
                        const XML_Char *name, const XML_Char **atts)
@@ -307,7 +326,7 @@
             return;
         }
         for (i = 0; i < max; i += 2) {
-            PyObject *n = STRING_CONV_FUNC((XML_Char *) atts[i]);
+            PyObject *n = string_intern(self, (XML_Char *) atts[i]);
             PyObject *v;
             if (n == NULL) {
                 flag_error(self);
@@ -336,7 +355,7 @@
                 Py_DECREF(v);
             }
         }
-        args = Py_BuildValue("(O&N)", STRING_CONV_FUNC,name, container);
+	args = Py_BuildValue("(NN)", string_intern(self, name), container);
         if (args == NULL) {
             Py_DECREF(container);
             return;
@@ -394,13 +413,13 @@
 
 VOID_HANDLER(EndElement, 
              (void *userData, const XML_Char *name), 
-             ("(O&)", STRING_CONV_FUNC, name))
+             ("(N)", string_intern(self, name)))
 
 VOID_HANDLER(ProcessingInstruction,
              (void *userData, 
               const XML_Char *target, 
               const XML_Char *data),
-             ("(O&O&)",STRING_CONV_FUNC,target, STRING_CONV_FUNC,data))
+             ("(NO&)", string_intern(self, target), STRING_CONV_FUNC,data))
 
 #ifndef Py_USING_UNICODE
 VOID_HANDLER(CharacterData, 
@@ -421,10 +440,10 @@
               const XML_Char *systemId,
               const XML_Char *publicId,
               const XML_Char *notationName),
-             ("(O&O&O&O&O&)", 
-              STRING_CONV_FUNC,entityName, STRING_CONV_FUNC,base, 
-              STRING_CONV_FUNC,systemId, STRING_CONV_FUNC,publicId, 
-              STRING_CONV_FUNC,notationName))
+             ("(NNNNN)",
+              string_intern(self, entityName), string_intern(self, base), 
+              string_intern(self, systemId), string_intern(self, publicId), 
+              string_intern(self, notationName)))
 
 #ifndef Py_USING_UNICODE
 VOID_HANDLER(EntityDecl,
@@ -437,11 +456,12 @@
               const XML_Char *systemId,
               const XML_Char *publicId,
               const XML_Char *notationName),
-             ("O&iNO&O&O&O&",
-              STRING_CONV_FUNC,entityName, is_parameter_entity,
+             ("NiNNNNN",
+              string_intern(self, entityName), is_parameter_entity,
               conv_string_len_to_utf8(value, value_length),
-              STRING_CONV_FUNC,base, STRING_CONV_FUNC,systemId,
-              STRING_CONV_FUNC,publicId, STRING_CONV_FUNC,notationName))
+              string_intern(self, base), string_intern(self, systemId),
+              string_intern(self, publicId),
+              string_intern(self, notationName)))
 #else
 VOID_HANDLER(EntityDecl,
              (void *userData,
@@ -453,13 +473,14 @@
               const XML_Char *systemId,
               const XML_Char *publicId,
               const XML_Char *notationName),
-             ("O&iNO&O&O&O&",
-              STRING_CONV_FUNC,entityName, is_parameter_entity,
+             ("NiNNNNN",
+              string_intern(self, entityName), is_parameter_entity,
               (self->returns_unicode 
                ? conv_string_len_to_unicode(value, value_length) 
                : conv_string_len_to_utf8(value, value_length)),
-              STRING_CONV_FUNC,base, STRING_CONV_FUNC,systemId,
-              STRING_CONV_FUNC,publicId, STRING_CONV_FUNC,notationName))
+              string_intern(self, base), string_intern(self, systemId),
+              string_intern(self, publicId),
+              string_intern(self, notationName)))
 #endif
 
 VOID_HANDLER(XmlDecl,
@@ -473,7 +494,7 @@
 
 static PyObject *
 conv_content_model(XML_Content * const model,
-                   PyObject *(*conv_string)(XML_Char *))
+                   PyObject *(*conv_string)(const XML_Char *))
 {
     PyObject *result = NULL;
     PyObject *children = PyTuple_New(model->numchildren);
@@ -514,8 +535,8 @@
              (void *userData,
               const XML_Char *name,
               XML_Content *model),
-             ("O&O&",
-              STRING_CONV_FUNC,name,
+             ("NO&",
+              string_intern(self, name),
               (self->returns_unicode ? conv_content_model_unicode
                                      : conv_content_model_utf8),model))
 #else
@@ -523,8 +544,8 @@
              (void *userData,
               const XML_Char *name,
               XML_Content *model),
-             ("O&O&",
-              STRING_CONV_FUNC,name, conv_content_model_utf8,model))
+             ("NO&",
+              string_intern(self, name), conv_content_model_utf8,model))
 #endif
 
 VOID_HANDLER(AttlistDecl,
@@ -534,8 +555,8 @@
               const XML_Char *att_type,
               const XML_Char *dflt,
               int isrequired),
-             ("(O&O&O&O&i)",
-              STRING_CONV_FUNC,elname, STRING_CONV_FUNC,attname,
+             ("(NNO&O&i)",
+              string_intern(self, elname), string_intern(self, attname),
               STRING_CONV_FUNC,att_type, STRING_CONV_FUNC,dflt,
               isrequired))
 
@@ -545,24 +566,25 @@
 			const XML_Char *base,
 			const XML_Char *systemId,
 			const XML_Char *publicId),
-                ("(O&O&O&O&)", 
-		 STRING_CONV_FUNC,notationName, STRING_CONV_FUNC,base, 
-		 STRING_CONV_FUNC,systemId, STRING_CONV_FUNC,publicId))
+                ("(NNNN)",
+		 string_intern(self, notationName), string_intern(self, base), 
+		 string_intern(self, systemId), string_intern(self, publicId)))
 
 VOID_HANDLER(StartNamespaceDecl,
 		(void *userData,
 		      const XML_Char *prefix,
 		      const XML_Char *uri),
-                ("(O&O&)", STRING_CONV_FUNC,prefix, STRING_CONV_FUNC,uri))
+                ("(NN)",
+                 string_intern(self, prefix), string_intern(self, uri)))
 
 VOID_HANDLER(EndNamespaceDecl,
 		(void *userData,
 		    const XML_Char *prefix),
-                ("(O&)", STRING_CONV_FUNC,prefix))
+                ("(N)", string_intern(self, prefix)))
 
 VOID_HANDLER(Comment,
-               (void *userData, const XML_Char *prefix),
-                ("(O&)", STRING_CONV_FUNC,prefix))
+               (void *userData, const XML_Char *data),
+                ("(O&)", STRING_CONV_FUNC,data))
 
 VOID_HANDLER(StartCdataSection,
                (void *userData),
@@ -605,9 +627,9 @@
 		    const XML_Char *systemId,
 		    const XML_Char *publicId),
 		int rc=0;,
-                ("(O&O&O&O&)", 
-		 STRING_CONV_FUNC,context, STRING_CONV_FUNC,base, 
-		 STRING_CONV_FUNC,systemId, STRING_CONV_FUNC,publicId),
+                ("(O&NNN)",
+		 STRING_CONV_FUNC,context, string_intern(self, base), 
+		 string_intern(self, systemId), string_intern(self, publicId)),
 		rc = PyInt_AsLong(rv);, rc,
 		XML_GetUserData(parser))
 
@@ -617,8 +639,8 @@
              (void *userData, const XML_Char *doctypeName,
               const XML_Char *sysid, const XML_Char *pubid,
               int has_internal_subset),
-             ("(O&O&O&i)", STRING_CONV_FUNC,doctypeName,
-              STRING_CONV_FUNC,sysid, STRING_CONV_FUNC,pubid,
+             ("(NNNi)", string_intern(self, doctypeName),
+              string_intern(self, sysid), string_intern(self, pubid),
               has_internal_subset))
 
 VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
@@ -856,6 +878,8 @@
     new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
 							encoding);
     new_parser->handlers = 0;
+    new_parser->intern = self->intern;
+    Py_XINCREF(new_parser->intern);
 #ifdef Py_TPFLAGS_HAVE_GC
     PyObject_GC_Track(new_parser);
 #else
@@ -988,7 +1012,7 @@
 #endif
 
 static PyObject *
-newxmlparseobject(char *encoding, char *namespace_separator)
+newxmlparseobject(char *encoding, char *namespace_separator, PyObject *intern)
 {
     int i;
     xmlparseobject *self;
@@ -1022,6 +1046,8 @@
     else {
         self->itself = XML_ParserCreate(encoding);
     }
+    self->intern = intern;
+    Py_XINCREF(self->intern);
 #ifdef Py_TPFLAGS_HAVE_GC
     PyObject_GC_Track(self);
 #else
@@ -1074,6 +1100,7 @@
         }
         free(self->handlers);
     }
+    Py_XDECREF(self->intern);
 #if PY_MAJOR_VERSION == 1 && PY_MINOR_VERSION < 6
     /* Code for versions before 1.6 */
     free(self);
@@ -1118,6 +1145,16 @@
         return PyInt_FromLong((long) self->returns_unicode);
     if (strcmp(name, "specified_attributes") == 0)
         return PyInt_FromLong((long) self->specified_attributes);
+    if (strcmp(name, "intern") == 0) {
+        if (self->intern == NULL) {
+            Py_INCREF(Py_None);
+            return Py_None;
+        }
+        else {
+            Py_INCREF(self->intern);
+            return self->intern;
+        }
+    }
 
     handlernum = handlername2int(name);
 
@@ -1138,6 +1175,7 @@
         PyList_Append(rc, PyString_FromString("ordered_attributes"));
         PyList_Append(rc, PyString_FromString("returns_unicode"));
         PyList_Append(rc, PyString_FromString("specified_attributes"));
+        PyList_Append(rc, PyString_FromString("intern"));
 
         return rc;
     }
@@ -1221,6 +1259,8 @@
 xmlparse_clear(xmlparseobject *op)
 {
     clear_handlers(op, 0);
+    Py_XDECREF(op->intern);
+    op->intern = 0;
     return 0;
 }
 #endif
@@ -1275,10 +1315,14 @@
 {
     char *encoding = NULL;
     char *namespace_separator = NULL;
-    static char *kwlist[] = {"encoding", "namespace_separator", NULL};
+    PyObject *intern = NULL;
+    PyObject *result;
+    int intern_decref = 0;
+    static char *kwlist[] = {"encoding", "namespace_separator", 
+			     "intern", NULL};
 
-    if (!PyArg_ParseTupleAndKeywords(args, kw, "|zz:ParserCreate", kwlist,
-                                     &encoding, &namespace_separator))
+    if (!PyArg_ParseTupleAndKeywords(args, kw, "|zzO:ParserCreate", kwlist,
+                                     &encoding, &namespace_separator, &intern))
         return NULL;
     if (namespace_separator != NULL
         && strlen(namespace_separator) > 1) {
@@ -1287,7 +1331,26 @@
                         " character, omitted, or None");
         return NULL;
     }
-    return newxmlparseobject(encoding, namespace_separator);
+    /* Explicitly passing None means no interning is desired.
+       Not passing anything means that a new dictionary is used. */
+    if (intern == Py_None)
+	intern = NULL;
+    else if (intern == NULL) {
+	intern = PyDict_New();
+	if (!intern)
+	    return NULL;
+	intern_decref = 1;
+    } 
+    else if (!PyDict_Check(intern)) {
+	PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
+	return NULL;
+    }
+
+    result = newxmlparseobject(encoding, namespace_separator, intern);
+    if (intern_decref) {
+	Py_DECREF(intern);
+    }
+    return result;
 }
 
 PyDoc_STRVAR(pyexpat_ErrorString__doc__,