bpo-36012: Avoid linear slot search for non-dunder methods (GH-11907)

diff --git a/Doc/whatsnew/3.8.rst b/Doc/whatsnew/3.8.rst
index 6bc1314..66e1c13 100644
--- a/Doc/whatsnew/3.8.rst
+++ b/Doc/whatsnew/3.8.rst
@@ -375,6 +375,11 @@
   This makes the created list 12% smaller on average. (Contributed by
   Raymond Hettinger and Pablo Galindo in :issue:`33234`.)
 
+* Doubled the speed of class variable writes.  When a non-dunder attribute
+  was updated, there was an unnecessary call to update slots.
+  (Contributed by Stefan Behnel, Pablo Galindo Salgado, Raymond Hettinger,
+  Neil Schemenauer, and Serhiy Storchaka in :issue:`36012`.)
+
 
 Build and C API Changes
 =======================
diff --git a/Misc/NEWS.d/next/Core and Builtins/2019-02-19-10-47-51.bpo-36012.xq7C9E.rst b/Misc/NEWS.d/next/Core and Builtins/2019-02-19-10-47-51.bpo-36012.xq7C9E.rst
new file mode 100644
index 0000000..ff3fdbf
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2019-02-19-10-47-51.bpo-36012.xq7C9E.rst
@@ -0,0 +1,2 @@
+Doubled the speed of class variable writes.  When a non-dunder attribute was
+updated, there was an unnecessary call to update slots.
diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index e6cf4fb..4234726 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -3164,6 +3164,24 @@
     return _PyType_Lookup(type, oname);
 }
 
+/* Return 1 if the "readied" PyUnicode 'name' has the "__x__" dunder shape
+   (1-byte kind, length > 4, "__" prefix and "__" suffix), else return 0. */
+static int
+is_dunder_name(PyObject *name)
+{
+    Py_ssize_t length = PyUnicode_GET_LENGTH(name);
+    int kind = PyUnicode_KIND(name);
+    /* Special names are at least "__x__" and always ASCII; others return 0. */
+    if (length > 4 && kind == PyUnicode_1BYTE_KIND) {
+        const Py_UCS1 *characters = PyUnicode_1BYTE_DATA(name);
+        return (
+            ((characters[length-2] == '_') && (characters[length-1] == '_')) &&
+            ((characters[0] == '_') && (characters[1] == '_'))
+        );
+    }
+    return 0;
+}
+
 /* This is similar to PyObject_GenericGetAttr(),
    but uses _PyType_Lookup() instead of just looking in type->tp_dict. */
 static PyObject *
@@ -3275,12 +3293,14 @@
             if (name == NULL)
                 return -1;
         }
-        PyUnicode_InternInPlace(&name);
         if (!PyUnicode_CHECK_INTERNED(name)) {
-            PyErr_SetString(PyExc_MemoryError,
-                            "Out of memory interning an attribute name");
-            Py_DECREF(name);
-            return -1;
+            PyUnicode_InternInPlace(&name);
+            if (!PyUnicode_CHECK_INTERNED(name)) {
+                PyErr_SetString(PyExc_MemoryError,
+                                "Out of memory interning an attribute name");
+                Py_DECREF(name);
+                return -1;
+            }
         }
     }
     else {
@@ -3289,7 +3309,16 @@
     }
     res = _PyObject_GenericSetAttrWithDict((PyObject *)type, name, value, NULL);
     if (res == 0) {
-        res = update_slot(type, name);
+        /* Clear the VALID_VERSION flag of 'type' and all its
+           subclasses.  This could possibly be unified with the
+           update_subclasses() recursion in update_slot(), but carefully:
+           they each have their own conditions on which to stop
+           recursing into subclasses. */
+        PyType_Modified(type);
+
+        if (is_dunder_name(name)) {
+            res = update_slot(type, name);
+        }
         assert(_PyType_CheckConsistency(type));
     }
     Py_DECREF(name);
@@ -7236,13 +7265,6 @@
     assert(PyUnicode_CheckExact(name));
     assert(PyUnicode_CHECK_INTERNED(name));
 
-    /* Clear the VALID_VERSION flag of 'type' and all its
-       subclasses.  This could possibly be unified with the
-       update_subclasses() recursion below, but carefully:
-       they each have their own conditions on which to stop
-       recursing into subclasses. */
-    PyType_Modified(type);
-
     init_slotdefs();
     pp = ptrs;
     for (p = slotdefs; p->name; p++) {
@@ -7281,6 +7303,9 @@
 {
     slotdef *p;
 
+    /* Clear the VALID_VERSION flag of 'type' and all its subclasses. */
+    PyType_Modified(type);
+
     init_slotdefs();
     for (p = slotdefs; p->name; p++) {
         /* update_slot returns int but can't actually fail */