[3.6] bpo-30604: Move co_extra_freefuncs to interpreter state to avoid crashes in threads (#2015)

* Move co_extra_freefuncs to interpreter state to avoid crashes in
multi-threaded scenarios involving deletion of code objects

* Don't require that extra be zero initialized

* Build test list instead of defining empty test class

* Ensure extra is always assigned on success

* Keep the old fields in the thread state object; just don't use them
Add new linked list of code extra objects on a per-interpreter basis
  so that interpreter state size isn't changed

* Rename __PyCodeExtraState_Get and add comment about it going away in 3.7
Fix sort order of imports in test_code.py

* Remove an extraneous space

* Remove docstrings for comments

* Touch up formatting

* Fix casing of coextra local

* Fix casing of another variable

* Prefix PyCodeExtraState with __ to match C API for getting it

* Update NEWS file for bpo-30604
diff --git a/Python/ceval.c b/Python/ceval.c
index eba892c..ea79f5f 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -5453,14 +5453,14 @@
 Py_ssize_t
 _PyEval_RequestCodeExtraIndex(freefunc free)
 {
-    PyThreadState *tstate = PyThreadState_Get();
+    __PyCodeExtraState *state = __PyCodeExtraState_Get();
     Py_ssize_t new_index;
 
-    if (tstate->co_extra_user_count == MAX_CO_EXTRA_USERS - 1) {
+    if (state->co_extra_user_count == MAX_CO_EXTRA_USERS - 1) {
         return -1;
     }
-    new_index = tstate->co_extra_user_count++;
-    tstate->co_extra_freefuncs[new_index] = free;
+    new_index = state->co_extra_user_count++;
+    state->co_extra_freefuncs[new_index] = free;
     return new_index;
 }
 
diff --git a/Python/pystate.c b/Python/pystate.c
index ccb0092..92d08c4 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -55,6 +55,7 @@
 #endif
 
 static PyInterpreterState *interp_head = NULL;
+static __PyCodeExtraState *coextra_head = NULL;
 
 /* Assuming the current thread holds the GIL, this is the
    PyThreadState for the current thread. */
@@ -73,6 +74,12 @@
                                  PyMem_RawMalloc(sizeof(PyInterpreterState));
 
     if (interp != NULL) {
+        __PyCodeExtraState* coextra = PyMem_RawMalloc(sizeof(__PyCodeExtraState));
+        if (coextra == NULL) {
+            PyMem_RawFree(interp);
+            return NULL;
+        }
+
         HEAD_INIT();
 #ifdef WITH_THREAD
         if (head_mutex == NULL)
@@ -92,6 +99,8 @@
         interp->importlib = NULL;
         interp->import_func = NULL;
         interp->eval_frame = _PyEval_EvalFrameDefault;
+        coextra->co_extra_user_count = 0;
+        coextra->interp = interp;
 #ifdef HAVE_DLOPEN
 #if HAVE_DECL_RTLD_NOW
         interp->dlopenflags = RTLD_NOW;
@@ -103,6 +112,8 @@
         HEAD_LOCK();
         interp->next = interp_head;
         interp_head = interp;
+        coextra->next = coextra_head;
+        coextra_head = coextra;
         HEAD_UNLOCK();
     }
 
@@ -147,9 +158,10 @@
 PyInterpreterState_Delete(PyInterpreterState *interp)
 {
     PyInterpreterState **p;
+    __PyCodeExtraState **pextra;
     zapthreads(interp);
     HEAD_LOCK();
-    for (p = &interp_head; ; p = &(*p)->next) {
+    for (p = &interp_head; /* N/A */; p = &(*p)->next) {
         if (*p == NULL)
             Py_FatalError(
                 "PyInterpreterState_Delete: invalid interp");
@@ -159,6 +171,18 @@
     if (interp->tstate_head != NULL)
         Py_FatalError("PyInterpreterState_Delete: remaining threads");
     *p = interp->next;
+
+    for (pextra = &coextra_head; ; pextra = &(*pextra)->next) {
+        if (*pextra == NULL)
+            Py_FatalError(
+                "PyInterpreterState_Delete: invalid extra");
+        __PyCodeExtraState* extra = *pextra;
+        if (extra->interp == interp) {
+            *pextra = extra->next;
+            PyMem_RawFree(extra);
+            break;
+        }
+    }
     HEAD_UNLOCK();
     PyMem_RawFree(interp);
 #ifdef WITH_THREAD
@@ -224,7 +248,6 @@
 
         tstate->coroutine_wrapper = NULL;
         tstate->in_coroutine_wrapper = 0;
-        tstate->co_extra_user_count = 0;
 
         tstate->async_gen_firstiter = NULL;
         tstate->async_gen_finalizer = NULL;
@@ -548,6 +571,28 @@
     return oldts;
 }
 
+/* Return the code-extra state whose interp field matches the current
+   thread state's interpreter.  The coextra list is walked with the head
+   lock held; if no entry matches, Py_FatalError() is called.
+   Temporary helper for 3.6 only; it is slated to go away in 3.7. */
+__PyCodeExtraState *
+__PyCodeExtraState_Get(void)
+{
+    PyInterpreterState* interp = PyThreadState_Get()->interp;
+
+    HEAD_LOCK();
+    for (__PyCodeExtraState* cur = coextra_head; cur != NULL; cur = cur->next) {
+        if (cur->interp == interp) {
+            /* Release the lock before handing the entry back. */
+            HEAD_UNLOCK();
+            return cur;
+        }
+    }
+    HEAD_UNLOCK();
+
+    Py_FatalError("__PyCodeExtraState_Get: no code state for interpreter");
+    return NULL;
+}
+
 /* An extension mechanism to store arbitrary additional per-thread state.
    PyThreadState_GetDict() returns a dictionary that can be used to hold such
    state; the caller should pick a unique key and store its state there.  If