Fix bug:

[ 1163563 ] Sub threads execute in restricted mode

basically by fixing bug #1010677 ("thread Module Breaks
PyGILState_Ensure()") in a non-broken way.

Backport candidate.
diff --git a/Misc/NEWS b/Misc/NEWS
index 4763598..80f7dc7 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,12 @@
 Core and builtins
 -----------------
 
+- SF bug #1163563: the original fix for bug #1010677 ("thread Module
+  Breaks PyGILState_Ensure()") broke badly in the case of multiple
+  interpreter states; back out that fix and do a better job (see
+  http://mail.python.org/pipermail/python-dev/2005-June/054258.html
+  for a longer write-up of the problem).
+
 - SF patch #1180995: marshal now uses a binary format by default when
   serializing floats.
 
diff --git a/Modules/threadmodule.c b/Modules/threadmodule.c
index 098a784..3025595 100644
--- a/Modules/threadmodule.c
+++ b/Modules/threadmodule.c
@@ -413,10 +413,12 @@
 t_bootstrap(void *boot_raw)
 {
 	struct bootstate *boot = (struct bootstate *) boot_raw;
-	PyGILState_STATE gstate;
+	PyThreadState *tstate;
 	PyObject *res;
 
-	gstate = PyGILState_Ensure();
+	tstate = PyThreadState_New(boot->interp);
+
+	PyEval_AcquireThread(tstate);
 	res = PyEval_CallObjectWithKeywords(
 		boot->func, boot->args, boot->keyw);
 	if (res == NULL) {
@@ -441,7 +443,8 @@
 	Py_DECREF(boot->args);
 	Py_XDECREF(boot->keyw);
 	PyMem_DEL(boot_raw);
-	PyGILState_Release(gstate);
+	PyThreadState_Clear(tstate);
+	PyThreadState_DeleteCurrent();
 	PyThread_exit_thread();
 }
 
diff --git a/Python/pystate.c b/Python/pystate.c
index e2cf7c5..3ac799c 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -36,6 +36,12 @@
 #define HEAD_INIT() (void)(head_mutex || (head_mutex = PyThread_allocate_lock()))
 #define HEAD_LOCK() PyThread_acquire_lock(head_mutex, WAIT_LOCK)
 #define HEAD_UNLOCK() PyThread_release_lock(head_mutex)
+
+/* The single PyInterpreterState used by this process'
+   GILState implementation
+*/
+static PyInterpreterState *autoInterpreterState = NULL;
+static int autoTLSkey = 0;
 #else
 #define HEAD_INIT() /* Nothing */
 #define HEAD_LOCK() /* Nothing */
@@ -47,6 +53,8 @@
 PyThreadState *_PyThreadState_Current = NULL;
 PyThreadFrameGetter _PyThreadState_GetFrame = NULL;
 
+static void _PyGILState_NoteThreadState(PyThreadState* tstate);
+
 
 PyInterpreterState *
 PyInterpreterState_New(void)
@@ -180,6 +188,8 @@
 		tstate->c_profileobj = NULL;
 		tstate->c_traceobj = NULL;
 
+		_PyGILState_NoteThreadState(tstate);
+
 		HEAD_LOCK();
 		tstate->next = interp->tstate_head;
 		interp->tstate_head = tstate;
@@ -261,6 +271,8 @@
 			"PyThreadState_DeleteCurrent: no current tstate");
 	_PyThreadState_Current = NULL;
 	tstate_delete_common(tstate);
+	if (autoTLSkey && PyThread_get_key_value(autoTLSkey) == tstate)
+		PyThread_delete_key_value(autoTLSkey);
 	PyEval_ReleaseLock();
 }
 #endif /* WITH_THREAD */
@@ -393,12 +405,6 @@
 	return tstate == _PyThreadState_Current;
 }
 
-/* The single PyInterpreterState used by this process'
-   GILState implementation
-*/
-static PyInterpreterState *autoInterpreterState = NULL;
-static int autoTLSkey = 0;
-
 /* Internal initialization/finalization functions called by
    Py_Initialize/Py_Finalize
 */
@@ -408,12 +414,10 @@
 	assert(i && t); /* must init with valid states */
 	autoTLSkey = PyThread_create_key();
 	autoInterpreterState = i;
-	/* Now stash the thread state for this thread in TLS */
 	assert(PyThread_get_key_value(autoTLSkey) == NULL);
-	if (PyThread_set_key_value(autoTLSkey, (void *)t) < 0)
-		Py_FatalError("Couldn't create autoTLSkey mapping");
-	assert(t->gilstate_counter == 0); /* must be a new thread state */
-	t->gilstate_counter = 1;
+	assert(t->gilstate_counter == 0);
+
+	_PyGILState_NoteThreadState(t);
 }
 
 void
@@ -424,6 +428,41 @@
 	autoInterpreterState = NULL;;
 }
 
+/* Store tstate in the thread-local autoTLSkey slot (fatal error on failure)
+   and set its gilstate_counter to 1; a no-op while autoTLSkey is still 0.
+   A thread state created by some mechanism other than PyGILState_Ensure must
+   be known to the GILState machinery so it doesn't create another one for the
+   thread (a better fix for SF bug #1010677 than the first one attempted). */
+void
+_PyGILState_NoteThreadState(PyThreadState* tstate)
+{
+	/* If autoTLSkey is 0, this must be the very first threadstate created
+	   in Py_Initialize().  Don't do anything for now (we'll be back here
+	   when _PyGILState_Init is called). */
+	if (!autoTLSkey) 
+		return;
+	
+	/* Stick the thread state for this thread in thread local storage.
+
+	   The only situation where you can legitimately have more than one
+	   thread state for an OS level thread is when there are multiple
+	   interpreters, when:
+	       
+	       a) You shouldn't really be using the PyGILState_ APIs anyway,
+	          and:
+
+	       b) The slightly odd way PyThread_set_key_value works (see
+	          comments by its implementation) means that the first thread
+	          state created for that given OS level thread will "win",
+	          which seems reasonable behaviour.
+	*/
+	if (PyThread_set_key_value(autoTLSkey, (void *)tstate) < 0)
+		Py_FatalError("Couldn't create autoTLSkey mapping");
+
+	/* PyGILState_Release must not try to delete this thread state. */
+	tstate->gilstate_counter = 1;
+}
+
 /* The public functions */
 PyThreadState *
 PyGILState_GetThisThreadState(void)
@@ -450,8 +489,9 @@
 		tcur = PyThreadState_New(autoInterpreterState);
 		if (tcur == NULL)
 			Py_FatalError("Couldn't create thread-state for new thread");
-		if (PyThread_set_key_value(autoTLSkey, (void *)tcur) < 0)
-			Py_FatalError("Couldn't create autoTLSkey mapping");
+		/* This is our thread state!  We'll need to delete it in the
+		   matching call to PyGILState_Release(). */
+		tcur->gilstate_counter = 0;
 		current = 0; /* new thread state is never current */
 	}
 	else
@@ -498,8 +538,6 @@
 		 * habit of coming back).
 		 */
 		PyThreadState_DeleteCurrent();
-		/* Delete this thread from our TLS. */
-		PyThread_delete_key_value(autoTLSkey);
 	}
 	/* Release the lock if necessary */
 	else if (oldstate == PyGILState_UNLOCKED)