Use PyThreadState_GET() in performance-critical code

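_PyThreadState_UncheckedGet() does not appear to be inlined as expected, even
when compiling with gcc -O3, so the hot paths in Objects/dictobject.c and
Python/errors.c now call PyThreadState_GET() instead. The Python/sysmodule.c
hunk only adds braces and keeps _PyThreadState_UncheckedGet().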
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index cb67dda..290686b 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -1409,7 +1409,7 @@
        Let's just hope that no exception occurs then...  This must be
        _PyThreadState_Current and not PyThreadState_GET() because in debug
        mode, the latter complains if tstate is NULL. */
-    tstate = _PyThreadState_UncheckedGet();
+    tstate = PyThreadState_GET();
     if (tstate != NULL && tstate->curexc_type != NULL) {
         /* preserve the existing exception */
         PyObject *err_type, *err_value, *err_tb;
diff --git a/Python/errors.c b/Python/errors.c
index 918f4df..0c38f7c 100644
--- a/Python/errors.c
+++ b/Python/errors.c
@@ -161,7 +161,7 @@
 PyObject *
 PyErr_Occurred(void)
 {
-    PyThreadState *tstate = _PyThreadState_UncheckedGet();
+    PyThreadState *tstate = PyThreadState_GET();
     return tstate == NULL ? NULL : tstate->curexc_type;
 }
 
diff --git a/Python/sysmodule.c b/Python/sysmodule.c
index e348b38..9247d4e 100644
--- a/Python/sysmodule.c
+++ b/Python/sysmodule.c
@@ -1547,8 +1547,9 @@
     Py_XDECREF(name);
     Py_XDECREF(value);
     /* No return value, therefore clear error state if possible */
-    if (_PyThreadState_UncheckedGet())
+    if (_PyThreadState_UncheckedGet()) {
         PyErr_Clear();
+    }
 }
 
 PyObject *