bpo-25658: Implement PEP 539 for Thread Specific Storage (TSS) API (GH-1362)

See PEP 539 for details.

Highlights of changes:

- Add Thread Specific Storage (TSS) API
- Document the Thread Local Storage (TLS) API as deprecated
- Update code that used TLS API to use TSS API
diff --git a/Include/pythread.h b/Include/pythread.h
index dbacb8b..d667468 100644
--- a/Include/pythread.h
+++ b/Include/pythread.h
@@ -29,8 +29,8 @@
 PyAPI_FUNC(PyThread_type_lock) PyThread_allocate_lock(void);
 PyAPI_FUNC(void) PyThread_free_lock(PyThread_type_lock);
 PyAPI_FUNC(int) PyThread_acquire_lock(PyThread_type_lock, int);
-#define WAIT_LOCK	1
-#define NOWAIT_LOCK	0
+#define WAIT_LOCK       1
+#define NOWAIT_LOCK     0
 
 /* PY_TIMEOUT_T is the integral type used to specify timeouts when waiting
    on a lock (see PyThread_acquire_lock_timed() below).
@@ -77,15 +77,69 @@
 PyAPI_FUNC(PyObject*) PyThread_GetInfo(void);
 #endif
 
-/* Thread Local Storage (TLS) API */
-PyAPI_FUNC(int) PyThread_create_key(void);
-PyAPI_FUNC(void) PyThread_delete_key(int);
-PyAPI_FUNC(int) PyThread_set_key_value(int, void *);
-PyAPI_FUNC(void *) PyThread_get_key_value(int);
-PyAPI_FUNC(void) PyThread_delete_key_value(int key);
+
+/* Thread Local Storage (TLS) API
+   TLS API is DEPRECATED.  Use Thread Specific Storage (TSS) API.
+
+   The existing TLS API uses int to represent TLS keys across all
+   platforms, which is not POSIX-compliant.  Therefore, the new TSS API uses
+   an opaque data type to represent TSS keys to be compatible (see PEP 539).
+*/
+PyAPI_FUNC(int) PyThread_create_key(void) Py_DEPRECATED(3.7);
+PyAPI_FUNC(void) PyThread_delete_key(int key) Py_DEPRECATED(3.7);
+PyAPI_FUNC(int) PyThread_set_key_value(int key, void *value) Py_DEPRECATED(3.7);
+PyAPI_FUNC(void *) PyThread_get_key_value(int key) Py_DEPRECATED(3.7);
+PyAPI_FUNC(void) PyThread_delete_key_value(int key) Py_DEPRECATED(3.7);
 
 /* Cleanup after a fork */
-PyAPI_FUNC(void) PyThread_ReInitTLS(void);
+PyAPI_FUNC(void) PyThread_ReInitTLS(void) Py_DEPRECATED(3.7);
+
+
+#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03070000
+/* New in 3.7 */
+/* Thread Specific Storage (TSS) API */
+
+typedef struct _Py_tss_t Py_tss_t;  /* opaque */
+
+#ifndef Py_LIMITED_API
+#if defined(_POSIX_THREADS)
+    /* Darwin needs pthread.h to know the type name pthread_key_t. */
+#   include <pthread.h>
+#   define NATIVE_TSS_KEY_T     pthread_key_t
+#elif defined(NT_THREADS)
+    /* On Windows, the native TSS key type is DWORD, but unsigned long is
+       hardcoded here to avoid errors from an include directive.
+    */
+#   define NATIVE_TSS_KEY_T     unsigned long
+#else
+#   error "Require native threads. See https://bugs.python.org/issue31370"
+#endif
+
+/* When Py_LIMITED_API is not defined, the type layout of Py_tss_t is
+   exposed to allow static allocation in API clients.  Even in this case,
+   you must handle TSS keys through the API functions for compatibility.
+*/
+struct _Py_tss_t {
+    int _is_initialized;
+    NATIVE_TSS_KEY_T _key;
+};
+
+#undef NATIVE_TSS_KEY_T
+
+/* When allocating statically, you must initialize with Py_tss_NEEDS_INIT. */
+#define Py_tss_NEEDS_INIT   {0}
+#endif  /* !Py_LIMITED_API */
+
+PyAPI_FUNC(Py_tss_t *) PyThread_tss_alloc(void);
+PyAPI_FUNC(void) PyThread_tss_free(Py_tss_t *key);
+
+/* The parameter key must not be NULL. */
+PyAPI_FUNC(int) PyThread_tss_is_created(Py_tss_t *key);
+PyAPI_FUNC(int) PyThread_tss_create(Py_tss_t *key);
+PyAPI_FUNC(void) PyThread_tss_delete(Py_tss_t *key);
+PyAPI_FUNC(int) PyThread_tss_set(Py_tss_t *key, void *value);
+PyAPI_FUNC(void *) PyThread_tss_get(Py_tss_t *key);
+#endif  /* New in 3.7 */
 
 #ifdef __cplusplus
 }