bpo-39465: Fix _PyUnicode_FromId() for subinterpreters (GH-20058)

Make _PyUnicode_FromId() function compatible with subinterpreters.
Each interpreter now has an array of identifier objects (interned
strings decoded from UTF-8).

* Add PyInterpreterState.unicode.ids: array of identifier
  objects.
* Add _PyRuntimeState.unicode_ids used to allocate unique indexes
  to _Py_Identifier.
* Rewrite the _Py_Identifier structure.

Microbenchmark on _PyUnicode_FromId(&PyId_a) with _Py_IDENTIFIER(a):

[ref] 2.42 ns +- 0.00 ns -> [atomic] 3.39 ns +- 0.00 ns: 1.40x slower

This change adds 1 ns per _PyUnicode_FromId() call on average.
diff --git a/Include/cpython/object.h b/Include/cpython/object.h
index 19c066b..86889f8 100644
--- a/Include/cpython/object.h
+++ b/Include/cpython/object.h
@@ -35,12 +35,13 @@ PyAPI_FUNC(Py_ssize_t) _Py_GetRefTotal(void);
    _PyObject_{Get,Set,Has}AttrId are __getattr__ versions using _Py_Identifier*.
 */
 typedef struct _Py_Identifier {
-    struct _Py_Identifier *next;
     const char* string;
-    PyObject *object;
+    // Index in PyInterpreterState.unicode.ids.array. It is process-wide
+    // unique and must be initialized to -1.
+    Py_ssize_t index;
 } _Py_Identifier;
 
-#define _Py_static_string_init(value) { .next = NULL, .string = value, .object = NULL }
+#define _Py_static_string_init(value) { .string = value, .index = -1 }
 #define _Py_static_string(varname, value)  static _Py_Identifier varname = _Py_static_string_init(value)
 #define _Py_IDENTIFIER(varname) _Py_static_string(PyId_##varname, #varname)
 
diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h
index 9ec5358..8c61802 100644
--- a/Include/internal/pycore_interp.h
+++ b/Include/internal/pycore_interp.h
@@ -64,6 +64,11 @@ struct _Py_bytes_state {
     PyBytesObject *characters[256];
 };
 
+struct _Py_unicode_ids {  // Per-interpreter identifiers: PyInterpreterState.unicode.ids
+    Py_ssize_t size;  // NOTE(review): presumably the length of `array` -- confirm
+    PyObject **array;  // Identifier objects, indexed by _Py_Identifier.index
+};
+
 struct _Py_unicode_state {
     // The empty Unicode object is a singleton to improve performance.
     PyObject *empty_string;
@@ -71,6 +76,8 @@ struct _Py_unicode_state {
        shared as well. */
     PyObject *latin1[256];
     struct _Py_unicode_fs_codec fs_codec;
+    // Unicode identifiers (_Py_Identifier): see _PyUnicode_FromId()
+    struct _Py_unicode_ids ids;
 };
 
 struct _Py_float_state {
diff --git a/Include/internal/pycore_runtime.h b/Include/internal/pycore_runtime.h
index 3a01d64..8c54abb 100644
--- a/Include/internal/pycore_runtime.h
+++ b/Include/internal/pycore_runtime.h
@@ -49,6 +49,11 @@ typedef struct _Py_AuditHookEntry {
     void *userData;
 } _Py_AuditHookEntry;
 
+struct _Py_unicode_runtime_ids {  // Used to allocate unique _Py_Identifier indexes
+    PyThread_type_lock lock;  // NOTE(review): presumably guards next_index -- confirm
+    Py_ssize_t next_index;  // NOTE(review): presumably the next unassigned index -- confirm
+};
+
 /* Full Python runtime state */
 
 typedef struct pyruntimestate {
@@ -106,6 +111,8 @@ typedef struct pyruntimestate {
     void *open_code_userdata;
     _Py_AuditHookEntry *audit_hook_head;
 
+    struct _Py_unicode_runtime_ids unicode_ids;
+
     // XXX Consolidate globals found via the check-c-globals script.
 } _PyRuntimeState;