bpo-33042: Fix pre-initialization sys module configuration (GH-6157)


- new test case for pre-initialization of sys.warnoptions and sys._xoptions
- restored ability to call these APIs prior to Py_Initialize
- updated the docs for the affected APIs to make it clear they can be
  called before Py_Initialize
- also enhanced the existing embedding test cases
  to check for expected settings in the sys module
(cherry picked from commit bc77eff8b96be4f035e665ab35c1d06e22f46491)

Co-authored-by: Nick Coghlan <ncoghlan@gmail.com>
diff --git a/Python/sysmodule.c b/Python/sysmodule.c
index 873657f..d68572d 100644
--- a/Python/sysmodule.c
+++ b/Python/sysmodule.c
@@ -1609,11 +1609,141 @@
     return list;
 }
 
+/* Pre-initialization support for sys.warnoptions and sys._xoptions
+ *
+ * Modern internal code paths:
+ *   These APIs get called after _Py_InitializeCore and get to use the
+ *   regular CPython list, dict, and unicode APIs.
+ *
+ * Legacy embedding code paths:
+ *   The multi-phase initialization API isn't public yet, so embedding
+ *   apps still need to be able to configure sys.warnoptions and sys._xoptions
+ *   before they call Py_Initialize. To support this, we stash copies of
+ *   the supplied wchar_t * sequences in linked lists, and then migrate the
+ *   contents of those lists to the sys module in _Py_InitializeCore.
+ *
+ */
+
+struct _preinit_entry {
+    wchar_t *value;               /* owned copy of the option string */
+    struct _preinit_entry *next;  /* following entry; NULL at the tail */
+};
+
+typedef struct _preinit_entry *_Py_PreInitEntry;  /* pointer to a list node */
+
+static _Py_PreInitEntry _preinit_warnoptions = NULL;  /* queued warn options */
+static _Py_PreInitEntry _preinit_xoptions = NULL;     /* queued X options */
+
+static _Py_PreInitEntry
+_alloc_preinit_entry(const wchar_t *value)
+{
+    /* Build one unlinked node owning a copy of value; NULL on failure.
+     * To get this to work, we have to initialize the runtime implicitly. */
+    _PyRuntime_Initialize();
+
+    /* Force default allocator, so we can ensure that it also gets used to
+     * destroy the linked list in _clear_preinit_entries. */
+    PyMemAllocatorEx old_alloc;
+    _PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
+    /* Zero-filled allocation, so node->next starts out as NULL */
+    _Py_PreInitEntry node = PyMem_RawCalloc(1, sizeof(*node));
+    if (node != NULL) {
+        node->value = _PyMem_RawWcsdup(value);
+        if (node->value == NULL) {
+            PyMem_RawFree(node);
+            node = NULL;
+        }
+    }
+    /* Put back whichever allocator was installed on entry */
+    PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
+    return node;
+}
+
+static int
+_append_preinit_entry(_Py_PreInitEntry *optionlist, const wchar_t *value)
+{
+    /* Queue a copy of value at the list tail; returns 0, or -1 on failure */
+    _Py_PreInitEntry new_entry = _alloc_preinit_entry(value);
+    if (new_entry == NULL) {
+        return -1;
+    }
+    /* We maintain the linked list in this order so it's easy to play back
+     * the add commands in the same order later on in _Py_InitializeCore */
+    _Py_PreInitEntry last_entry = *optionlist;
+    if (last_entry == NULL) {
+        *optionlist = new_entry;
+    } else {
+        while (last_entry->next != NULL) {
+            last_entry = last_entry->next;
+        }
+        last_entry->next = new_entry;
+    }
+    return 0;
+}
+
+static void
+_clear_preinit_entries(_Py_PreInitEntry *optionlist)
+{
+    _Py_PreInitEntry current = *optionlist;
+    *optionlist = NULL;  /* the caller's list head is left empty */
+    /* Deallocate the nodes and their contents using the default allocator */
+    PyMemAllocatorEx old_alloc;
+    _PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
+    while (current != NULL) {
+        _Py_PreInitEntry next = current->next;  /* read before the free */
+        PyMem_RawFree(current->value);
+        PyMem_RawFree(current);
+        current = next;
+    }
+    PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
+}
+
+static void
+_clear_all_preinit_options(void)
+{
+    _clear_preinit_entries(&_preinit_warnoptions);  /* queued warn options */
+    _clear_preinit_entries(&_preinit_xoptions);     /* queued X options */
+}
+
+static int
+_PySys_ReadPreInitOptions(void)
+{
+    /* Rerun the add commands with the actual sys module available */
+    PyThreadState *tstate = PyThreadState_GET();
+    if (tstate == NULL) {
+        /* Still don't have a thread state, so something is wrong! */
+        return -1;
+    }
+    _Py_PreInitEntry entry = _preinit_warnoptions;  /* oldest entry first */
+    while (entry != NULL) {
+        PySys_AddWarnOption(entry->value);
+        entry = entry->next;
+    }
+    entry = _preinit_xoptions;
+    while (entry != NULL) {
+        PySys_AddXOption(entry->value);
+        entry = entry->next;
+    }
+    /* Everything has been replayed, so release the queued copies */
+    _clear_all_preinit_options();
+    return 0;  /* the add APIs return void, so their failures aren't seen */
+}
+
 static PyObject *
 get_warnoptions(void)
 {
     PyObject *warnoptions = _PySys_GetObjectId(&PyId_warnoptions);
     if (warnoptions == NULL || !PyList_Check(warnoptions)) {
+        /* PEP432 TODO: we can reach this if warnoptions is NULL in the main
+         * interpreter config. When that happens, we need to properly set
+         * the `warnoptions` reference in the main interpreter config as well.
+         *
+         * For Python 3.7, we shouldn't be able to get here due to the
+         * combination of how _PyMainInterpreter_ReadConfig and _PySys_EndInit
+         * work, but we expect 3.8+ to make the _PyMainInterpreter_ReadConfig
+         * call optional for embedding applications, thus making this
+         * reachable again.
+         */
         Py_XDECREF(warnoptions);
         warnoptions = PyList_New(0);
         if (warnoptions == NULL)
@@ -1630,6 +1760,12 @@
 void
 PySys_ResetWarnOptions(void)
 {
+    PyThreadState *tstate = PyThreadState_GET();
+    if (tstate == NULL) {
+        _clear_preinit_entries(&_preinit_warnoptions);
+        return;
+    }
+
     PyObject *warnoptions = _PySys_GetObjectId(&PyId_warnoptions);
     if (warnoptions == NULL || !PyList_Check(warnoptions))
         return;
@@ -1658,6 +1794,11 @@
 void
 PySys_AddWarnOption(const wchar_t *s)
 {
+    PyThreadState *tstate = PyThreadState_GET();
+    if (tstate == NULL) {
+        _append_preinit_entry(&_preinit_warnoptions, s);
+        return;
+    }
     PyObject *unicode;
     unicode = PyUnicode_FromWideChar(s, -1);
     if (unicode == NULL)
@@ -1678,6 +1819,16 @@
 {
     PyObject *xoptions = _PySys_GetObjectId(&PyId__xoptions);
     if (xoptions == NULL || !PyDict_Check(xoptions)) {
+        /* PEP432 TODO: we can reach this if xoptions is NULL in the main
+         * interpreter config. When that happens, we need to properly set
+         * the `xoptions` reference in the main interpreter config as well.
+         *
+         * For Python 3.7, we shouldn't be able to get here due to the
+         * combination of how _PyMainInterpreter_ReadConfig and _PySys_EndInit
+         * work, but we expect 3.8+ to make the _PyMainInterpreter_ReadConfig
+         * call optional for embedding applications, thus making this
+         * reachable again.
+         */
         Py_XDECREF(xoptions);
         xoptions = PyDict_New();
         if (xoptions == NULL)
@@ -1730,6 +1881,11 @@
 void
 PySys_AddXOption(const wchar_t *s)
 {
+    PyThreadState *tstate = PyThreadState_GET();
+    if (tstate == NULL) {
+        _append_preinit_entry(&_preinit_xoptions, s);
+        return;
+    }
     if (_PySys_AddXOptionWithError(s) < 0) {
         /* No return value, therefore clear error state if possible */
         if (_PyThreadState_UncheckedGet()) {
@@ -2257,6 +2413,7 @@
     }
 
     *sysmod = m;
+
     return _Py_INIT_OK();
 
 type_init_failed:
@@ -2333,6 +2490,11 @@
     if (get_xoptions() == NULL)
         return -1;
 
+    /* Transfer any sys.warnoptions and sys._xoptions set directly
+     * by an embedding application from the linked list to the module. */
+    if (_PySys_ReadPreInitOptions() != 0)
+        return -1;
+
     if (PyErr_Occurred())
         return -1;
     return 0;