bpo-33042: Fix pre-initialization sys module configuration (GH-6157)


- new test case for pre-initialization of sys.warnoptions and sys._xoptions
- restored ability to call these APIs prior to Py_Initialize
- updated the docs for the affected APIs to make it clear they can be
  called before Py_Initialize
- also enhanced the existing embedding test cases
  to check for expected settings in the sys module
(cherry picked from commit bc77eff8b96be4f035e665ab35c1d06e22f46491)

Co-authored-by: Nick Coghlan <ncoghlan@gmail.com>
diff --git a/Programs/_testembed.c b/Programs/_testembed.c
index b3d7aa4..09b7bf6 100644
--- a/Programs/_testembed.c
+++ b/Programs/_testembed.c
@@ -2,6 +2,7 @@
 #include "pythread.h"
 #include <inttypes.h>
 #include <stdio.h>
+#include <wchar.h>
 
 /*********************************************************
  * Embedded interpreter tests that need a custom exe
@@ -130,23 +131,89 @@
  * Test parts of the C-API that work before initialization
  *********************************************************/
 
+/* The pre-initialization tests tend to break by segfaulting, so explicitly
+ * flushed progress messages make the broken API easier to find when they fail.
+ */
+#define _Py_EMBED_PREINIT_CHECK(msg) \
+    do {printf(msg); fflush(stdout);} while (0);
+
 static int test_pre_initialization_api(void)
 {
     /* Leading "./" ensures getpath.c can still find the standard library */
+    _Py_EMBED_PREINIT_CHECK("Checking Py_DecodeLocale\n");
     wchar_t *program = Py_DecodeLocale("./spam", NULL);
     if (program == NULL) {
         fprintf(stderr, "Fatal error: cannot decode program name\n");
         return 1;
     }
+    _Py_EMBED_PREINIT_CHECK("Checking Py_SetProgramName\n");
     Py_SetProgramName(program);
 
+    _Py_EMBED_PREINIT_CHECK("Initializing interpreter\n");
     Py_Initialize();
+    _Py_EMBED_PREINIT_CHECK("Check sys module contents\n");
+    PyRun_SimpleString("import sys; "
+                       "print('sys.executable:', sys.executable)");
+    _Py_EMBED_PREINIT_CHECK("Finalizing interpreter\n");
     Py_Finalize();
 
+    _Py_EMBED_PREINIT_CHECK("Freeing memory allocated by Py_DecodeLocale\n");
     PyMem_RawFree(program);
     return 0;
 }
 
+
+/* bpo-33042: Ensure embedding apps can predefine sys module options */
+static int test_pre_initialization_sys_options(void)
+{
+    /* We allocate a couple of the option dynamically, and then delete
+     * them before calling Py_Initialize. This ensures the interpreter isn't
+     * relying on the caller to keep the passed in strings alive.
+     */
+    wchar_t *static_warnoption = L"once";
+    wchar_t *static_xoption = L"also_not_an_option=2";
+    size_t warnoption_len = wcslen(static_warnoption);
+    size_t xoption_len = wcslen(static_xoption);
+    wchar_t *dynamic_once_warnoption = calloc(warnoption_len+1, sizeof(wchar_t));
+    wchar_t *dynamic_xoption = calloc(xoption_len+1, sizeof(wchar_t));
+    wcsncpy(dynamic_once_warnoption, static_warnoption, warnoption_len+1);
+    wcsncpy(dynamic_xoption, static_xoption, xoption_len+1);
+
+    _Py_EMBED_PREINIT_CHECK("Checking PySys_AddWarnOption\n");
+    PySys_AddWarnOption(L"default");
+    _Py_EMBED_PREINIT_CHECK("Checking PySys_ResetWarnOptions\n");
+    PySys_ResetWarnOptions();
+    _Py_EMBED_PREINIT_CHECK("Checking PySys_AddWarnOption linked list\n");
+    PySys_AddWarnOption(dynamic_once_warnoption);
+    PySys_AddWarnOption(L"module");
+    PySys_AddWarnOption(L"default");
+    _Py_EMBED_PREINIT_CHECK("Checking PySys_AddXOption\n");
+    PySys_AddXOption(L"not_an_option=1");
+    PySys_AddXOption(dynamic_xoption);
+
+    /* Delete the dynamic options early */
+    free(dynamic_once_warnoption);
+    dynamic_once_warnoption = NULL;
+    free(dynamic_xoption);
+    dynamic_xoption = NULL;
+
+    _Py_EMBED_PREINIT_CHECK("Initializing interpreter\n");
+    _testembed_Py_Initialize();
+    _Py_EMBED_PREINIT_CHECK("Check sys module contents\n");
+    PyRun_SimpleString("import sys; "
+                       "print('sys.warnoptions:', sys.warnoptions); "
+                       "print('sys._xoptions:', sys._xoptions); "
+                       "warnings = sys.modules['warnings']; "
+                       "latest_filters = [f[0] for f in warnings.filters[:3]]; "
+                       "print('warnings.filters[:3]:', latest_filters)");
+    _Py_EMBED_PREINIT_CHECK("Finalizing interpreter\n");
+    Py_Finalize();
+
+    return 0;
+}
+
+
+/* bpo-20891: Avoid race condition when initialising the GIL */
 static void bpo20891_thread(void *lockp)
 {
     PyThread_type_lock lock = *((PyThread_type_lock*)lockp);
@@ -217,6 +284,7 @@
     { "forced_io_encoding", test_forced_io_encoding },
     { "repeated_init_and_subinterpreters", test_repeated_init_and_subinterpreters },
     { "pre_initialization_api", test_pre_initialization_api },
+    { "pre_initialization_sys_options", test_pre_initialization_sys_options },
     { "bpo20891", test_bpo20891 },
     { NULL, NULL }
 };
@@ -232,13 +300,13 @@
 
     /* No match found, or no test name provided, so display usage */
     printf("Python " PY_VERSION " _testembed executable for embedded interpreter tests\n"
-           "Normally executed via 'EmbeddingTests' in Lib/test/test_capi.py\n\n"
+           "Normally executed via 'EmbeddingTests' in Lib/test/test_embed.py\n\n"
            "Usage: %s TESTNAME\n\nAll available tests:\n", argv[0]);
     for (struct TestCase *tc = TestCases; tc && tc->name; tc++) {
         printf("  %s\n", tc->name);
     }
 
-    /* Non-zero exit code will cause test_capi.py tests to fail.
+    /* Non-zero exit code will cause test_embed.py tests to fail.
        This is intentional. */
     return -1;
 }