bpo-36142: Add _PyPreConfig_ReadFromArgv() (GH-12173)

The new function is now responsible for parsing the -E and -I command
line arguments.
diff --git a/Include/internal/pycore_coreconfig.h b/Include/internal/pycore_coreconfig.h
index 6469fca..5135969 100644
--- a/Include/internal/pycore_coreconfig.h
+++ b/Include/internal/pycore_coreconfig.h
@@ -44,12 +44,13 @@
 PyAPI_FUNC(_PyInitError) _PyPreConfig_Read(_PyPreConfig *config);
 PyAPI_FUNC(int) _PyPreConfig_AsDict(const _PyPreConfig *config,
     PyObject *dict);
-
+PyAPI_FUNC(_PyInitError) _PyPreConfig_ReadFromArgv(_PyPreConfig *config,
+    const _PyArgv *args);
+PyAPI_FUNC(void) _PyPreConfig_Write(const _PyPreConfig *config);
 
 
 /* --- _PyCoreConfig ---------------------------------------------- */
 
-PyAPI_FUNC(_PyInitError) _PyCoreConfig_Read(_PyCoreConfig *config);
 PyAPI_FUNC(void) _PyCoreConfig_Clear(_PyCoreConfig *);
 PyAPI_FUNC(int) _PyCoreConfig_Copy(
     _PyCoreConfig *config,
@@ -67,8 +68,11 @@
     wchar_t **dest,
     wchar_t *wname,
     char *name);
+PyAPI_FUNC(_PyInitError) _PyCoreConfig_Read(_PyCoreConfig *config,
+    const _PyPreConfig *preconfig);
 PyAPI_FUNC(_PyInitError) _PyCoreConfig_ReadFromArgv(_PyCoreConfig *config,
-    const _PyArgv *args);
+    const _PyArgv *args,
+    const _PyPreConfig *preconfig);
 PyAPI_FUNC(void) _PyCoreConfig_Write(const _PyCoreConfig *config);
 
 #ifdef __cplusplus
diff --git a/Include/internal/pycore_getopt.h b/Include/internal/pycore_getopt.h
index e6f4654..1d30f5b 100644
--- a/Include/internal/pycore_getopt.h
+++ b/Include/internal/pycore_getopt.h
@@ -17,7 +17,6 @@
     int val;
 } _PyOS_LongOption;
 
-extern int _PyOS_GetOpt(int argc, wchar_t **argv, wchar_t *optstring,
-                        const _PyOS_LongOption *longopts, int *longindex);
+extern int _PyOS_GetOpt(int argc, wchar_t **argv, int *longindex);
 
 #endif /* !Py_INTERNAL_PYGETOPT_H */
diff --git a/Modules/main.c b/Modules/main.c
index ff2e2f0..34032ad 100644
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -286,20 +286,32 @@
 
 /* --- pymain_init() ---------------------------------------------- */
 
-static void
-config_clear(_PyCoreConfig *config)
+static _PyInitError
+preconfig_read_write(_PyPreConfig *config, const _PyArgv *args)
 {
+    _PyInitError err;
+    /* Read the pre-configuration from args under the default raw allocator. */
     PyMemAllocatorEx old_alloc;
     _PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
 
-    _PyCoreConfig_Clear(config);
+    _PyPreConfig_GetGlobalConfig(config);
+    /* A failure is only returned below, once the allocator is restored. */
+    err = _PyPreConfig_ReadFromArgv(config, args);
 
     PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
+
+    if (_Py_INIT_FAILED(err)) {
+        return err;
+    }
+    /* Success only. _PyPreConfig_Write() currently has an empty body. */
+    _PyPreConfig_Write(config);
+    return _Py_INIT_OK();
 }
 
 
 static _PyInitError
-config_read_write(_PyCoreConfig *config, const _PyArgv *args)
+config_read_write(_PyCoreConfig *config, const _PyArgv *args,
+                  const _PyPreConfig *preconfig)
 {
     _PyInitError err;
 
@@ -308,7 +320,7 @@
 
     _PyCoreConfig_GetGlobalConfig(config);
 
-    err = _PyCoreConfig_ReadFromArgv(config, args);
+    err = _PyCoreConfig_ReadFromArgv(config, args, preconfig);
 
     PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
 
@@ -344,6 +356,7 @@
 pymain_init(const _PyArgv *args, PyInterpreterState **interp_p)
 {
     _PyInitError err;
+    PyMemAllocatorEx old_alloc;
 
     err = _PyRuntime_Initialize();
     if (_Py_INIT_FAILED(err)) {
@@ -359,10 +372,18 @@
     fedisableexcept(FE_OVERFLOW);
 #endif
 
+    _PyPreConfig local_preconfig = _PyPreConfig_INIT;
+    _PyPreConfig *preconfig = &local_preconfig;
+
     _PyCoreConfig local_config = _PyCoreConfig_INIT;
     _PyCoreConfig *config = &local_config;
 
-    err = config_read_write(config, args);
+    err = preconfig_read_write(preconfig, args);
+    if (_Py_INIT_FAILED(err)) {
+        goto done;
+    }
+
+    err = config_read_write(config, args, preconfig);
     if (_Py_INIT_FAILED(err)) {
         goto done;
     }
@@ -382,7 +403,12 @@
     err = _Py_INIT_OK();
 
 done:
-    config_clear(config);
+    _PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
+
+    _PyPreConfig_Clear(preconfig);
+    _PyCoreConfig_Clear(config);
+
+    PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
     return err;
 }
 
diff --git a/Python/coreconfig.c b/Python/coreconfig.c
index 3486da4..a6aa89b 100644
--- a/Python/coreconfig.c
+++ b/Python/coreconfig.c
@@ -23,13 +23,6 @@
 
 /* --- Command line options --------------------------------------- */
 
-#define PROGRAM_OPTS L"bBc:dEhiIJm:OqRsStuvVW:xX:?"
-
-static const _PyOS_LongOption longoptions[] = {
-    {L"check-hash-based-pycs", 1, 0},
-    {NULL, 0, 0},
-};
-
 /* Short usage message (with %s for argv0) */
 static const char usage_line[] =
 "usage: %ls [option] ... [-c cmd | -m mod | file | -] [arg] ...\n";
@@ -1483,28 +1476,61 @@
 }
 
 
-/* Read configuration settings from standard locations
- *
- * This function doesn't make any changes to the interpreter state - it
- * merely populates any missing configuration settings. This allows an
- * embedding application to completely override a config option by
- * setting it before calling this function, or else modify the default
- * setting before passing the fully populated config to Py_EndInitialization.
- *
- * More advanced selective initialization tricks are possible by calling
- * this function multiple times with various preconfigured settings.
- */
+static _PyInitError
+_PyCoreConfig_ReadPreConfig(_PyCoreConfig *config)
+{   /* Run _PyPreConfig_Read() on a copy of config->preconfig, then copy back */
+    _PyInitError err;
+    _PyPreConfig local_preconfig = _PyPreConfig_INIT;
+    _PyPreConfig_GetGlobalConfig(&local_preconfig);
 
+    if (_PyPreConfig_Copy(&local_preconfig, &config->preconfig) < 0) {
+        err = _Py_INIT_NO_MEMORY();
+        goto done;
+    }
+    /* Read into the local copy: config->preconfig is untouched if this fails */
+    err = _PyPreConfig_Read(&local_preconfig);
+    if (_Py_INIT_FAILED(err)) {
+        goto done;
+    }
+    /* Store the result back into config; a failed copy is reported as no-memory */
+    if (_PyPreConfig_Copy(&config->preconfig, &local_preconfig) < 0) {
+        err = _Py_INIT_NO_MEMORY();
+        goto done;
+    }
+    err = _Py_INIT_OK();
+
+done:
+    _PyPreConfig_Clear(&local_preconfig);  /* cleared on every path */
+    return err;
+}
+
+
+/* Read the configuration into _PyCoreConfig and initialize the LC_CTYPE
+   locale: enable UTF-8 mode (PEP 540) and/or coerce the C locale (PEP 538).
+
+   Read the configuration from:
+
+   * Environment variables
+   * Py_xxx global configuration variables
+
+   See _PyCoreConfig_ReadFromArgv() to also parse command line arguments. */
 _PyInitError
-_PyCoreConfig_Read(_PyCoreConfig *config)
+_PyCoreConfig_Read(_PyCoreConfig *config, const _PyPreConfig *preconfig)
 {
     _PyInitError err;
 
     _PyCoreConfig_GetGlobalConfig(config);
 
-    err = _PyPreConfig_Read(&config->preconfig);
-    if (_Py_INIT_FAILED(err)) {
-        return err;
+    if (preconfig != NULL) {
+        if (_PyPreConfig_Copy(&config->preconfig, preconfig) < 0) {
+            return _Py_INIT_NO_MEMORY();
+        }
+    }
+    else {
+        err = _PyCoreConfig_ReadPreConfig(config);
+        if (_Py_INIT_FAILED(err)) {
+            return err;
+        }
     }
 
     assert(config->preconfig.use_environment >= 0);
@@ -1851,8 +1877,7 @@
     _PyOS_ResetGetOpt();
     do {
         int longindex = -1;
-        int c = _PyOS_GetOpt(cmdline->args->argc, cmdline->argv, PROGRAM_OPTS,
-                             longoptions, &longindex);
+        int c = _PyOS_GetOpt(cmdline->args->argc, cmdline->argv, &longindex);
         if (c == EOF) {
             break;
         }
@@ -1915,8 +1940,9 @@
             config->interactive++;
             break;
 
+        case 'E':
         case 'I':
-            config->preconfig.isolated++;
+            /* option handled by _PyPreConfig_ReadFromArgv() */
             break;
 
         /* case 'J': reserved for Jython */
@@ -1937,10 +1963,6 @@
             config->site_import = 0;
             break;
 
-        case 'E':
-            config->preconfig.use_environment = 0;
-            break;
-
         case 't':
             /* ignored for backwards compatibility */
             break;
@@ -2235,7 +2257,8 @@
 
 /* Parse command line options and environment variables. */
 static _PyInitError
-config_from_cmdline(_PyCoreConfig *config, _PyCmdline *cmdline)
+config_from_cmdline(_PyCoreConfig *config, _PyCmdline *cmdline,
+                    const _PyPreConfig *preconfig)
 {
     int need_usage = 0;
     _PyInitError err;
@@ -2271,7 +2294,7 @@
         return err;
     }
 
-    err = _PyCoreConfig_Read(config);
+    err = _PyCoreConfig_Read(config, preconfig);
     if (_Py_INIT_FAILED(err)) {
         return err;
     }
@@ -2296,7 +2319,8 @@
 
 
 static _PyInitError
-config_read_from_argv_impl(_PyCoreConfig *config, const _PyArgv *args)
+config_read_from_argv_impl(_PyCoreConfig *config, const _PyArgv *args,
+                           const _PyPreConfig *preconfig)
 {
     _PyInitError err;
 
@@ -2309,7 +2333,7 @@
         goto done;
     }
 
-    err = config_from_cmdline(config, &cmdline);
+    err = config_from_cmdline(config, &cmdline, preconfig);
     if (_Py_INIT_FAILED(err)) {
         goto done;
     }
@@ -2330,7 +2354,8 @@
    * Environment variables
    * Py_xxx global configuration variables */
 _PyInitError
-_PyCoreConfig_ReadFromArgv(_PyCoreConfig *config, const _PyArgv *args)
+_PyCoreConfig_ReadFromArgv(_PyCoreConfig *config, const _PyArgv *args,
+                           const _PyPreConfig *preconfig)
 {
     _PyInitError err;
     int init_utf8_mode = Py_UTF8Mode;
@@ -2381,7 +2406,7 @@
         Py_LegacyWindowsFSEncodingFlag = config->legacy_windows_fs_encoding;
 #endif
 
-        err = config_read_from_argv_impl(config, args);
+        err = config_read_from_argv_impl(config, args, preconfig);
         if (_Py_INIT_FAILED(err)) {
             goto done;
         }
diff --git a/Python/getopt.c b/Python/getopt.c
index c165a94..1dc8720 100644
--- a/Python/getopt.c
+++ b/Python/getopt.c
@@ -43,6 +43,16 @@
 
 static wchar_t *opt_ptr = L"";
 
+/* Python command line short and long options, used by _PyOS_GetOpt() */
+
+#define SHORT_OPTS L"bBc:dEhiIJm:OqRsStuvVW:xX:?"
+
+static const _PyOS_LongOption longopts[] = {
+    {L"check-hash-based-pycs", 1, 0},
+    {NULL, 0, 0},  /* NULL name: presumably the end-of-table marker -- confirm */
+};
+
+
 void _PyOS_ResetGetOpt(void)
 {
     _PyOS_opterr = 1;
@@ -51,8 +61,7 @@
     opt_ptr = L"";
 }
 
-int _PyOS_GetOpt(int argc, wchar_t **argv, wchar_t *optstring,
-                 const _PyOS_LongOption *longopts, int *longindex)
+int _PyOS_GetOpt(int argc, wchar_t **argv, int *longindex)
 {
     wchar_t *ptr;
     wchar_t option;
@@ -128,7 +137,7 @@
         return '_';
     }
 
-    if ((ptr = wcschr(optstring, option)) == NULL) {
+    if ((ptr = wcschr(SHORT_OPTS, option)) == NULL) {
         if (_PyOS_opterr)
             fprintf(stderr, "Unknown option: -%c\n", (char)option);
         return '_';
diff --git a/Python/pathconfig.c b/Python/pathconfig.c
index 41fc9e2..14dbba7 100644
--- a/Python/pathconfig.c
+++ b/Python/pathconfig.c
@@ -393,7 +393,7 @@
     _PyInitError err;
     _PyCoreConfig config = _PyCoreConfig_INIT;
 
-    err = _PyCoreConfig_Read(&config);
+    err = _PyCoreConfig_Read(&config, NULL);
     if (_Py_INIT_FAILED(err)) {
         goto error;
     }
diff --git a/Python/preconfig.c b/Python/preconfig.c
index bb1e830..af70f38 100644
--- a/Python/preconfig.c
+++ b/Python/preconfig.c
@@ -1,5 +1,6 @@
 #include "Python.h"
 #include "pycore_coreconfig.h"
+#include "pycore_getopt.h"
 
 
 #define DECODE_LOCALE_ERR(NAME, LEN) \
@@ -92,6 +93,25 @@
 }
 
 
+/* --- _PyPreCmdline ------------------------------------------------- */
+/* Command line state used while parsing the pre-configuration options. */
+typedef struct {
+    const _PyArgv *args;  /* arguments as given by the caller */
+    int argc;             /* NOTE(review): not read by the code visible here */
+    wchar_t **argv;       /* set by _PyArgv_Decode(); see precmdline_clear() */
+} _PyPreCmdline;
+
+/* Reset cmdline->argv to NULL; cmdline->args must be set (it is dereferenced). */
+static void
+precmdline_clear(_PyPreCmdline *cmdline)
+{   /* argv is handed to _Py_wstrlist_clear() only when args->use_bytes_argv */
+    if (cmdline->args->use_bytes_argv && cmdline->argv != NULL) {
+        _Py_wstrlist_clear(cmdline->args->argc, cmdline->argv);
+    }
+    cmdline->argv = NULL;  /* always reset, also when nothing was cleared */
+}
+
+
 /* --- _PyPreConfig ----------------------------------------------- */
 
 void
@@ -169,6 +189,7 @@
         config->use_environment = 0;
     }
 
+    assert(config->isolated >= 0);
     assert(config->use_environment >= 0);
 
     return _Py_INIT_OK();
@@ -203,3 +224,76 @@
 #undef SET_ITEM
 #undef SET_ITEM_INT
 }
+
+
+/* Parse the command line arguments: only -E and -I are acted on here */
+static _PyInitError
+preconfig_parse_cmdline(_PyPreConfig *config, _PyPreCmdline *cmdline)
+{
+    _PyOS_ResetGetOpt();
+    /* Don't log parsing errors to stderr here: _PyCoreConfig_ReadFromArgv()
+       is responsible for that */
+    _PyOS_opterr = 0;
+    do {
+        int longindex = -1;
+        int c = _PyOS_GetOpt(cmdline->args->argc, cmdline->argv, &longindex);
+        /* NOTE(review): presumably what follows -c/-m is no option -- confirm */
+        if (c == EOF || c == 'c' || c == 'm') {
+            break;
+        }
+
+        switch (c) {
+        case 'E':
+            config->use_environment = 0;
+            break;
+
+        case 'I':
+            config->isolated++;
+            break;
+
+        default:
+            /* ignore other arguments:
+               handled by _PyCoreConfig_ReadFromArgv() */
+            break;
+        }
+    } while (1);
+
+    return _Py_INIT_OK();  /* no failure path in this function */
+}
+
+/* Decode args, apply -E/-I to config, then call _PyPreConfig_Read() on it. */
+_PyInitError
+_PyPreConfig_ReadFromArgv(_PyPreConfig *config, const _PyArgv *args)
+{
+    _PyInitError err;
+
+    _PyPreCmdline cmdline;
+    memset(&cmdline, 0, sizeof(cmdline));  /* zeroed: argv is NULL until decoded */
+    cmdline.args = args;
+
+    err = _PyArgv_Decode(cmdline.args, &cmdline.argv);
+    if (_Py_INIT_FAILED(err)) {
+        goto done;
+    }
+
+    err = preconfig_parse_cmdline(config, &cmdline);
+    if (_Py_INIT_FAILED(err)) {
+        goto done;
+    }
+
+    err = _PyPreConfig_Read(config);
+    if (_Py_INIT_FAILED(err)) {
+        goto done;
+    }
+    err = _Py_INIT_OK();
+
+done:
+    precmdline_clear(&cmdline);  /* runs on success and on every error path */
+    return err;
+}
+
+/* Currently does nothing: the body is empty and config is not read. */
+void
+_PyPreConfig_Write(const _PyPreConfig *config)
+{
+}
diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c
index a5cfc07..7cf4a6d 100644
--- a/Python/pylifecycle.c
+++ b/Python/pylifecycle.c
@@ -763,7 +763,7 @@
 
     _PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
     if (_PyCoreConfig_Copy(&config, src_config) >= 0) {
-        err = _PyCoreConfig_Read(&config);
+        err = _PyCoreConfig_Read(&config, NULL);
     }
     else {
         err = _Py_INIT_ERR("failed to copy core config");