bpo-32030: Add _PyPathConfig_ComputeArgv0() (#4845)

Changes:

* Split _PySys_SetArgvWithError() into subfunctions for Py_Main():

  * Create the Python list object
  * Set sys.argv to the list
  * Compute path0 from argv[0]
  * Prepend path0 to sys.path

* Add _PyPathConfig_ComputeArgv0()
* Remove _PySys_SetArgvWithError()
* Py_Main() now splits the code to compute sys.argv/path0 and the
  code to update the sys module: add pymain_compute_argv()
  subfunction.
diff --git a/Include/pylifecycle.h b/Include/pylifecycle.h
index 5423060..17ed110 100644
--- a/Include/pylifecycle.h
+++ b/Include/pylifecycle.h
@@ -105,6 +105,7 @@
 #ifdef Py_BUILD_CORE
 PyAPI_FUNC(_PyInitError) _PyPathConfig_Init(
     const _PyMainInterpreterConfig *main_config);
+PyAPI_FUNC(PyObject*) _PyPathConfig_ComputeArgv0(int argc, wchar_t **argv);
 #endif
 PyAPI_FUNC(void)      Py_SetPath(const wchar_t *);
 #ifdef MS_WINDOWS
diff --git a/Include/sysmodule.h b/Include/sysmodule.h
index b709629..719ecfc 100644
--- a/Include/sysmodule.h
+++ b/Include/sysmodule.h
@@ -16,12 +16,6 @@
 
 PyAPI_FUNC(void) PySys_SetArgv(int, wchar_t **);
 PyAPI_FUNC(void) PySys_SetArgvEx(int, wchar_t **, int);
-#ifdef Py_BUILD_CORE
-PyAPI_FUNC(_PyInitError) _PySys_SetArgvWithError(
-    int argc,
-    wchar_t **argv,
-    int updatepath);
-#endif
 PyAPI_FUNC(void) PySys_SetPath(const wchar_t *);
 
 PyAPI_FUNC(void) PySys_WriteStdout(const char *format, ...)
diff --git a/Modules/main.c b/Modules/main.c
index c8848a6..7c71775 100644
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -410,6 +410,13 @@
 #endif
 } _Py_CommandLineDetails;
 
+/* FIXME: temporary structure until sys module configuration can be moved
+   into _PyMainInterpreterConfig */
+typedef struct {
+    PyObject *argv;     /* sys.argv list */
+    PyObject *path0;    /* path0: if set, it is prepended to sys.path */
+} _PySysConfig;
+
 /* Structure used by Py_Main() to pass data to subfunctions */
 typedef struct {
     /* Exit status ("exit code") */
@@ -419,6 +426,7 @@
     int stdin_is_interactive;
     _PyCoreConfig core_config;
     _PyMainInterpreterConfig config;
+    _PySysConfig sys_config;
     _Py_CommandLineDetails cmdline;
     PyObject *main_importer_path;
     /* non-zero if filename, command (-c) or module (-m) is set
@@ -492,6 +500,8 @@
     pymain_optlist_clear(&pymain->env_warning_options);
     Py_CLEAR(pymain->main_importer_path);
 
+    Py_CLEAR(pymain->sys_config.argv);
+    Py_CLEAR(pymain->sys_config.path0);
 }
 
 static void
@@ -510,26 +520,20 @@
 static int
 pymain_run_main_from_importer(_PyMain *pymain)
 {
-    PyObject *sys_path0 = pymain->main_importer_path;
-    PyObject *sys_path;
-    int sts;
-
     /* Assume sys_path0 has already been checked by pymain_get_importer(),
      * so put it in sys.path[0] and import __main__ */
-    sys_path = PySys_GetObject("path");
+    PyObject *sys_path = PySys_GetObject("path");
     if (sys_path == NULL) {
         PyErr_SetString(PyExc_RuntimeError, "unable to get sys.path");
         goto error;
     }
 
-    sts = PyList_Insert(sys_path, 0, sys_path0);
-    if (sts) {
-        sys_path0 = NULL;
+    if (PyList_Insert(sys_path, 0, pymain->main_importer_path)) {
         goto error;
     }
 
-    sts = pymain_run_module(L"__main__", 0);
-    return sts != 0;
+    int sts = pymain_run_module(L"__main__", 0);
+    return (sts != 0);
 
 error:
     Py_CLEAR(pymain->main_importer_path);
@@ -1082,8 +1086,36 @@
 }
 
 
+static PyObject *
+pymain_create_argv_list(int argc, wchar_t **argv)
+{
+    if (argc <= 0 || argv == NULL) {
+        /* Ensure at least one (empty) argument is seen */
+        static wchar_t *empty_argv[1] = {L""};
+        argv = empty_argv;
+        argc = 1;
+    }
+
+    PyObject *av = PyList_New(argc);
+    if (av == NULL) {
+        return NULL;
+    }
+
+    for (int i = 0; i < argc; i++) {
+        PyObject *v = PyUnicode_FromWideChar(argv[i], -1);
+        if (v == NULL) {
+            Py_DECREF(av);
+            return NULL;
+        }
+        PyList_SET_ITEM(av, i, v);
+    }
+    return av;
+}
+
+
+/* Create sys.argv list and maybe also path0 */
 static int
-pymain_set_sys_argv(_PyMain *pymain)
+pymain_compute_argv(_PyMain *pymain)
 {
     _Py_CommandLineDetails *cmdline = &pymain->cmdline;
 
@@ -1112,6 +1144,14 @@
         argv2[0] = L"-m";
     }
 
+    /* Create sys.argv list */
+    pymain->sys_config.argv = pymain_create_argv_list(argc2, argv2);
+    if (pymain->sys_config.argv == NULL) {
+        pymain->err = _Py_INIT_ERR("failed to create sys.argv");
+        goto error;
+    }
+
+    /* Need to update sys.path[0]? */
     int update_path;
     if (pymain->main_importer_path != NULL) {
         /* Let pymain_run_main_from_importer() adjust sys.path[0] later */
@@ -1121,16 +1161,48 @@
         update_path = (Py_IsolatedFlag == 0);
     }
 
-    /* Set sys.argv. If '-c' and '-m' options are not used in the command line
+    /* If '-c' and '-m' options are not used in the command line
        and update_path is non-zero, prepend argv[0] to sys.path. If argv[0] is
        a symlink, use the real path. */
-    _PyInitError err = _PySys_SetArgvWithError(argc2, argv2, update_path);
-    if (_Py_INIT_FAILED(err)) {
-        pymain->err = err;
+    if (update_path) {
+        pymain->sys_config.path0 = _PyPathConfig_ComputeArgv0(argc2, argv2);
+        if (pymain->sys_config.path0 == NULL) {
+            pymain->err = _Py_INIT_NO_MEMORY();
+            goto error;
+        }
+    }
+    PyMem_RawFree(argv2);
+    return 0;
+
+error:
+    return -1;
+}
+
+static int
+pymain_set_sys_argv(_PyMain *pymain)
+{
+    /* Set sys.argv */
+    if (PySys_SetObject("argv", pymain->sys_config.argv) != 0) {
+        pymain->err = _Py_INIT_ERR("can't assign sys.argv");
         return -1;
     }
+    Py_CLEAR(pymain->sys_config.argv);
 
-    PyMem_RawFree(argv2);
+    if (pymain->sys_config.path0 != NULL) {
+        /* Prepend path0 to sys.path */
+        PyObject *sys_path = PySys_GetObject("path");
+        if (sys_path == NULL) {
+            pymain->err = _Py_INIT_ERR("can't get sys.path");
+            return -1;
+        }
+
+        if (PyList_Insert(sys_path, 0, pymain->sys_config.path0) < 0) {
+            pymain->err = _Py_INIT_ERR("sys.path.insert(0, path0) failed");
+            return -1;
+        }
+        Py_CLEAR(pymain->sys_config.path0);
+    }
+
     return 0;
 }
 
@@ -1822,6 +1894,9 @@
        Currently, PySys_SetArgvEx() can still modify sys.path and so must be
        called after _Py_InitializeMainInterpreter() which calls
        _PyPathConfig_Init(). */
+    if (pymain_compute_argv(pymain) < 0) {
+        return -1;
+    }
     if (pymain_set_sys_argv(pymain) < 0) {
         return -1;
     }
diff --git a/Python/pathconfig.c b/Python/pathconfig.c
index 53ddfc9..b17ae82 100644
--- a/Python/pathconfig.c
+++ b/Python/pathconfig.c
@@ -261,6 +261,104 @@
 }
 
 
+#define _HAVE_SCRIPT_ARGUMENT(argc, argv) \
+  (argc > 0 && argv0 != NULL && \
+   wcscmp(argv0, L"-c") != 0 && wcscmp(argv0, L"-m") != 0)
+
+/* Compute path0 from argv[0]; callers prepend the result to sys.path */
+PyObject*
+_PyPathConfig_ComputeArgv0(int argc, wchar_t **argv)
+{
+    wchar_t *argv0;
+    wchar_t *p = NULL;
+    Py_ssize_t n = 0;
+#ifdef HAVE_READLINK
+    wchar_t link[MAXPATHLEN+1];
+    wchar_t argv0copy[2*MAXPATHLEN+1];
+    int nr = 0;
+#endif
+#if defined(HAVE_REALPATH)
+    wchar_t fullpath[MAXPATHLEN];
+#elif defined(MS_WINDOWS)
+    wchar_t fullpath[MAX_PATH];
+#endif
+
+
+    argv0 = argv[0];
+
+#ifdef HAVE_READLINK
+    if (_HAVE_SCRIPT_ARGUMENT(argc, argv))
+        nr = _Py_wreadlink(argv0, link, MAXPATHLEN);
+    if (nr > 0) {
+        /* It's a symlink */
+        link[nr] = '\0';
+        if (link[0] == SEP)
+            argv0 = link; /* Link to absolute path */
+        else if (wcschr(link, SEP) == NULL)
+            ; /* Link without path */
+        else {
+            /* Must join(dirname(argv0), link) */
+            wchar_t *q = wcsrchr(argv0, SEP);
+            if (q == NULL)
+                argv0 = link; /* argv0 without path */
+            else {
+                /* Must make a copy, argv0copy has room for 2 * MAXPATHLEN */
+                wcsncpy(argv0copy, argv0, MAXPATHLEN);
+                q = wcsrchr(argv0copy, SEP);
+                wcsncpy(q+1, link, MAXPATHLEN);
+                q[MAXPATHLEN + 1] = L'\0';
+                argv0 = argv0copy;
+            }
+        }
+    }
+#endif /* HAVE_READLINK */
+
+#if SEP == '\\'
+    /* Special case for Microsoft filename syntax */
+    if (_HAVE_SCRIPT_ARGUMENT(argc, argv)) {
+        wchar_t *q;
+#if defined(MS_WINDOWS)
+        /* Replace the first element in argv with the full path. */
+        wchar_t *ptemp;
+        if (GetFullPathNameW(argv0,
+                           Py_ARRAY_LENGTH(fullpath),
+                           fullpath,
+                           &ptemp)) {
+            argv0 = fullpath;
+        }
+#endif
+        p = wcsrchr(argv0, SEP);
+        /* Test for alternate separator */
+        q = wcsrchr(p ? p : argv0, '/');
+        if (q != NULL)
+            p = q;
+        if (p != NULL) {
+            n = p + 1 - argv0;
+            if (n > 1 && p[-1] != ':')
+                n--; /* Drop trailing separator */
+        }
+    }
+#else /* All other filename syntaxes */
+    if (_HAVE_SCRIPT_ARGUMENT(argc, argv)) {
+#if defined(HAVE_REALPATH)
+        if (_Py_wrealpath(argv0, fullpath, Py_ARRAY_LENGTH(fullpath))) {
+            argv0 = fullpath;
+        }
+#endif
+        p = wcsrchr(argv0, SEP);
+    }
+    if (p != NULL) {
+        n = p + 1 - argv0;
+#if SEP == '/' /* Special case for Unix filename syntax */
+        if (n > 1)
+            n--; /* Drop trailing separator */
+#endif /* Unix */
+    }
+#endif /* All others */
+
+    return PyUnicode_FromWideChar(argv0, n);
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/Python/sysmodule.c b/Python/sysmodule.c
index ea2ccb2..b33a316 100644
--- a/Python/sysmodule.c
+++ b/Python/sysmodule.c
@@ -2336,143 +2336,41 @@
                 av = NULL;
                 break;
             }
-            PyList_SetItem(av, i, v);
+            PyList_SET_ITEM(av, i, v);
         }
     }
     return av;
 }
 
-#define _HAVE_SCRIPT_ARGUMENT(argc, argv) \
-  (argc > 0 && argv0 != NULL && \
-   wcscmp(argv0, L"-c") != 0 && wcscmp(argv0, L"-m") != 0)
-
-static void
-sys_update_path(int argc, wchar_t **argv)
-{
-    wchar_t *argv0;
-    wchar_t *p = NULL;
-    Py_ssize_t n = 0;
-    PyObject *a;
-    PyObject *path;
-#ifdef HAVE_READLINK
-    wchar_t link[MAXPATHLEN+1];
-    wchar_t argv0copy[2*MAXPATHLEN+1];
-    int nr = 0;
-#endif
-#if defined(HAVE_REALPATH)
-    wchar_t fullpath[MAXPATHLEN];
-#elif defined(MS_WINDOWS)
-    wchar_t fullpath[MAX_PATH];
-#endif
-
-    path = _PySys_GetObjectId(&PyId_path);
-    if (path == NULL)
-        return;
-
-    argv0 = argv[0];
-
-#ifdef HAVE_READLINK
-    if (_HAVE_SCRIPT_ARGUMENT(argc, argv))
-        nr = _Py_wreadlink(argv0, link, MAXPATHLEN);
-    if (nr > 0) {
-        /* It's a symlink */
-        link[nr] = '\0';
-        if (link[0] == SEP)
-            argv0 = link; /* Link to absolute path */
-        else if (wcschr(link, SEP) == NULL)
-            ; /* Link without path */
-        else {
-            /* Must join(dirname(argv0), link) */
-            wchar_t *q = wcsrchr(argv0, SEP);
-            if (q == NULL)
-                argv0 = link; /* argv0 without path */
-            else {
-                /* Must make a copy, argv0copy has room for 2 * MAXPATHLEN */
-                wcsncpy(argv0copy, argv0, MAXPATHLEN);
-                q = wcsrchr(argv0copy, SEP);
-                wcsncpy(q+1, link, MAXPATHLEN);
-                q[MAXPATHLEN + 1] = L'\0';
-                argv0 = argv0copy;
-            }
-        }
-    }
-#endif /* HAVE_READLINK */
-#if SEP == '\\' /* Special case for MS filename syntax */
-    if (_HAVE_SCRIPT_ARGUMENT(argc, argv)) {
-        wchar_t *q;
-#if defined(MS_WINDOWS)
-        /* Replace the first element in argv with the full path. */
-        wchar_t *ptemp;
-        if (GetFullPathNameW(argv0,
-                           Py_ARRAY_LENGTH(fullpath),
-                           fullpath,
-                           &ptemp)) {
-            argv0 = fullpath;
-        }
-#endif
-        p = wcsrchr(argv0, SEP);
-        /* Test for alternate separator */
-        q = wcsrchr(p ? p : argv0, '/');
-        if (q != NULL)
-            p = q;
-        if (p != NULL) {
-            n = p + 1 - argv0;
-            if (n > 1 && p[-1] != ':')
-                n--; /* Drop trailing separator */
-        }
-    }
-#else /* All other filename syntaxes */
-    if (_HAVE_SCRIPT_ARGUMENT(argc, argv)) {
-#if defined(HAVE_REALPATH)
-        if (_Py_wrealpath(argv0, fullpath, Py_ARRAY_LENGTH(fullpath))) {
-            argv0 = fullpath;
-        }
-#endif
-        p = wcsrchr(argv0, SEP);
-    }
-    if (p != NULL) {
-        n = p + 1 - argv0;
-#if SEP == '/' /* Special case for Unix filename syntax */
-        if (n > 1)
-            n--; /* Drop trailing separator */
-#endif /* Unix */
-    }
-#endif /* All others */
-    a = PyUnicode_FromWideChar(argv0, n);
-    if (a == NULL)
-        Py_FatalError("no mem for sys.path insertion");
-    if (PyList_Insert(path, 0, a) < 0)
-        Py_FatalError("sys.path.insert(0) failed");
-    Py_DECREF(a);
-}
-
-_PyInitError
-_PySys_SetArgvWithError(int argc, wchar_t **argv, int updatepath)
+void
+PySys_SetArgvEx(int argc, wchar_t **argv, int updatepath)
 {
     PyObject *av = makeargvobject(argc, argv);
     if (av == NULL) {
-        return _Py_INIT_NO_MEMORY();
+        Py_FatalError("no mem for sys.argv");
     }
     if (PySys_SetObject("argv", av) != 0) {
         Py_DECREF(av);
-        return _Py_INIT_ERR("can't assign sys.argv");
+        Py_FatalError("can't assign sys.argv");
     }
     Py_DECREF(av);
 
     if (updatepath) {
         /* If argv[0] is not '-c' nor '-m', prepend argv[0] to sys.path.
            If argv[0] is a symlink, use the real path. */
-        sys_update_path(argc, argv);
-    }
-    return _Py_INIT_OK();
-}
+        PyObject *argv0 = _PyPathConfig_ComputeArgv0(argc, argv);
+        if (argv0 == NULL) {
+            Py_FatalError("can't compute path0 from argv");
+        }
 
-void
-PySys_SetArgvEx(int argc, wchar_t **argv, int updatepath)
-{
-    _PyInitError err = _PySys_SetArgvWithError(argc, argv, updatepath);
-    if (_Py_INIT_FAILED(err)) {
-        _Py_FatalInitError(err);
+        PyObject *sys_path = _PySys_GetObjectId(&PyId_path);
+        if (sys_path != NULL) {
+            if (PyList_Insert(sys_path, 0, argv0) < 0) {
+                Py_DECREF(argv0);
+                Py_FatalError("can't prepend path0 to sys.path");
+            }
+        }
+        Py_DECREF(argv0);
     }
 }