bpo-29240, bpo-32030: Py_Main() re-reads config if encoding changes (#4899)

bpo-29240, bpo-32030: If the encoding changes (C locale coerced or
UTF-8 Mode changed), Py_Main() now reads the configuration again with
the new encoding.

Changes:

* Add _Py_UnixMain() called by main().
* Rename pymain_free_pymain() to pymain_clear_pymain(); it can now be
  called multiple times.
* Rename pymain_parse_cmdline_envvars() to pymain_read_conf().
* Py_Main() now clears orig_argc and orig_argv at exit.
* Remove argv_copy2: Py_Main() doesn't modify argv anymore, so there is
  no longer any need for two copies of the wchar_t** argv.
* _PyCoreConfig: add coerce_c_locale and coerce_c_locale_warn.
* Py_UTF8Mode is now initialized to -1.
* Locale coercion (PEP 538) now respects -I and -E options.
diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c
index 604493d..e702f7c 100644
--- a/Python/bltinmodule.c
+++ b/Python/bltinmodule.c
@@ -29,9 +29,10 @@
 int Py_HasFileSystemDefaultEncoding = 0;
 #endif
 const char *Py_FileSystemDefaultEncodeErrors = "surrogateescape";
-/* UTF-8 mode (PEP 540): if non-zero, use the UTF-8 encoding, and change stdin
-   and stdout error handler to "surrogateescape". */
-int Py_UTF8Mode = 0;
+/* UTF-8 mode (PEP 540): if equal to 1, use the UTF-8 encoding, and change
+   the stdin and stdout error handler to "surrogateescape". It is equal to
+   -1 by default: unknown, will be set by Py_Main(). */
+int Py_UTF8Mode = -1;
 
 _Py_IDENTIFIER(__builtins__);
 _Py_IDENTIFIER(__dict__);
diff --git a/Python/fileutils.c b/Python/fileutils.c
index 4b69049..c4d495d 100644
--- a/Python/fileutils.c
+++ b/Python/fileutils.c
@@ -393,7 +393,7 @@
 #if defined(__APPLE__) || defined(__ANDROID__)
     return _Py_DecodeUTF8_surrogateescape(arg, strlen(arg), size);
 #else
-    if (Py_UTF8Mode) {
+    if (Py_UTF8Mode == 1) {
         return _Py_DecodeUTF8_surrogateescape(arg, strlen(arg), size);
     }
 
@@ -539,7 +539,7 @@
 #if defined(__APPLE__) || defined(__ANDROID__)
     return _Py_EncodeLocaleUTF8(text, error_pos);
 #else   /* __APPLE__ */
-    if (Py_UTF8Mode) {
+    if (Py_UTF8Mode == 1) {
         return _Py_EncodeLocaleUTF8(text, error_pos);
     }
 
diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c
index 8c62607..6500995 100644
--- a/Python/pylifecycle.c
+++ b/Python/pylifecycle.c
@@ -385,18 +385,10 @@
     "C.utf8, or UTF-8 (if available) as alternative Unicode-compatible "
     "locales is recommended.\n";
 
-static int
-_legacy_locale_warnings_enabled(void)
-{
-    const char *coerce_c_locale = getenv("PYTHONCOERCECLOCALE");
-    return (coerce_c_locale != NULL &&
-            strncmp(coerce_c_locale, "warn", 5) == 0);
-}
-
 static void
-_emit_stderr_warning_for_legacy_locale(void)
+_emit_stderr_warning_for_legacy_locale(const _PyCoreConfig *core_config)
 {
-    if (_legacy_locale_warnings_enabled()) {
+    if (core_config->coerce_c_locale_warn) {
         if (_Py_LegacyLocaleDetected()) {
             fprintf(stderr, "%s", _C_LOCALE_WARNING);
         }
@@ -440,12 +432,12 @@
 }
 
 #ifdef PY_COERCE_C_LOCALE
-static const char _C_LOCALE_COERCION_WARNING[] =
+static const char C_LOCALE_COERCION_WARNING[] =
     "Python detected LC_CTYPE=C: LC_CTYPE coerced to %.20s (set another locale "
     "or PYTHONCOERCECLOCALE=0 to disable this locale coercion behavior).\n";
 
 static void
-_coerce_default_locale_settings(const _LocaleCoercionTarget *target)
+_coerce_default_locale_settings(const _PyCoreConfig *config, const _LocaleCoercionTarget *target)
 {
     const char *newloc = target->locale_name;
 
@@ -458,8 +450,8 @@
                 "Error setting LC_CTYPE, skipping C locale coercion\n");
         return;
     }
-    if (_legacy_locale_warnings_enabled()) {
-        fprintf(stderr, _C_LOCALE_COERCION_WARNING, newloc);
+    if (config->coerce_c_locale_warn) {
+        fprintf(stderr, C_LOCALE_COERCION_WARNING, newloc);
     }
 
     /* Reconfigure with the overridden environment variables */
@@ -468,47 +460,31 @@
 #endif
 
 void
-_Py_CoerceLegacyLocale(void)
+_Py_CoerceLegacyLocale(const _PyCoreConfig *config)
 {
 #ifdef PY_COERCE_C_LOCALE
-    /* We ignore the Python -E and -I flags here, as the CLI needs to sort out
-     * the locale settings *before* we try to do anything with the command
-     * line arguments. For cross-platform debugging purposes, we also need
-     * to give end users a way to force even scripts that are otherwise
-     * isolated from their environment to use the legacy ASCII-centric C
-     * locale.
-     *
-     * Ignoring -E and -I is safe from a security perspective, as we only use
-     * the setting to turn *off* the implicit locale coercion, and anyone with
-     * access to the process environment already has the ability to set
-     * `LC_ALL=C` to override the C level locale settings anyway.
-     */
-    const char *coerce_c_locale = getenv("PYTHONCOERCECLOCALE");
-    if (coerce_c_locale == NULL || strncmp(coerce_c_locale, "0", 2) != 0) {
-        /* PYTHONCOERCECLOCALE is not set, or is set to something other than "0" */
-        const char *locale_override = getenv("LC_ALL");
-        if (locale_override == NULL || *locale_override == '\0') {
-            /* LC_ALL is also not set (or is set to an empty string) */
-            const _LocaleCoercionTarget *target = NULL;
-            for (target = _TARGET_LOCALES; target->locale_name; target++) {
-                const char *new_locale = setlocale(LC_CTYPE,
-                                                   target->locale_name);
-                if (new_locale != NULL) {
+    const char *locale_override = getenv("LC_ALL");
+    if (locale_override == NULL || *locale_override == '\0') {
+        /* LC_ALL is also not set (or is set to an empty string) */
+        const _LocaleCoercionTarget *target = NULL;
+        for (target = _TARGET_LOCALES; target->locale_name; target++) {
+            const char *new_locale = setlocale(LC_CTYPE,
+                                               target->locale_name);
+            if (new_locale != NULL) {
 #if !defined(__APPLE__) && !defined(__ANDROID__) && \
-    defined(HAVE_LANGINFO_H) && defined(CODESET)
-                    /* Also ensure that nl_langinfo works in this locale */
-                    char *codeset = nl_langinfo(CODESET);
-                    if (!codeset || *codeset == '\0') {
-                        /* CODESET is not set or empty, so skip coercion */
-                        new_locale = NULL;
-                        _Py_SetLocaleFromEnv(LC_CTYPE);
-                        continue;
-                    }
-#endif
-                    /* Successfully configured locale, so make it the default */
-                    _coerce_default_locale_settings(target);
-                    return;
+defined(HAVE_LANGINFO_H) && defined(CODESET)
+                /* Also ensure that nl_langinfo works in this locale */
+                char *codeset = nl_langinfo(CODESET);
+                if (!codeset || *codeset == '\0') {
+                    /* CODESET is not set or empty, so skip coercion */
+                    new_locale = NULL;
+                    _Py_SetLocaleFromEnv(LC_CTYPE);
+                    continue;
                 }
+#endif
+                /* Successfully configured locale, so make it the default */
+                _coerce_default_locale_settings(config, target);
+                return;
             }
         }
     }
@@ -648,7 +624,7 @@
        the locale's charset without having to switch
        locales. */
     _Py_SetLocaleFromEnv(LC_CTYPE);
-    _emit_stderr_warning_for_legacy_locale();
+    _emit_stderr_warning_for_legacy_locale(core_config);
 #endif
 
     err = _Py_HashRandomization_Init(core_config);