blob: 3befecfaa03606b81cd3d2dfd27db56d3d71d932 [file] [log] [blame]
Victor Stinner91b9ecf2019-03-01 17:52:56 +01001#include "Python.h"
2#include "pycore_coreconfig.h"
Victor Stinner6dcb5422019-03-05 02:44:12 +01003#include "pycore_getopt.h"
Victor Stinner5a02e0d2019-03-05 12:32:09 +01004#include "pycore_pystate.h" /* _PyRuntime_Initialize() */
5#include <locale.h> /* setlocale() */
Victor Stinner91b9ecf2019-03-01 17:52:56 +01006
7
/* Build the _PyInitError reported when decoding NAME failed:
   a "cannot decode" user error when LEN is -2, a memory error otherwise. */
#define DECODE_LOCALE_ERR(NAME, LEN) \
    (((LEN) != -2) \
     ? _Py_INIT_NO_MEMORY() \
     : _Py_INIT_USER_ERR("cannot decode " NAME))
12
13
14/* --- File system encoding/errors -------------------------------- */
15
/* config_init_fs_encoding() chooses the filesystem encoding;
   see also initfsencoding(). */
const char *Py_FileSystemDefaultEncoding = NULL;
const char *Py_FileSystemDefaultEncodeErrors = NULL;

/* While a flag below is zero, _Py_ClearFileSystemEncoding() releases the
   matching string above with PyMem_RawFree(). */
int Py_HasFileSystemDefaultEncoding = 0;
int _Py_HasFileSystemDefaultEncodeErrors = 0;
22
23void
24_Py_ClearFileSystemEncoding(void)
25{
26 if (!Py_HasFileSystemDefaultEncoding && Py_FileSystemDefaultEncoding) {
27 PyMem_RawFree((char*)Py_FileSystemDefaultEncoding);
28 Py_FileSystemDefaultEncoding = NULL;
29 }
30 if (!_Py_HasFileSystemDefaultEncodeErrors && Py_FileSystemDefaultEncodeErrors) {
31 PyMem_RawFree((char*)Py_FileSystemDefaultEncodeErrors);
32 Py_FileSystemDefaultEncodeErrors = NULL;
33 }
34}
35
36
37/* Set Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors
38 global configuration variables. */
39int
40_Py_SetFileSystemEncoding(const char *encoding, const char *errors)
41{
42 char *encoding2 = _PyMem_RawStrdup(encoding);
43 if (encoding2 == NULL) {
44 return -1;
45 }
46
47 char *errors2 = _PyMem_RawStrdup(errors);
48 if (errors2 == NULL) {
49 PyMem_RawFree(encoding2);
50 return -1;
51 }
52
53 _Py_ClearFileSystemEncoding();
54
55 Py_FileSystemDefaultEncoding = encoding2;
56 Py_HasFileSystemDefaultEncoding = 0;
57
58 Py_FileSystemDefaultEncodeErrors = errors2;
59 _Py_HasFileSystemDefaultEncodeErrors = 0;
60 return 0;
61}
62
63
64/* --- _PyArgv ---------------------------------------------------- */
65
66_PyInitError
67_PyArgv_Decode(const _PyArgv *args, wchar_t*** argv_p)
68{
69 wchar_t** argv;
70 if (args->use_bytes_argv) {
71 /* +1 for a the NULL terminator */
72 size_t size = sizeof(wchar_t*) * (args->argc + 1);
73 argv = (wchar_t **)PyMem_RawMalloc(size);
74 if (argv == NULL) {
75 return _Py_INIT_NO_MEMORY();
76 }
77
78 for (int i = 0; i < args->argc; i++) {
79 size_t len;
80 wchar_t *arg = Py_DecodeLocale(args->bytes_argv[i], &len);
81 if (arg == NULL) {
82 _Py_wstrlist_clear(i, argv);
83 return DECODE_LOCALE_ERR("command line arguments",
84 (Py_ssize_t)len);
85 }
86 argv[i] = arg;
87 }
88 argv[args->argc] = NULL;
89 }
90 else {
91 argv = args->wchar_argv;
92 }
93 *argv_p = argv;
94 return _Py_INIT_OK();
95}
Victor Stinnercad1f742019-03-05 02:01:27 +010096
97
Victor Stinner6dcb5422019-03-05 02:44:12 +010098/* --- _PyPreCmdline ------------------------------------------------- */
99
100typedef struct {
101 const _PyArgv *args;
102 int argc;
103 wchar_t **argv;
Victor Stinner5a02e0d2019-03-05 12:32:09 +0100104 int nxoption; /* Number of -X options */
105 wchar_t **xoptions; /* -X options */
Victor Stinner6dcb5422019-03-05 02:44:12 +0100106} _PyPreCmdline;
107
108
109static void
110precmdline_clear(_PyPreCmdline *cmdline)
111{
112 if (cmdline->args->use_bytes_argv && cmdline->argv != NULL) {
113 _Py_wstrlist_clear(cmdline->args->argc, cmdline->argv);
114 }
115 cmdline->argv = NULL;
Victor Stinner5a02e0d2019-03-05 12:32:09 +0100116
117 _Py_wstrlist_clear(cmdline->nxoption, cmdline->xoptions);
118 cmdline->nxoption = 0;
119 cmdline->xoptions = NULL;
Victor Stinner6dcb5422019-03-05 02:44:12 +0100120}
121
122
Victor Stinnercad1f742019-03-05 02:01:27 +0100123/* --- _PyPreConfig ----------------------------------------------- */
124
125void
126_PyPreConfig_Clear(_PyPreConfig *config)
127{
128}
129
130
131int
132_PyPreConfig_Copy(_PyPreConfig *config, const _PyPreConfig *config2)
133{
134 _PyPreConfig_Clear(config);
135
136#define COPY_ATTR(ATTR) config->ATTR = config2->ATTR
137
138 COPY_ATTR(isolated);
139 COPY_ATTR(use_environment);
Victor Stinner5a02e0d2019-03-05 12:32:09 +0100140 COPY_ATTR(coerce_c_locale);
141 COPY_ATTR(coerce_c_locale_warn);
142#ifdef MS_WINDOWS
143 COPY_ATTR(legacy_windows_fs_encoding);
144#endif
145 COPY_ATTR(utf8_mode);
Victor Stinnercad1f742019-03-05 02:01:27 +0100146
147#undef COPY_ATTR
148 return 0;
149}
150
151
152void
153_PyPreConfig_GetGlobalConfig(_PyPreConfig *config)
154{
155#define COPY_FLAG(ATTR, VALUE) \
156 if (config->ATTR == -1) { \
157 config->ATTR = VALUE; \
158 }
159#define COPY_NOT_FLAG(ATTR, VALUE) \
160 if (config->ATTR == -1) { \
161 config->ATTR = !(VALUE); \
162 }
163
164 COPY_FLAG(isolated, Py_IsolatedFlag);
165 COPY_NOT_FLAG(use_environment, Py_IgnoreEnvironmentFlag);
Victor Stinner5a02e0d2019-03-05 12:32:09 +0100166#ifdef MS_WINDOWS
167 COPY_FLAG(legacy_windows_fs_encoding, Py_LegacyWindowsFSEncodingFlag);
168#endif
169 COPY_FLAG(utf8_mode, Py_UTF8Mode);
Victor Stinnercad1f742019-03-05 02:01:27 +0100170
171#undef COPY_FLAG
172#undef COPY_NOT_FLAG
173}
174
175
176void
177_PyPreConfig_SetGlobalConfig(const _PyPreConfig *config)
178{
179#define COPY_FLAG(ATTR, VAR) \
180 if (config->ATTR != -1) { \
181 VAR = config->ATTR; \
182 }
183#define COPY_NOT_FLAG(ATTR, VAR) \
184 if (config->ATTR != -1) { \
185 VAR = !config->ATTR; \
186 }
187
188 COPY_FLAG(isolated, Py_IsolatedFlag);
189 COPY_NOT_FLAG(use_environment, Py_IgnoreEnvironmentFlag);
Victor Stinner5a02e0d2019-03-05 12:32:09 +0100190#ifdef MS_WINDOWS
191 COPY_FLAG(legacy_windows_fs_encoding, Py_LegacyWindowsFSEncodingFlag);
192#endif
193 COPY_FLAG(utf8_mode, Py_UTF8Mode);
Victor Stinnercad1f742019-03-05 02:01:27 +0100194
195#undef COPY_FLAG
196#undef COPY_NOT_FLAG
197}
198
199
Victor Stinner5a02e0d2019-03-05 12:32:09 +0100200const char*
201_PyPreConfig_GetEnv(const _PyPreConfig *config, const char *name)
202{
203 assert(config->use_environment >= 0);
204
205 if (!config->use_environment) {
206 return NULL;
207 }
208
209 const char *var = getenv(name);
210 if (var && var[0] != '\0') {
211 return var;
212 }
213 else {
214 return NULL;
215 }
216}
217
218
/* Parse the decimal integer stored in str and write it to *result.

   Return 0 on success. Return -1 and leave *result unchanged if str
   contains no number at all (for example, the empty string), if
   characters remain after the number, or if the value does not fit
   in an int. */
int
_Py_str_to_int(const char *str, int *result)
{
    char *endptr = NULL;
    errno = 0;
    long value = strtol(str, &endptr, 10);
    if (endptr == str) {
        /* strtol() performed no conversion: without this check, an empty
           string would be accepted as the integer 0. */
        return -1;
    }
    if (*endptr != '\0' || errno == ERANGE) {
        return -1;
    }
    if (value < INT_MIN || value > INT_MAX) {
        return -1;
    }

    *result = (int)value;
    return 0;
}
235
236
237void
238_Py_get_env_flag(_PyPreConfig *config, int *flag, const char *name)
239{
240 const char *var = _PyPreConfig_GetEnv(config, name);
241 if (!var) {
242 return;
243 }
244 int value;
245 if (_Py_str_to_int(var, &value) < 0 || value < 0) {
246 /* PYTHONDEBUG=text and PYTHONDEBUG=-2 behave as PYTHONDEBUG=1 */
247 value = 1;
248 }
249 if (*flag < value) {
250 *flag = value;
251 }
252}
253
254
/* Search the first nxoption entries of xoptions for the option name.

   The key of an entry is the text before its first '=' character, or the
   whole entry if it has none. Return the first entry whose key is equal
   to name (the full entry, including any "=value" part), or NULL if no
   entry matches. */
const wchar_t*
_Py_get_xoption(int nxoption, wchar_t * const *xoptions, const wchar_t *name)
{
    for (int idx = 0; idx < nxoption; idx++) {
        const wchar_t *entry = xoptions[idx];
        const wchar_t *eq = wcschr(entry, L'=');
        size_t key_len = (eq == NULL) ? wcslen(entry) : (size_t)(eq - entry);

        /* Compare the prefix first: only when it matches is name known to
           be at least key_len characters long. */
        if (wcsncmp(entry, name, key_len) != 0) {
            continue;
        }
        if (name[key_len] == L'\0') {
            return entry;
        }
    }
    return NULL;
}
274
275
276static _PyInitError
277preconfig_init_utf8_mode(_PyPreConfig *config, const _PyPreCmdline *cmdline)
278{
279 const wchar_t *xopt;
280 if (cmdline) {
281 xopt = _Py_get_xoption(cmdline->nxoption, cmdline->xoptions, L"utf8");
282 }
283 else {
284 xopt = NULL;
285 }
286 if (xopt) {
287 wchar_t *sep = wcschr(xopt, L'=');
288 if (sep) {
289 xopt = sep + 1;
290 if (wcscmp(xopt, L"1") == 0) {
291 config->utf8_mode = 1;
292 }
293 else if (wcscmp(xopt, L"0") == 0) {
294 config->utf8_mode = 0;
295 }
296 else {
297 return _Py_INIT_USER_ERR("invalid -X utf8 option value");
298 }
299 }
300 else {
301 config->utf8_mode = 1;
302 }
303 return _Py_INIT_OK();
304 }
305
306 const char *opt = _PyPreConfig_GetEnv(config, "PYTHONUTF8");
307 if (opt) {
308 if (strcmp(opt, "1") == 0) {
309 config->utf8_mode = 1;
310 }
311 else if (strcmp(opt, "0") == 0) {
312 config->utf8_mode = 0;
313 }
314 else {
315 return _Py_INIT_USER_ERR("invalid PYTHONUTF8 environment "
316 "variable value");
317 }
318 return _Py_INIT_OK();
319 }
320
321 return _Py_INIT_OK();
322}
323
324
325static void
326preconfig_init_locale(_PyPreConfig *config)
327{
328 /* Test also if coerce_c_locale equals 1: PYTHONCOERCECLOCALE=1 doesn't
329 imply that the C locale is always coerced. It is only coerced if
330 if the LC_CTYPE locale is "C". */
331 if (config->coerce_c_locale != 0) {
332 /* The C locale enables the C locale coercion (PEP 538) */
333 if (_Py_LegacyLocaleDetected()) {
334 config->coerce_c_locale = 1;
335 }
336 else {
337 config->coerce_c_locale = 0;
338 }
339 }
340}
341
342
343static _PyInitError
344preconfig_read(_PyPreConfig *config, const _PyPreCmdline *cmdline)
Victor Stinnercad1f742019-03-05 02:01:27 +0100345{
346 _PyPreConfig_GetGlobalConfig(config);
347
348 if (config->isolated > 0) {
349 config->use_environment = 0;
350 }
351
352 /* Default values */
353 if (config->use_environment < 0) {
354 config->use_environment = 0;
355 }
356
Victor Stinner5a02e0d2019-03-05 12:32:09 +0100357 if (config->use_environment) {
358#ifdef MS_WINDOWS
359 _Py_get_env_flag(config, &config->legacy_windows_fs_encoding,
360 "PYTHONLEGACYWINDOWSFSENCODING");
361#endif
362
363 const char *env = _PyPreConfig_GetEnv(config, "PYTHONCOERCECLOCALE");
364 if (env) {
365 if (strcmp(env, "0") == 0) {
366 if (config->coerce_c_locale < 0) {
367 config->coerce_c_locale = 0;
368 }
369 }
370 else if (strcmp(env, "warn") == 0) {
371 config->coerce_c_locale_warn = 1;
372 }
373 else {
374 if (config->coerce_c_locale < 0) {
375 config->coerce_c_locale = 1;
376 }
377 }
378 }
379 }
380
381#ifdef MS_WINDOWS
382 if (config->legacy_windows_fs_encoding) {
383 config->utf8_mode = 0;
384 }
385#endif
386
387 if (config->utf8_mode < 0) {
388 _PyInitError err = preconfig_init_utf8_mode(config, cmdline);
389 if (_Py_INIT_FAILED(err)) {
390 return err;
391 }
392 }
393
394 if (config->coerce_c_locale != 0) {
395 preconfig_init_locale(config);
396 }
397
398#ifndef MS_WINDOWS
399 if (config->utf8_mode < 0) {
400 /* The C locale and the POSIX locale enable the UTF-8 Mode (PEP 540) */
401 const char *ctype_loc = setlocale(LC_CTYPE, NULL);
402 if (ctype_loc != NULL
403 && (strcmp(ctype_loc, "C") == 0
404 || strcmp(ctype_loc, "POSIX") == 0))
405 {
406 config->utf8_mode = 1;
407 }
408 }
409#endif
410
411 if (config->coerce_c_locale < 0) {
412 config->coerce_c_locale = 0;
413 }
414 if (config->utf8_mode < 0) {
415 config->utf8_mode = 0;
416 }
417
418 assert(config->coerce_c_locale >= 0);
419 assert(config->utf8_mode >= 0);
Victor Stinner6dcb5422019-03-05 02:44:12 +0100420 assert(config->isolated >= 0);
Victor Stinnercad1f742019-03-05 02:01:27 +0100421 assert(config->use_environment >= 0);
422
423 return _Py_INIT_OK();
424}
425
426
Victor Stinner5a02e0d2019-03-05 12:32:09 +0100427_PyInitError
428_PyPreConfig_Read(_PyPreConfig *config)
429{
430 return preconfig_read(config, NULL);
431}
432
433
Victor Stinnercad1f742019-03-05 02:01:27 +0100434int
435_PyPreConfig_AsDict(const _PyPreConfig *config, PyObject *dict)
436{
437#define SET_ITEM(KEY, EXPR) \
438 do { \
439 PyObject *obj = (EXPR); \
440 if (obj == NULL) { \
441 goto fail; \
442 } \
443 int res = PyDict_SetItemString(dict, (KEY), obj); \
444 Py_DECREF(obj); \
445 if (res < 0) { \
446 goto fail; \
447 } \
448 } while (0)
449#define SET_ITEM_INT(ATTR) \
450 SET_ITEM(#ATTR, PyLong_FromLong(config->ATTR))
451
452 SET_ITEM_INT(isolated);
453 SET_ITEM_INT(use_environment);
Victor Stinner5a02e0d2019-03-05 12:32:09 +0100454 SET_ITEM_INT(coerce_c_locale);
455 SET_ITEM_INT(coerce_c_locale_warn);
456 SET_ITEM_INT(utf8_mode);
457#ifdef MS_WINDOWS
458 SET_ITEM_INT(legacy_windows_fs_encoding);
459#endif
Victor Stinnercad1f742019-03-05 02:01:27 +0100460 return 0;
461
462fail:
463 return -1;
464
465#undef SET_ITEM
466#undef SET_ITEM_INT
467}
Victor Stinner6dcb5422019-03-05 02:44:12 +0100468
469
470/* Parse the command line arguments */
471static _PyInitError
472preconfig_parse_cmdline(_PyPreConfig *config, _PyPreCmdline *cmdline)
473{
474 _PyOS_ResetGetOpt();
475 /* Don't log parsing errors into stderr here: _PyCoreConfig_ReadFromArgv()
476 is responsible for that */
477 _PyOS_opterr = 0;
478 do {
479 int longindex = -1;
480 int c = _PyOS_GetOpt(cmdline->args->argc, cmdline->argv, &longindex);
481
482 if (c == EOF || c == 'c' || c == 'm') {
483 break;
484 }
485
486 switch (c) {
487 case 'E':
488 config->use_environment = 0;
489 break;
490
491 case 'I':
492 config->isolated++;
493 break;
494
Victor Stinner5a02e0d2019-03-05 12:32:09 +0100495 case 'X':
496 {
497 _PyInitError err;
498 err = _Py_wstrlist_append(&cmdline->nxoption,
499 &cmdline->xoptions,
500 _PyOS_optarg);
501 if (_Py_INIT_FAILED(err)) {
502 return err;
503 }
504 break;
505 }
506
Victor Stinner6dcb5422019-03-05 02:44:12 +0100507 default:
508 /* ignore other argument:
509 handled by _PyCoreConfig_ReadFromArgv() */
510 break;
511 }
512 } while (1);
513
514 return _Py_INIT_OK();
515}
516
517
Victor Stinner5a02e0d2019-03-05 12:32:09 +0100518static _PyInitError
519preconfig_from_argv(_PyPreConfig *config, const _PyArgv *args)
Victor Stinner6dcb5422019-03-05 02:44:12 +0100520{
521 _PyInitError err;
522
523 _PyPreCmdline cmdline;
524 memset(&cmdline, 0, sizeof(cmdline));
525 cmdline.args = args;
526
527 err = _PyArgv_Decode(cmdline.args, &cmdline.argv);
528 if (_Py_INIT_FAILED(err)) {
529 goto done;
530 }
531
532 err = preconfig_parse_cmdline(config, &cmdline);
533 if (_Py_INIT_FAILED(err)) {
534 goto done;
535 }
536
Victor Stinner5a02e0d2019-03-05 12:32:09 +0100537 err = preconfig_read(config, &cmdline);
Victor Stinner6dcb5422019-03-05 02:44:12 +0100538 if (_Py_INIT_FAILED(err)) {
539 goto done;
540 }
541 err = _Py_INIT_OK();
542
543done:
544 precmdline_clear(&cmdline);
545 return err;
546}
547
548
Victor Stinner5a02e0d2019-03-05 12:32:09 +0100549/* Read the preconfiguration. */
550_PyInitError
551_PyPreConfig_ReadFromArgv(_PyPreConfig *config, const _PyArgv *args)
552{
553 _PyInitError err;
554
555 err = _PyRuntime_Initialize();
556 if (_Py_INIT_FAILED(err)) {
557 return err;
558 }
559
560 char *init_ctype_locale = NULL;
561 int init_utf8_mode = Py_UTF8Mode;
562#ifdef MS_WINDOWS
563 int init_legacy_encoding = Py_LegacyWindowsFSEncodingFlag;
564#endif
565 _PyPreConfig save_config = _PyPreConfig_INIT;
566 int locale_coerced = 0;
567 int loops = 0;
568
569 /* copy LC_CTYPE locale */
570 const char *loc = setlocale(LC_CTYPE, NULL);
571 if (loc == NULL) {
572 err = _Py_INIT_ERR("failed to LC_CTYPE locale");
573 goto done;
574 }
575 init_ctype_locale = _PyMem_RawStrdup(loc);
576 if (init_ctype_locale == NULL) {
577 err = _Py_INIT_NO_MEMORY();
578 goto done;
579 }
580
581 if (_PyPreConfig_Copy(&save_config, config) < 0) {
582 err = _Py_INIT_NO_MEMORY();
583 goto done;
584 }
585
586 /* Set LC_CTYPE to the user preferred locale */
587 _Py_SetLocaleFromEnv(LC_CTYPE);
588
589 while (1) {
590 int utf8_mode = config->utf8_mode;
591
592 /* Watchdog to prevent an infinite loop */
593 loops++;
594 if (loops == 3) {
595 err = _Py_INIT_ERR("Encoding changed twice while "
596 "reading the configuration");
597 goto done;
598 }
599
600 /* bpo-34207: Py_DecodeLocale() and Py_EncodeLocale() depend
601 on Py_UTF8Mode and Py_LegacyWindowsFSEncodingFlag. */
602 Py_UTF8Mode = config->utf8_mode;
603#ifdef MS_WINDOWS
604 Py_LegacyWindowsFSEncodingFlag = config->legacy_windows_fs_encoding;
605#endif
606
607 err = preconfig_from_argv(config, args);
608 if (_Py_INIT_FAILED(err)) {
609 goto done;
610 }
611
612 if (locale_coerced) {
613 config->coerce_c_locale = 1;
614 }
615
616 /* The legacy C locale assumes ASCII as the default text encoding, which
617 * causes problems not only for the CPython runtime, but also other
618 * components like GNU readline.
619 *
620 * Accordingly, when the CLI detects it, it attempts to coerce it to a
621 * more capable UTF-8 based alternative.
622 *
623 * See the documentation of the PYTHONCOERCECLOCALE setting for more
624 * details.
625 */
626 int encoding_changed = 0;
627 if (config->coerce_c_locale && !locale_coerced) {
628 locale_coerced = 1;
629 _Py_CoerceLegacyLocale(0);
630 encoding_changed = 1;
631 }
632
633 if (utf8_mode == -1) {
634 if (config->utf8_mode == 1) {
635 /* UTF-8 Mode enabled */
636 encoding_changed = 1;
637 }
638 }
639 else {
640 if (config->utf8_mode != utf8_mode) {
641 encoding_changed = 1;
642 }
643 }
644
645 if (!encoding_changed) {
646 break;
647 }
648
649 /* Reset the configuration before reading again the configuration,
650 just keep UTF-8 Mode value. */
651 int new_utf8_mode = config->utf8_mode;
652 int new_coerce_c_locale = config->coerce_c_locale;
653 if (_PyPreConfig_Copy(config, &save_config) < 0) {
654 err = _Py_INIT_NO_MEMORY();
655 goto done;
656 }
657 config->utf8_mode = new_utf8_mode;
658 config->coerce_c_locale = new_coerce_c_locale;
659
660 /* The encoding changed: read again the configuration
661 with the new encoding */
662 }
663 err = _Py_INIT_OK();
664
665done:
666 if (init_ctype_locale != NULL) {
667 setlocale(LC_CTYPE, init_ctype_locale);
668 }
669 _PyPreConfig_Clear(&save_config);
670 Py_UTF8Mode = init_utf8_mode ;
671#ifdef MS_WINDOWS
672 Py_LegacyWindowsFSEncodingFlag = init_legacy_encoding;
673#endif
674 return err;
675}
676
677
Victor Stinner6dcb5422019-03-05 02:44:12 +0100678void
679_PyPreConfig_Write(const _PyPreConfig *config)
680{
Victor Stinner5a02e0d2019-03-05 12:32:09 +0100681 _PyPreConfig_SetGlobalConfig(config);
682
683 if (config->coerce_c_locale) {
684 _Py_CoerceLegacyLocale(config->coerce_c_locale_warn);
685 }
686
687 /* Set LC_CTYPE to the user preferred locale */
688 _Py_SetLocaleFromEnv(LC_CTYPE);
Victor Stinner6dcb5422019-03-05 02:44:12 +0100689}