Patch #774665: Make Python LC_NUMERIC agnostic.

commit: 737ea82a5abd448b3e214b44d7d3c579b77e8155 [log] [tgz]
author: Martin v. Löwis <martin@v.loewis.de> Tue Jun 08 18:52:54 2004 +0000
committer: Martin v. Löwis <martin@v.loewis.de> Tue Jun 08 18:52:54 2004 +0000
tree: b0f710ba716db5247dbb6ac88e0a20ea8623f716
parent: 6ccc9a99dfbb2575daa8e01a8e8e3531b61a9d60 [diff]
diff --git a/Doc/lib/liblocale.tex b/Doc/lib/liblocale.tex
index 967c759..bc96189 100644
--- a/Doc/lib/liblocale.tex
+++ b/Doc/lib/liblocale.tex

@@ -456,25 +456,6 @@
 only be used portably to restore it, that is not very useful (except
 perhaps to find out whether or not the locale is \samp{C}).
 
-When Python is embedded in an application, if the application sets the
-locale to something specific before initializing Python, that is
-generally okay, and Python will use whatever locale is set,
-\emph{except} that the \constant{LC_NUMERIC} locale should always be
-\samp{C}.
-
-The \function{setlocale()} function in the \module{locale} module
-gives the Python programmer the impression that you can manipulate the
-\constant{LC_NUMERIC} locale setting, but this not the case at the C
-level: C code will always find that the \constant{LC_NUMERIC} locale
-setting is \samp{C}.  This is because too much would break when the
-decimal point character is set to something else than a period
-(e.g. the Python parser would break).  Caveat: threads that run
-without holding Python's global interpreter lock may occasionally find
-that the numeric locale setting differs; this is because the only
-portable way to implement this feature is to set the numeric locale
-settings to what the user requests, extract the relevant
-characteristics, and then restore the \samp{C} numeric locale.
-
 When Python code uses the \module{locale} module to change the locale,
 this also affects the embedding application.  If the embedding
 application doesn't want this to happen, it should remove the

diff --git a/Include/Python.h b/Include/Python.h
index f332836..0d9a797 100644
--- a/Include/Python.h
+++ b/Include/Python.h

@@ -119,6 +119,8 @@
 #include "compile.h"
 #include "eval.h"
 
+#include "pystrtod.h"
+
 /* _Py_Mangle is defined in compile.c */
 PyAPI_FUNC(int) _Py_Mangle(char *p, char *name, \
 				 char *buffer, size_t maxlen);

diff --git a/Include/pystrtod.h b/Include/pystrtod.h
new file mode 100644
index 0000000..e4e5e52
--- /dev/null
+++ b/Include/pystrtod.h

@@ -0,0 +1,18 @@
+#ifndef Py_STRTOD_H
+#define Py_STRTOD_H
+
+/* Locale-independent conversions between C strings and doubles.
+   The functions declared here always use '.' as the decimal point,
+   whatever the current LC_NUMERIC setting is. */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/* Parse a double from str the way strtod() does in the "C" locale.
+   If ptr is non-NULL, *ptr receives the position just past the last
+   character consumed. */
+PyAPI_FUNC(double) PyOS_ascii_strtod(const char *str, char **ptr);
+
+/* Same as PyOS_ascii_strtod(str, NULL). */
+PyAPI_FUNC(double) PyOS_ascii_atof(const char *str);
+
+/* Format d into buffer (at most buf_len bytes) using a single
+   printf-style 'e', 'E', 'f', 'F', 'g' or 'G' conversion, with '.' as
+   the decimal point.  Returns buffer, or NULL if format is rejected. */
+PyAPI_FUNC(char *) PyOS_ascii_formatd(char *buffer, int buf_len,  const char *format, double d);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* !Py_STRTOD_H */

diff --git a/Makefile.pre.in b/Makefile.pre.in
index e4a12b0..8ac3143 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in

@@ -247,6 +247,7 @@
 		Python/sysmodule.o \
 		Python/traceback.o \
 		Python/getopt.o \
+		Python/pystrtod.o \
 		Python/$(DYNLOADFILE) \
 		$(MACHDEP_OBJS) \
 		$(THREADOBJ)

diff --git a/Misc/NEWS b/Misc/NEWS
index b7cf4b4..a93ae18 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS

@@ -12,6 +12,10 @@
 Core and builtins
 -----------------
 
+- Python no longer requires the LC_NUMERIC locale category to be set
+  to the "C" locale; as a result, it no longer tries to prevent
+  changing the LC_NUMERIC category.
+
 - Bug #952807:  Unpickling pickled instances of subclasses of
   datetime.date, datetime.datetime and datetime.time could yield insane
   objects.  Thanks to Jiwon Seo for a fix.

diff --git a/Modules/_localemodule.c b/Modules/_localemodule.c
index 5edb7f3..0ab79cb 100644
--- a/Modules/_localemodule.c
+++ b/Modules/_localemodule.c

@@ -51,13 +51,6 @@
 PyDoc_STRVAR(setlocale__doc__,
 "(integer,string=None) -> string. Activates/queries locale processing.");
 
-/* to record the LC_NUMERIC settings */
-static PyObject* grouping = NULL;
-static PyObject* thousands_sep = NULL;
-static PyObject* decimal_point = NULL;
-/* if non-null, indicates that LC_NUMERIC is different from "C" */
-static char* saved_numeric = NULL;
-
 /* the grouping is terminated by either 0 or CHAR_MAX */
 static PyObject*
 copy_grouping(char* s)
@@ -167,7 +160,6 @@
     int category;
     char *locale = NULL, *result;
     PyObject *result_object;
-    struct lconv *lc;
 
     if (!PyArg_ParseTuple(args, "i|z:setlocale", &category, &locale))
         return NULL;
@@ -183,29 +175,6 @@
         result_object = PyString_FromString(result);
         if (!result_object)
             return NULL;
-        /* record changes to LC_NUMERIC */
-        if (category == LC_NUMERIC || category == LC_ALL) {
-            if (strcmp(locale, "C") == 0 || strcmp(locale, "POSIX") == 0) {
-                /* user just asked for default numeric locale */
-                if (saved_numeric)
-                    free(saved_numeric);
-                saved_numeric = NULL;
-            } else {
-                /* remember values */
-                lc = localeconv();
-                Py_XDECREF(grouping);
-                grouping = copy_grouping(lc->grouping);
-                Py_XDECREF(thousands_sep);
-                thousands_sep = PyString_FromString(lc->thousands_sep);
-                Py_XDECREF(decimal_point);
-                decimal_point = PyString_FromString(lc->decimal_point);
-                if (saved_numeric)
-                    free(saved_numeric);
-                saved_numeric = strdup(locale);
-                /* restore to "C" */
-                setlocale(LC_NUMERIC, "C");
-            }
-        }
         /* record changes to LC_CTYPE */
         if (category == LC_CTYPE || category == LC_ALL)
             fixup_ulcase();
@@ -213,18 +182,12 @@
         PyErr_Clear();
     } else {
         /* get locale */
-        /* restore LC_NUMERIC first, if appropriate */
-        if (saved_numeric)
-            setlocale(LC_NUMERIC, saved_numeric);
         result = setlocale(category, NULL);
         if (!result) {
             PyErr_SetString(Error, "locale query failed");
             return NULL;
         }
         result_object = PyString_FromString(result);
-        /* restore back to "C" */
-        if (saved_numeric)
-            setlocale(LC_NUMERIC, "C");
     }
     return result_object;
 }
@@ -262,20 +225,13 @@
     Py_XDECREF(x)
 
     /* Numeric information */
-    if (saved_numeric){
-        /* cannot use localeconv results */
-        PyDict_SetItemString(result, "decimal_point", decimal_point);
-        PyDict_SetItemString(result, "grouping", grouping);
-        PyDict_SetItemString(result, "thousands_sep", thousands_sep);
-    } else {
-        RESULT_STRING(decimal_point);
-        RESULT_STRING(thousands_sep);
-        x = copy_grouping(l->grouping);
-        if (!x)
-            goto failed;
-        PyDict_SetItemString(result, "grouping", x);
-        Py_XDECREF(x);
-    }
+    RESULT_STRING(decimal_point);
+    RESULT_STRING(thousands_sep);
+    x = copy_grouping(l->grouping);
+    if (!x)
+        goto failed;
+    PyDict_SetItemString(result, "grouping", x);
+    Py_XDECREF(x);
 
     /* Monetary information */
     RESULT_STRING(int_curr_symbol);
@@ -579,18 +535,6 @@
     /* Check whether this is a supported constant. GNU libc sometimes
        returns numeric values in the char* return value, which would
        crash PyString_FromString.  */
-#ifdef RADIXCHAR
-    if (saved_numeric) {
-	if(item == RADIXCHAR) {
-            Py_INCREF(decimal_point);
-            return decimal_point;
-        }
-        if(item == THOUSEP) {
-            Py_INCREF(thousands_sep);
-            return thousands_sep;
-        }
-    }
-#endif
     for (i = 0; langinfo_constants[i].name; i++)
         if (langinfo_constants[i].value == item) {
             /* Check NULL as a workaround for GNU libc's returning NULL

diff --git a/Modules/cPickle.c b/Modules/cPickle.c
index 105a2e9..e5e20cb 100644
--- a/Modules/cPickle.c
+++ b/Modules/cPickle.c

@@ -3319,7 +3319,7 @@
 	if (!( s=pystrndup(s,len)))  return -1;
 
 	errno = 0;
-	d = strtod(s, &endptr);
+	d = PyOS_ascii_strtod(s, &endptr);
 
 	if (errno || (endptr[0] != '\n') || (endptr[1] != '\0')) {
 		PyErr_SetString(PyExc_ValueError,

diff --git a/Modules/stropmodule.c b/Modules/stropmodule.c
index 8eb64a0..ce19a05 100644
--- a/Modules/stropmodule.c
+++ b/Modules/stropmodule.c

@@ -838,7 +838,6 @@
 static PyObject *
 strop_atof(PyObject *self, PyObject *args)
 {
-	extern double strtod(const char *, char **);
 	char *s, *end;
 	double x;
 	char buffer[256]; /* For errors */
@@ -854,7 +853,7 @@
 	}
 	errno = 0;
 	PyFPE_START_PROTECT("strop_atof", return 0)
-	x = strtod(s, &end);
+	x = PyOS_ascii_strtod(s, &end);
 	PyFPE_END_PROTECT(x)
 	while (*end && isspace(Py_CHARMASK(*end)))
 		end++;

diff --git a/Objects/complexobject.c b/Objects/complexobject.c
index c29d48d..4023fa0 100644
--- a/Objects/complexobject.c
+++ b/Objects/complexobject.c

@@ -272,13 +272,19 @@
 static void
 complex_to_buf(char *buf, int bufsz, PyComplexObject *v, int precision)
 {
-	if (v->cval.real == 0.)
-		PyOS_snprintf(buf, bufsz, "%.*gj",
-			      precision, v->cval.imag);
-	else
-		PyOS_snprintf(buf, bufsz, "(%.*g%+.*gj)",
-			      precision, v->cval.real,
-			      precision, v->cval.imag);
+	/* Write the textual form of v into buf (at most bufsz bytes),
+	   using `precision` significant digits per component and always
+	   '.' as the decimal point.  A complex number whose real part is
+	   zero is written as "<imag>j", any other as "(<real><+|-><imag>j)". */
+	char format[32];
+	if (v->cval.real == 0.) {
+		PyOS_snprintf(format, 32, "%%.%ig", precision);
+		PyOS_ascii_formatd(buf, bufsz, format, v->cval.imag);
+		/* strncat's limit is the number of characters to append,
+		   not the buffer size: pass the space that is left. */
+		strncat(buf, "j", bufsz - strlen(buf) - 1);
+	} else {
+		char re[64], im[64];
+
+		/* Format imaginary part with sign, real part without; a
+		   literal '+' between them would yield "(1+-2j)". */
+		PyOS_snprintf(format, 32, "%%.%ig", precision);
+		PyOS_ascii_formatd(re, 64, format, v->cval.real);
+		PyOS_snprintf(format, 32, "%%+.%ig", precision);
+		PyOS_ascii_formatd(im, 64, format, v->cval.imag);
+		PyOS_snprintf(buf, bufsz, "(%s%sj)", re, im);
+	}
 }
 
 static int
@@ -662,7 +668,6 @@
 static PyObject *
 complex_subtype_from_string(PyTypeObject *type, PyObject *v)
 {
-	extern double strtod(const char *, char **);
 	const char *s, *start;
 	char *end;
 	double x=0.0, y=0.0, z;
@@ -774,7 +779,7 @@
 			}
 			errno = 0;
 			PyFPE_START_PROTECT("strtod", return 0)
-				z = strtod(s, &end) ;
+				z = PyOS_ascii_strtod(s, &end) ;
 			PyFPE_END_PROTECT(z)
 				if (errno != 0) {
 					PyOS_snprintf(buffer, sizeof(buffer),

diff --git a/Objects/floatobject.c b/Objects/floatobject.c
index f1c8e42..bbf56c6 100644
--- a/Objects/floatobject.c
+++ b/Objects/floatobject.c

@@ -132,7 +132,7 @@
 	 * key off errno.
          */
 	PyFPE_START_PROTECT("strtod", return NULL)
-	x = strtod(s, (char **)&end);
+	x = PyOS_ascii_strtod(s, (char **)&end);
 	PyFPE_END_PROTECT(x)
 	errno = 0;
 	/* Believe it or not, Solaris 2.6 can move end *beyond* the null
@@ -164,7 +164,7 @@
 		/* See above -- may have been strtod being anal
 		   about denorms. */
 		PyFPE_START_PROTECT("atof", return NULL)
-		x = atof(s);
+		x = PyOS_ascii_atof(s);
 		PyFPE_END_PROTECT(x)
 		errno = 0;    /* whether atof ever set errno is undefined */
 	}
@@ -223,6 +223,7 @@
 format_float(char *buf, size_t buflen, PyFloatObject *v, int precision)
 {
 	register char *cp;
+	char format[32];
 	/* Subroutine for float_repr and float_print.
 	   We want float numbers to be recognizable as such,
 	   i.e., they should contain a decimal point or an exponent.
@@ -230,7 +231,8 @@
 	   in such cases, we append ".0" to the string. */
 
 	assert(PyFloat_Check(v));
-	PyOS_snprintf(buf, buflen, "%.*g", precision, v->ob_fval);
+	PyOS_snprintf(format, 32, "%%.%ig", precision);
+	PyOS_ascii_formatd(buf, buflen, format, v->ob_fval);
 	cp = buf;
 	if (*cp == '-')
 		cp++;

diff --git a/Objects/stringobject.c b/Objects/stringobject.c
index 361d84d..b14dc51 100644
--- a/Objects/stringobject.c
+++ b/Objects/stringobject.c

@@ -3582,7 +3582,7 @@
 	PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
 		      (flags&F_ALT) ? "#" : "",
 		      prec, type);
-	PyOS_snprintf(buf, buflen, fmt, x);
+        PyOS_ascii_formatd(buf, buflen, fmt, x);
 	return strlen(buf);
 }
 

diff --git a/Python/compile.c b/Python/compile.c
index 15159f8..dd80ae4 100644
--- a/Python/compile.c
+++ b/Python/compile.c

@@ -1379,7 +1379,7 @@
 		Py_complex z;
 		z.real = 0.;
 		PyFPE_START_PROTECT("atof", return 0)
-		z.imag = atof(s);
+		z.imag = PyOS_ascii_atof(s);
 		PyFPE_END_PROTECT(z)
 		return PyComplex_FromCComplex(z);
 	}
@@ -1387,7 +1387,7 @@
 #endif
 	{
 		PyFPE_START_PROTECT("atof", return 0)
-		dx = atof(s);
+		dx = PyOS_ascii_atof(s);
 		PyFPE_END_PROTECT(dx)
 		return PyFloat_FromDouble(dx);
 	}

diff --git a/Python/marshal.c b/Python/marshal.c
index d3cd659..c253119 100644
--- a/Python/marshal.c
+++ b/Python/marshal.c

@@ -457,7 +457,7 @@
 			}
 			buf[n] = '\0';
 			PyFPE_START_PROTECT("atof", return 0)
-			dx = atof(buf);
+			dx = PyOS_ascii_atof(buf);
 			PyFPE_END_PROTECT(dx)
 			return PyFloat_FromDouble(dx);
 		}
@@ -475,7 +475,7 @@
 			}
 			buf[n] = '\0';
 			PyFPE_START_PROTECT("atof", return 0)
-			c.real = atof(buf);
+			c.real = PyOS_ascii_atof(buf);
 			PyFPE_END_PROTECT(c)
 			n = r_byte(p);
 			if (n == EOF || r_string(buf, (int)n, p) != n) {
@@ -485,7 +485,7 @@
 			}
 			buf[n] = '\0';
 			PyFPE_START_PROTECT("atof", return 0)
-			c.imag = atof(buf);
+			c.imag = PyOS_ascii_atof(buf);
 			PyFPE_END_PROTECT(c)
 			return PyComplex_FromCComplex(c);
 		}

diff --git a/Python/pystrtod.c b/Python/pystrtod.c
new file mode 100644
index 0000000..ab25799
--- /dev/null
+++ b/Python/pystrtod.c

@@ -0,0 +1,258 @@
+/* -*- Mode: C; c-file-style: "python" -*- */
+
+#include <Python.h>
+#include <locale.h>
+
+/* Character classification against fixed ASCII sets, written as plain
+   comparisons so the result never depends on the current locale.  Each
+   macro evaluates its argument more than once: pass only expressions
+   without side effects. */
+#define ISDIGIT(c)  ('0' <= (c) && (c) <= '9')
+#define ISXDIGIT(c) (ISDIGIT(c) || \
+                     ('a' <= (c) && (c) <= 'f') || \
+                     ('A' <= (c) && (c) <= 'F'))
+#define ISSPACE(c)  ((c) == ' '  || (c) == '\t' || (c) == '\n' || \
+                     (c) == '\v' || (c) == '\f' || (c) == '\r')
+
+
+/**
+ * PyOS_ascii_strtod:
+ * @nptr:    the string to convert to a numeric value.
+ * @endptr:  if non-%NULL, it returns the character after
+ *           the last character used in the conversion.
+ *
+ * Converts a string to a double value.
+ * This function behaves like the standard strtod() function
+ * does in the C locale. It does this without actually
+ * changing the current locale, since that would not be
+ * thread-safe.
+ *
+ * When the decimal point of the current locale is not ".", the
+ * input is scanned for a '.' that follows the leading digits. If
+ * one is found, a temporary copy of the number with the '.' replaced
+ * by the locale's decimal point is handed to the system strtod(),
+ * and the end position it reports is mapped back onto @nptr.
+ *
+ * This function is typically used when reading configuration
+ * files or other non-user input that should be locale independent.
+ * To handle input from the user you should normally use the
+ * locale-sensitive system strtod() function.
+ *
+ * If the correct value would cause overflow, plus or minus %HUGE_VAL
+ * is returned (according to the sign of the value), and %ERANGE is
+ * stored in %errno. If the correct value would cause underflow,
+ * zero is returned and %ERANGE is stored in %errno.
+ * If the temporary copy cannot be allocated, zero is returned,
+ * %ENOMEM is stored in %errno and @endptr (if non-%NULL) is set
+ * to @nptr.
+ *
+ * This function resets %errno before calling strtod() so that
+ * you can reliably detect overflow and underflow.
+ *
+ * Return value: the double value.
+ **/
+double
+PyOS_ascii_strtod(const char  *nptr, 
+	    char       **endptr)
+{
+	char *fail_pos;
+	double val = 0.0;
+	struct lconv *locale_data;
+	const char *decimal_point;
+	int decimal_point_len;
+	const char *p, *decimal_point_pos;
+	const char *end = NULL; /* Silence gcc */
+
+	assert(nptr != NULL);
+
+	fail_pos = NULL;
+
+	locale_data = localeconv();
+	decimal_point = locale_data->decimal_point;
+	decimal_point_len = strlen(decimal_point);
+
+	assert(decimal_point_len != 0);
+
+	decimal_point_pos = NULL;
+	if (decimal_point[0] != '.' || 
+	    decimal_point[1] != 0)
+	{
+		p = nptr;
+		  /* Skip leading space */
+		while (ISSPACE(*p))
+			p++;
+
+		  /* Skip leading optional sign */
+		if (*p == '+' || *p == '-')
+			p++;
+
+		if (p[0] == '0' && 
+		    (p[1] == 'x' || p[1] == 'X'))
+		{
+			p += 2;
+			  /* HEX - find the (optional) decimal point */
+
+			while (ISXDIGIT(*p))
+				p++;
+
+			if (*p == '.')
+			{
+				decimal_point_pos = p++;
+
+				while (ISXDIGIT(*p))
+					p++;
+
+				if (*p == 'p' || *p == 'P')
+					p++;
+				if (*p == '+' || *p == '-')
+					p++;
+				while (ISDIGIT(*p))
+					p++;
+				end = p;
+			}
+		}
+		else
+		{
+			while (ISDIGIT(*p))
+				p++;
+
+			if (*p == '.')
+			{
+				decimal_point_pos = p++;
+
+				while (ISDIGIT(*p))
+					p++;
+
+				if (*p == 'e' || *p == 'E')
+					p++;
+				if (*p == '+' || *p == '-')
+					p++;
+				while (ISDIGIT(*p))
+					p++;
+				end = p;
+			}
+		}
+		  /* For the other cases, we need not convert the decimal point */
+	}
+
+	  /* Set errno to zero, so that we can distinguish zero results
+	     and underflows */
+	errno = 0;
+
+	if (decimal_point_pos)
+	{
+		char *copy, *c;
+
+		  /* We need to convert the '.' to the locale specific decimal point */
+		copy = malloc(end - nptr + 1 + decimal_point_len);
+		if (copy == NULL)
+		{
+			  /* Out of memory: report that nothing was converted */
+			if (endptr)
+				*endptr = (char *)nptr;
+			errno = ENOMEM;
+			return val;
+		}
+
+		c = copy;
+		memcpy(c, nptr, decimal_point_pos - nptr);
+		c += decimal_point_pos - nptr;
+		memcpy(c, decimal_point, decimal_point_len);
+		c += decimal_point_len;
+		memcpy(c, decimal_point_pos + 1, end - (decimal_point_pos + 1));
+		c += end - (decimal_point_pos + 1);
+		*c = 0;
+
+		val = strtod(copy, &fail_pos);
+
+		if (fail_pos)
+		{
+			  /* fail_pos points into copy and decimal_point_pos into
+			     nptr, so compare their offsets rather than the
+			     pointers.  Past the decimal point the copy is
+			     (decimal_point_len - 1) characters ahead of nptr. */
+			if (fail_pos - copy > decimal_point_pos - nptr)
+				fail_pos = (char *)nptr + (fail_pos - copy) - (decimal_point_len - 1);
+			else
+				fail_pos = (char *)nptr + (fail_pos - copy);
+		}
+
+		free(copy);
+
+	}
+	else
+		val = strtod(nptr, &fail_pos);
+
+	if (endptr)
+		*endptr = fail_pos;
+
+	return val;
+}
+
+
+/**
+ * PyOS_ascii_formatd:
+ * @buffer: A buffer to place the resulting string in
+ * @buf_len: The length of the buffer.
+ * @format: The printf()-style format to use for the
+ *          code to use for converting. 
+ * @d: The double to convert
+ *
+ * Converts a double to a string, using the '.' as
+ * decimal point. To format the number you pass in
+ * a printf()-style format string. Allowed conversion
+ * specifiers are 'e', 'E', 'f', 'F', 'g' and 'G'. 
+ *
+ * The format must start with '%', must not contain a "'" flag,
+ * an 'l' modifier or a second '%', and must end in one of the
+ * allowed conversion specifiers; otherwise nothing is written
+ * to @buffer and %NULL is returned.
+ * 
+ * Return value: The pointer to the buffer with the converted string,
+ * or %NULL if @format was rejected.
+ **/
+char *
+PyOS_ascii_formatd(char       *buffer, 
+		   int         buf_len, 
+		   const char *format, 
+		   double      d)
+{
+	struct lconv *locale_data;
+	const char *decimal_point;
+	int decimal_point_len;
+	char *p;
+	int rest_len;
+	char format_char;
+
+	/* Check the leading '%' first: it also guarantees the format is
+	   non-empty, so indexing its last character below cannot read
+	   before the start of the string. */
+	if (format[0] != '%')
+		return NULL;
+
+	if (strpbrk(format + 1, "'l%"))
+		return NULL;
+
+	format_char = format[strlen(format) - 1];
+
+	if (!(format_char == 'e' || format_char == 'E' || 
+	      format_char == 'f' || format_char == 'F' || 
+	      format_char == 'g' || format_char == 'G'))
+		return NULL;
+
+
+	PyOS_snprintf(buffer, buf_len, format, d);
+
+	locale_data = localeconv();
+	decimal_point = locale_data->decimal_point;
+	decimal_point_len = strlen(decimal_point);
+
+	assert(decimal_point_len != 0);
+
+	/* Only a locale whose decimal point is not "." needs fixing up */
+	if (decimal_point[0] != '.' || 
+	    decimal_point[1] != 0)
+	{
+		p = buffer;
+
+		/* A field width or the ' ' flag pads the output with leading
+		   blanks; step over them so the sign and digits are found. */
+		while (*p == ' ')
+			p++;
+
+		if (*p == '+' || *p == '-')
+			p++;
+
+		while (isdigit((unsigned char)*p))
+			p++;
+
+		if (strncmp(p, decimal_point, decimal_point_len) == 0)
+		{
+			*p = '.';
+			p++;
+			/* A multi-byte decimal point leaves extra bytes behind
+			   the '.': close the gap by shifting the tail down. */
+			if (decimal_point_len > 1) {
+				rest_len = strlen(p + (decimal_point_len - 1));
+				memmove(p, p + (decimal_point_len - 1), 
+					rest_len);
+				p[rest_len] = 0;
+			}
+		}
+	}
+
+	return buffer;
+}
+
+/* Convert the string nptr to a double by delegating to
+   PyOS_ascii_strtod() without asking for the end position. */
+double
+PyOS_ascii_atof(const char *nptr)
+{
+	double value;
+
+	value = PyOS_ascii_strtod(nptr, NULL);
+	return value;
+}
commit	737ea82a5abd448b3e214b44d7d3c579b77e8155	[log] [tgz]
author	Martin v. Löwis <martin@v.loewis.de>	Tue Jun 08 18:52:54 2004 +0000
committer	Martin v. Löwis <martin@v.loewis.de>	Tue Jun 08 18:52:54 2004 +0000
tree	b0f710ba716db5247dbb6ac88e0a20ea8623f716
parent	6ccc9a99dfbb2575daa8e01a8e8e3531b61a9d60 [diff]