blob: 2ca84021d8b29a3485a49df266723dd12a2ada63 [file] [log] [blame]
Martin v. Löwis737ea822004-06-08 18:52:54 +00001/* -*- Mode: C; c-file-style: "python" -*- */
2
3#include <Python.h>
4#include <locale.h>
5
/* ASCII-only character tests (as opposed to locale tests).  Unlike the
   <ctype.h> functions these never consult the current locale.  Both are
   macros that evaluate their argument more than once, so do not pass an
   expression with side effects. */
#define ISSPACE(c)  ((c) == ' ' || (c) == '\f' || (c) == '\n' || \
                     (c) == '\r' || (c) == '\t' || (c) == '\v')
#define ISDIGIT(c)  ((c) >= '0' && (c) <= '9')
Martin v. Löwis737ea822004-06-08 18:52:54 +000010
11
/**
 * PyOS_ascii_strtod:
 * @nptr:    the string to convert to a numeric value.
 * @endptr:  if non-%NULL, it returns the character after
 *           the last character used in the conversion.
 *
 * Converts a string to a double value.
 * This function behaves like the standard strtod() function
 * does in the C locale.  It does this without actually
 * changing the current locale, since that would not be
 * thread-safe.
 *
 * This function is typically used when reading configuration
 * files or other non-user input that should be locale independent.
 * To handle input from the user you should normally use the
 * locale-sensitive system strtod() function.
 *
 * Leading ASCII whitespace and an optional sign are consumed here.  What
 * follows must begin with a digit, a '.', or one of the letters i, I, n, N,
 * and must not begin with "0x" or "0X".  If it does not, or if the leading
 * digits are directly followed by the current locale's decimal point (when
 * that is not "."), -1.0 is returned, %EINVAL is stored in %errno and
 * *@endptr is set to @nptr.
 *
 * If the correct value would cause overflow, plus or minus %HUGE_VAL
 * is returned (according to the sign of the value), and %ERANGE is
 * stored in %errno.  If the correct value would cause underflow,
 * zero is returned and %ERANGE is stored in %errno.
 * If memory allocation fails, -1.0 is returned, %ENOMEM is stored in
 * %errno and *@endptr is set to @nptr.
 *
 * This function resets %errno before calling strtod() so that
 * you can reliably detect overflow and underflow.
 *
 * Return value: the double value.
 **/
double
PyOS_ascii_strtod(const char *nptr, char **endptr)
{
    char *fail_pos;
    double val = -1.0;
    struct lconv *locale_data;
    const char *decimal_point;
    size_t decimal_point_len;
    const char *p, *decimal_point_pos;
    const char *end = NULL; /* Silence gcc */
    const char *digits_pos = NULL;
    int negate = 0;

    assert(nptr != NULL);

    fail_pos = NULL;

    locale_data = localeconv();
    decimal_point = locale_data->decimal_point;
    decimal_point_len = strlen(decimal_point);

    assert(decimal_point_len != 0);

    decimal_point_pos = NULL;

    /* We process any leading whitespace and the optional sign manually,
       then pass the remainder to the system strtod.  This ensures that
       the result of an underflow has the correct sign. (bug #1725) */

    p = nptr;
    /* Skip leading space */
    while (ISSPACE(*p))
        p++;

    /* Process leading sign, if present */
    if (*p == '-') {
        negate = 1;
        p++;
    } else if (*p == '+') {
        p++;
    }

    /* What's left should begin with a digit, a decimal point, or one of
       the letters i, I, n, N. It should not begin with 0x or 0X */
    if ((!ISDIGIT(*p) &&
         *p != '.' && *p != 'i' && *p != 'I' && *p != 'n' && *p != 'N')
        ||
        (*p == '0' && (p[1] == 'x' || p[1] == 'X')))
    {
        if (endptr)
            *endptr = (char*)nptr;
        errno = EINVAL;
        return val;
    }
    digits_pos = p;

    /* Only when the locale's decimal point is something other than "."
       do we have to locate the '.' in the input and remember where the
       number ends, so that a translated copy can be built below. */
    if (decimal_point[0] != '.' ||
        decimal_point[1] != 0)
    {
        while (ISDIGIT(*p))
            p++;

        if (*p == '.')
        {
            decimal_point_pos = p++;

            while (ISDIGIT(*p))
                p++;

            /* Loose scan of an optional exponent.  It may cover more
               characters than strtod() will accept; that is harmless
               because strtod() decides where the number really ends. */
            if (*p == 'e' || *p == 'E')
                p++;
            if (*p == '+' || *p == '-')
                p++;
            while (ISDIGIT(*p))
                p++;
            end = p;
        }
        else if (strncmp(p, decimal_point, decimal_point_len) == 0)
        {
            /* Python bug #1417699: the locale's own decimal point must
               not be accepted by this locale-independent parser. */
            if (endptr)
                *endptr = (char*)nptr;
            errno = EINVAL;
            return val;
        }
        /* For the other cases, we need not convert the decimal
           point */
    }

    /* Set errno to zero, so that we can distinguish zero results
       and underflows */
    errno = 0;

    if (decimal_point_pos)
    {
        char *copy, *c;

        /* We need to convert the '.' to the locale specific decimal
           point */
        copy = (char *)PyMem_MALLOC(end - digits_pos +
                                    1 + decimal_point_len);
        if (copy == NULL) {
            if (endptr)
                *endptr = (char *)nptr;
            errno = ENOMEM;
            return val;
        }

        /* copy = <digits before '.'> <locale decimal point> <rest> NUL */
        c = copy;
        memcpy(c, digits_pos, decimal_point_pos - digits_pos);
        c += decimal_point_pos - digits_pos;
        memcpy(c, decimal_point, decimal_point_len);
        c += decimal_point_len;
        memcpy(c, decimal_point_pos + 1,
               end - (decimal_point_pos + 1));
        c += end - (decimal_point_pos + 1);
        *c = 0;

        val = strtod(copy, &fail_pos);

        if (fail_pos)
        {
            /* Translate the stop position from the copy back into the
               caller's string.  fail_pos points into 'copy' while
               decimal_point_pos points into the input, so the two must
               be compared as offsets, never as raw pointers.  If parsing
               went past the decimal point, the copy is
               (decimal_point_len - 1) bytes longer than the input up to
               that position. */
            if (fail_pos - copy > decimal_point_pos - digits_pos)
                fail_pos = (char *)digits_pos +
                    (fail_pos - copy) -
                    (decimal_point_len - 1);
            else
                fail_pos = (char *)digits_pos +
                    (fail_pos - copy);
        }

        PyMem_FREE(copy);

    }
    else {
        val = strtod(digits_pos, &fail_pos);
    }

    /* Nothing was converted: report the original start of the string,
       not the position after the whitespace and sign we skipped. */
    if (fail_pos == digits_pos)
        fail_pos = (char *)nptr;

    if (negate && fail_pos != nptr)
        val = -val;

    if (endptr)
        *endptr = fail_pos;

    return val;
}
189
190
/* From the C99 standard, section 7.19.6:
   The exponent always contains at least two digits, and only as many more
   digits as necessary to represent the exponent.
*/
#define MIN_EXPONENT_DIGITS 2

/* Size of the on-stack copy of the format string that
   PyOS_ascii_formatd() makes; see FORMATBUFLEN in unicodeobject.c */
#define FLOAT_FORMATBUFLEN 120
199
Martin v. Löwis737ea822004-06-08 18:52:54 +0000200/**
201 * PyOS_ascii_formatd:
202 * @buffer: A buffer to place the resulting string in
Christian Heimesb186d002008-03-18 15:15:01 +0000203 * @buf_size: The length of the buffer.
Martin v. Löwis737ea822004-06-08 18:52:54 +0000204 * @format: The printf()-style format to use for the
205 * code to use for converting.
206 * @d: The #gdouble to convert
207 *
208 * Converts a #gdouble to a string, using the '.' as
209 * decimal point. To format the number you pass in
210 * a printf()-style format string. Allowed conversion
Christian Heimesc3f30c42008-02-22 16:37:40 +0000211 * specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'n'.
Martin v. Löwis737ea822004-06-08 18:52:54 +0000212 *
Christian Heimesc3f30c42008-02-22 16:37:40 +0000213 * 'n' is the same as 'g', except it uses the current locale.
Christian Heimesb186d002008-03-18 15:15:01 +0000214 * 'Z' is the same as 'g', except it always has a decimal and
215 * at least one digit after the decimal.
Christian Heimesc3f30c42008-02-22 16:37:40 +0000216 *
Martin v. Löwis737ea822004-06-08 18:52:54 +0000217 * Return value: The pointer to the buffer with the converted string.
218 **/
219char *
220PyOS_ascii_formatd(char *buffer,
Christian Heimesb186d002008-03-18 15:15:01 +0000221 size_t buf_size,
Martin v. Löwis737ea822004-06-08 18:52:54 +0000222 const char *format,
223 double d)
224{
Martin v. Löwis737ea822004-06-08 18:52:54 +0000225 char *p;
Martin v. Löwis737ea822004-06-08 18:52:54 +0000226 char format_char;
Christian Heimesc3f30c42008-02-22 16:37:40 +0000227 size_t format_len = strlen(format);
228
229 /* For type 'n', we need to make a copy of the format string, because
230 we're going to modify 'n' -> 'g', and format is const char*, so we
231 can't modify it directly. FLOAT_FORMATBUFLEN should be longer than
232 we ever need this to be. There's an upcoming check to ensure it's
233 big enough. */
Christian Heimesb186d002008-03-18 15:15:01 +0000234 /* Issue 2264: code 'Z' requires copying the format. 'Z' is 'g', but
235 also with at least one character past the decimal. */
Christian Heimesc3f30c42008-02-22 16:37:40 +0000236 char tmp_format[FLOAT_FORMATBUFLEN];
Martin v. Löwis737ea822004-06-08 18:52:54 +0000237
Christian Heimesc3f30c42008-02-22 16:37:40 +0000238 /* The last character in the format string must be the format char */
239 format_char = format[format_len - 1];
Martin v. Löwis737ea822004-06-08 18:52:54 +0000240
Martin v. Löwis737ea822004-06-08 18:52:54 +0000241 if (format[0] != '%')
242 return NULL;
243
Christian Heimesc3f30c42008-02-22 16:37:40 +0000244 /* I'm not sure why this test is here. It's ensuring that the format
245 string after the first character doesn't have a single quote, a
246 lowercase l, or a percent. This is the reverse of the commented-out
247 test about 10 lines ago. */
Martin v. Löwis737ea822004-06-08 18:52:54 +0000248 if (strpbrk(format + 1, "'l%"))
249 return NULL;
250
Christian Heimesb186d002008-03-18 15:15:01 +0000251 /* Also curious about this function is that it accepts format strings
252 like "%xg", which are invalid for floats. In general, the
253 interface to this function is not very good, but changing it is
254 difficult because it's a public API. */
255
Martin v. Löwis737ea822004-06-08 18:52:54 +0000256 if (!(format_char == 'e' || format_char == 'E' ||
257 format_char == 'f' || format_char == 'F' ||
Christian Heimesc3f30c42008-02-22 16:37:40 +0000258 format_char == 'g' || format_char == 'G' ||
Christian Heimesb186d002008-03-18 15:15:01 +0000259 format_char == 'n' || format_char == 'Z'))
Martin v. Löwis737ea822004-06-08 18:52:54 +0000260 return NULL;
261
Christian Heimesb186d002008-03-18 15:15:01 +0000262 /* Map 'n' or 'Z' format_char to 'g', by copying the format string and
263 replacing the final char with a 'g' */
264 if (format_char == 'n' || format_char == 'Z') {
Christian Heimesc3f30c42008-02-22 16:37:40 +0000265 if (format_len + 1 >= sizeof(tmp_format)) {
266 /* The format won't fit in our copy. Error out. In
Christian Heimesb186d002008-03-18 15:15:01 +0000267 practice, this will never happen and will be
268 detected by returning NULL */
Christian Heimesc3f30c42008-02-22 16:37:40 +0000269 return NULL;
270 }
271 strcpy(tmp_format, format);
272 tmp_format[format_len - 1] = 'g';
273 format = tmp_format;
274 }
Martin v. Löwis737ea822004-06-08 18:52:54 +0000275
Christian Heimesb186d002008-03-18 15:15:01 +0000276
Christian Heimesc3f30c42008-02-22 16:37:40 +0000277 /* Have PyOS_snprintf do the hard work */
Christian Heimesb186d002008-03-18 15:15:01 +0000278 PyOS_snprintf(buffer, buf_size, format, d);
Martin v. Löwis737ea822004-06-08 18:52:54 +0000279
Christian Heimesc3f30c42008-02-22 16:37:40 +0000280 /* Get the current local, and find the decimal point character (or
281 string?). Convert that string back to a dot. Do not do this if
282 using the 'n' (number) format code. */
283 if (format_char != 'n') {
284 struct lconv *locale_data = localeconv();
285 const char *decimal_point = locale_data->decimal_point;
286 size_t decimal_point_len = strlen(decimal_point);
287 size_t rest_len;
Martin v. Löwis737ea822004-06-08 18:52:54 +0000288
Christian Heimesc3f30c42008-02-22 16:37:40 +0000289 assert(decimal_point_len != 0);
Martin v. Löwis737ea822004-06-08 18:52:54 +0000290
Christian Heimesc3f30c42008-02-22 16:37:40 +0000291 if (decimal_point[0] != '.' || decimal_point[1] != 0) {
292 p = buffer;
Martin v. Löwis737ea822004-06-08 18:52:54 +0000293
Christian Heimesc3f30c42008-02-22 16:37:40 +0000294 if (*p == '+' || *p == '-')
295 p++;
Martin v. Löwis737ea822004-06-08 18:52:54 +0000296
Christian Heimesc3f30c42008-02-22 16:37:40 +0000297 while (isdigit(Py_CHARMASK(*p)))
298 p++;
Martin v. Löwis737ea822004-06-08 18:52:54 +0000299
Christian Heimesb186d002008-03-18 15:15:01 +0000300 if (strncmp(p, decimal_point,
301 decimal_point_len) == 0) {
Christian Heimesc3f30c42008-02-22 16:37:40 +0000302 *p = '.';
303 p++;
304 if (decimal_point_len > 1) {
305 rest_len = strlen(p +
306 (decimal_point_len - 1));
307 memmove(p, p + (decimal_point_len - 1),
308 rest_len);
309 p[rest_len] = 0;
310 }
311 }
312 }
313 }
314
315 /* If an exponent exists, ensure that the exponent is at least
316 MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
317 for the extra zeros. Also, if there are more than
318 MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
319 back to MIN_EXPONENT_DIGITS */
320 p = strpbrk(buffer, "eE");
321 if (p && (*(p + 1) == '-' || *(p + 1) == '+')) {
322 char *start = p + 2;
323 int exponent_digit_cnt = 0;
324 int leading_zero_cnt = 0;
325 int in_leading_zeros = 1;
326 int significant_digit_cnt;
327
328 p += 2;
329 while (*p && isdigit(Py_CHARMASK(*p))) {
330 if (in_leading_zeros && *p == '0')
331 ++leading_zero_cnt;
332 if (*p != '0')
333 in_leading_zeros = 0;
334 ++p;
335 ++exponent_digit_cnt;
336 }
337
338 significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
339 if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
340 /* If there are 2 exactly digits, we're done,
341 regardless of what they contain */
342 }
343 else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
344 int extra_zeros_cnt;
345
346 /* There are more than 2 digits in the exponent. See
347 if we can delete some of the leading zeros */
348 if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
349 significant_digit_cnt = MIN_EXPONENT_DIGITS;
Christian Heimesb186d002008-03-18 15:15:01 +0000350 extra_zeros_cnt = exponent_digit_cnt -
351 significant_digit_cnt;
Christian Heimesc3f30c42008-02-22 16:37:40 +0000352
353 /* Delete extra_zeros_cnt worth of characters from the
354 front of the exponent */
355 assert(extra_zeros_cnt >= 0);
356
357 /* Add one to significant_digit_cnt to copy the
358 trailing 0 byte, thus setting the length */
359 memmove(start,
360 start + extra_zeros_cnt,
361 significant_digit_cnt + 1);
362 }
363 else {
364 /* If there are fewer than 2 digits, add zeros
365 until there are 2, if there's enough room */
366 int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
367 if (start + zeros + exponent_digit_cnt + 1
Christian Heimesb186d002008-03-18 15:15:01 +0000368 < buffer + buf_size) {
Christian Heimesc3f30c42008-02-22 16:37:40 +0000369 memmove(start + zeros, start,
370 exponent_digit_cnt + 1);
371 memset(start, '0', zeros);
Martin v. Löwis737ea822004-06-08 18:52:54 +0000372 }
373 }
374 }
375
Christian Heimesb186d002008-03-18 15:15:01 +0000376 /* If format_char is 'Z', make sure we have at least one character
377 after the decimal point (and make sure we have a decimal point). */
378 if (format_char == 'Z') {
379 int insert_count = 0;
380 char* chars_to_insert;
381
382 /* search for the first non-digit character */
383 p = buffer;
384 while (*p && isdigit(Py_CHARMASK(*p)))
385 ++p;
386
387 if (*p == '.') {
388 if (isdigit(Py_CHARMASK(*(p+1)))) {
389 /* Nothing to do, we already have a decimal
390 point and a digit after it */
391 }
392 else {
393 /* We have a decimal point, but no following
394 digit. Insert a zero after the decimal. */
395 ++p;
396 chars_to_insert = "0";
397 insert_count = 1;
398 }
399 }
400 else {
401 chars_to_insert = ".0";
402 insert_count = 2;
403 }
404 if (insert_count) {
405 size_t buf_len = strlen(buffer);
406 if (buf_len + insert_count + 1 >= buf_size) {
407 /* If there is not enough room in the buffer
408 for the additional text, just skip it. It's
409 not worth generating an error over. */
410 }
411 else {
412 memmove(p + insert_count, p,
413 buffer + strlen(buffer) - p + 1);
414 memcpy(p, chars_to_insert, insert_count);
415 }
416 }
417 }
418
Martin v. Löwis737ea822004-06-08 18:52:54 +0000419 return buffer;
420}
421
422double
423PyOS_ascii_atof(const char *nptr)
424{
425 return PyOS_ascii_strtod(nptr, NULL);
426}