blob: 0912cec57ed34695b98251caf9cd98397b9f8eed [file] [log] [blame]
Martin v. Löwis737ea822004-06-08 18:52:54 +00001/* -*- Mode: C; c-file-style: "python" -*- */
2
3#include <Python.h>
4#include <locale.h>
5
/* Locale-independent ASCII classification.  Unlike the <ctype.h>
   functions, these never consult the current locale.  Both macros
   evaluate their argument more than once, so do not pass an expression
   with side effects. */
#define ISSPACE(c)  ((c) == '\t' || (c) == '\n' || (c) == '\v' || \
                     (c) == '\f' || (c) == '\r' || (c) == ' ')
#define ISDIGIT(c)  ('0' <= (c) && (c) <= '9')
Martin v. Löwis737ea822004-06-08 18:52:54 +000010
11
/**
 * PyOS_ascii_strtod:
 * @nptr:    the string to convert to a numeric value.
 * @endptr:  if non-%NULL, it returns the character after
 *           the last character used in the conversion.
 *
 * Converts a string to a double value.
 * This function behaves like the standard strtod() function
 * does in the C locale.  It does this without actually
 * changing the current locale, since that would not be
 * thread-safe.
 *
 * This function is typically used when reading configuration
 * files or other non-user input that should be locale independent.
 * To handle input from the user you should normally use the
 * locale-sensitive system strtod() function.
 *
 * If the correct value would cause overflow, plus or minus %HUGE_VAL
 * is returned (according to the sign of the value), and %ERANGE is
 * stored in %errno.  If the correct value would cause underflow,
 * zero is returned and %ERANGE is stored in %errno.
 * If memory allocation fails, %ENOMEM is stored in %errno.
 *
 * If, after optional whitespace and sign, the input does not start
 * with a digit, '.', or one of the letters i, I, n, N, or if it starts
 * with "0x"/"0X", or if it uses the current locale's (non-'.') decimal
 * point, then %EINVAL is stored in %errno, *@endptr is set to @nptr
 * and -1.0 is returned.  -1.0 is also the value returned on %ENOMEM.
 *
 * This function resets %errno before calling strtod() so that
 * you can reliably detect overflow and underflow.
 *
 * Return value: the double value.
 **/
double
PyOS_ascii_strtod(const char *nptr, char **endptr)
{
    char *fail_pos;
    double val = -1.0;
    struct lconv *locale_data;
    const char *decimal_point;
    size_t decimal_point_len;
    const char *p, *decimal_point_pos;
    const char *end = NULL; /* Silence gcc */
    const char *digits_pos = NULL;
    int negate = 0;

    assert(nptr != NULL);

    fail_pos = NULL;

    locale_data = localeconv();
    decimal_point = locale_data->decimal_point;
    decimal_point_len = strlen(decimal_point);

    assert(decimal_point_len != 0);

    decimal_point_pos = NULL;

    /* We process any leading whitespace and the optional sign manually,
       then pass the remainder to the system strtod.  This ensures that
       the result of an underflow has the correct sign. (bug #1725) */

    p = nptr;
    /* Skip leading space */
    while (ISSPACE(*p))
        p++;

    /* Process leading sign, if present */
    if (*p == '-') {
        negate = 1;
        p++;
    }
    else if (*p == '+') {
        p++;
    }

    /* What's left should begin with a digit, a decimal point, or one of
       the letters i, I, n, N. It should not begin with 0x or 0X */
    if ((!ISDIGIT(*p) &&
         *p != '.' && *p != 'i' && *p != 'I' && *p != 'n' && *p != 'N')
        ||
        (*p == '0' && (p[1] == 'x' || p[1] == 'X')))
    {
        if (endptr)
            *endptr = (char *)nptr;
        errno = EINVAL;
        return val;
    }
    digits_pos = p;

    /* Only when the locale's decimal point is something other than "."
       do we have to look for a '.' in the input and translate it. */
    if (decimal_point[0] != '.' ||
        decimal_point[1] != 0)
    {
        while (ISDIGIT(*p))
            p++;

        if (*p == '.')
        {
            decimal_point_pos = p++;

            while (ISDIGIT(*p))
                p++;

            if (*p == 'e' || *p == 'E')
                p++;
            if (*p == '+' || *p == '-')
                p++;
            while (ISDIGIT(*p))
                p++;
            /* end bounds the text copied for the system strtod */
            end = p;
        }
        else if (strncmp(p, decimal_point, decimal_point_len) == 0)
        {
            /* Python bug #1417699: the locale's own decimal point is
               not accepted by this C-locale parser. */
            if (endptr)
                *endptr = (char *)nptr;
            errno = EINVAL;
            return val;
        }
        /* For the other cases, we need not convert the decimal
           point */
    }

    /* Set errno to zero, so that we can distinguish zero results
       and underflows */
    errno = 0;

    if (decimal_point_pos)
    {
        char *copy, *c;

        /* We need to convert the '.' to the locale specific decimal
           point */
        copy = (char *)PyMem_MALLOC(end - digits_pos +
                                    1 + decimal_point_len);
        if (copy == NULL) {
            if (endptr)
                *endptr = (char *)nptr;
            errno = ENOMEM;
            return val;
        }

        /* copy = <digits before '.'> <locale decimal point> <rest> */
        c = copy;
        memcpy(c, digits_pos, decimal_point_pos - digits_pos);
        c += decimal_point_pos - digits_pos;
        memcpy(c, decimal_point, decimal_point_len);
        c += decimal_point_len;
        memcpy(c, decimal_point_pos + 1,
               end - (decimal_point_pos + 1));
        c += end - (decimal_point_pos + 1);
        *c = 0;

        val = strtod(copy, &fail_pos);

        if (fail_pos)
        {
            /* fail_pos points into copy; map it back onto the input.
               Compare offsets rather than the raw pointers: copy and
               the input string are unrelated objects, so comparing
               their addresses is meaningless.  If parsing stopped past
               the decimal point, undo the extra length the locale
               decimal point added relative to the single '.'. */
            if (fail_pos - copy > decimal_point_pos - digits_pos)
                fail_pos = (char *)digits_pos +
                    (fail_pos - copy) -
                    (decimal_point_len - 1);
            else
                fail_pos = (char *)digits_pos +
                    (fail_pos - copy);
        }

        PyMem_FREE(copy);

    }
    else {
        val = strtod(digits_pos, &fail_pos);
    }

    /* Nothing was converted: report the start of the whole input, not
       the position after the whitespace/sign we skipped ourselves. */
    if (fail_pos == digits_pos)
        fail_pos = (char *)nptr;

    if (negate && fail_pos != nptr)
        val = -val;

    if (endptr)
        *endptr = fail_pos;

    return val;
}
189
Eric Smithb2c7af82008-04-30 02:12:09 +0000190/* Given a string that may have a decimal point in the current
191 locale, change it back to a dot. Since the string cannot get
192 longer, no need for a maximum buffer size parameter. */
193Py_LOCAL_INLINE(void)
194change_decimal_from_locale_to_dot(char* buffer)
195{
196 struct lconv *locale_data = localeconv();
197 const char *decimal_point = locale_data->decimal_point;
198
199 if (decimal_point[0] != '.' || decimal_point[1] != 0) {
200 size_t decimal_point_len = strlen(decimal_point);
201
202 if (*buffer == '+' || *buffer == '-')
203 buffer++;
204 while (isdigit(Py_CHARMASK(*buffer)))
205 buffer++;
206 if (strncmp(buffer, decimal_point, decimal_point_len) == 0) {
207 *buffer = '.';
208 buffer++;
209 if (decimal_point_len > 1) {
210 /* buffer needs to get smaller */
211 size_t rest_len = strlen(buffer +
212 (decimal_point_len - 1));
213 memmove(buffer,
214 buffer + (decimal_point_len - 1),
215 rest_len);
216 buffer[rest_len] = 0;
217 }
218 }
219 }
220}
221
Martin v. Löwis737ea822004-06-08 18:52:54 +0000222
Christian Heimesc3f30c42008-02-22 16:37:40 +0000223/* From the C99 standard, section 7.19.6:
224The exponent always contains at least two digits, and only as many more digits
225as necessary to represent the exponent.
226*/
227#define MIN_EXPONENT_DIGITS 2
228
Eric Smithb2c7af82008-04-30 02:12:09 +0000229/* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS
230 in length. */
231Py_LOCAL_INLINE(void)
232ensure_minumim_exponent_length(char* buffer, size_t buf_size)
233{
234 char *p = strpbrk(buffer, "eE");
235 if (p && (*(p + 1) == '-' || *(p + 1) == '+')) {
236 char *start = p + 2;
237 int exponent_digit_cnt = 0;
238 int leading_zero_cnt = 0;
239 int in_leading_zeros = 1;
240 int significant_digit_cnt;
241
242 /* Skip over the exponent and the sign. */
243 p += 2;
244
245 /* Find the end of the exponent, keeping track of leading
246 zeros. */
247 while (*p && isdigit(Py_CHARMASK(*p))) {
248 if (in_leading_zeros && *p == '0')
249 ++leading_zero_cnt;
250 if (*p != '0')
251 in_leading_zeros = 0;
252 ++p;
253 ++exponent_digit_cnt;
254 }
255
256 significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
257 if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
258 /* If there are 2 exactly digits, we're done,
259 regardless of what they contain */
260 }
261 else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
262 int extra_zeros_cnt;
263
264 /* There are more than 2 digits in the exponent. See
265 if we can delete some of the leading zeros */
266 if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
267 significant_digit_cnt = MIN_EXPONENT_DIGITS;
268 extra_zeros_cnt = exponent_digit_cnt -
269 significant_digit_cnt;
270
271 /* Delete extra_zeros_cnt worth of characters from the
272 front of the exponent */
273 assert(extra_zeros_cnt >= 0);
274
275 /* Add one to significant_digit_cnt to copy the
276 trailing 0 byte, thus setting the length */
277 memmove(start,
278 start + extra_zeros_cnt,
279 significant_digit_cnt + 1);
280 }
281 else {
282 /* If there are fewer than 2 digits, add zeros
283 until there are 2, if there's enough room */
284 int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
285 if (start + zeros + exponent_digit_cnt + 1
286 < buffer + buf_size) {
287 memmove(start + zeros, start,
288 exponent_digit_cnt + 1);
289 memset(start, '0', zeros);
290 }
291 }
292 }
293}
294
295/* Ensure that buffer has a decimal point in it. The decimal point
296 will not be in the current locale, it will always be '.' */
297Py_LOCAL_INLINE(void)
298ensure_decimal_point(char* buffer, size_t buf_size)
299{
300 int insert_count = 0;
301 char* chars_to_insert;
302
303 /* search for the first non-digit character */
304 char *p = buffer;
305 while (*p && isdigit(Py_CHARMASK(*p)))
306 ++p;
307
308 if (*p == '.') {
309 if (isdigit(Py_CHARMASK(*(p+1)))) {
310 /* Nothing to do, we already have a decimal
311 point and a digit after it */
312 }
313 else {
314 /* We have a decimal point, but no following
315 digit. Insert a zero after the decimal. */
316 ++p;
317 chars_to_insert = "0";
318 insert_count = 1;
319 }
320 }
321 else {
322 chars_to_insert = ".0";
323 insert_count = 2;
324 }
325 if (insert_count) {
326 size_t buf_len = strlen(buffer);
327 if (buf_len + insert_count + 1 >= buf_size) {
328 /* If there is not enough room in the buffer
329 for the additional text, just skip it. It's
330 not worth generating an error over. */
331 }
332 else {
333 memmove(p + insert_count, p,
334 buffer + strlen(buffer) - p + 1);
335 memcpy(p, chars_to_insert, insert_count);
336 }
337 }
338}
339
340/* Add the locale specific grouping characters to buffer. Note
341 that any decimal point (if it's present) in buffer is already
342 locale-specific. Return 0 on error, else 1. */
343Py_LOCAL_INLINE(int)
344add_thousands_grouping(char* buffer, size_t buf_size)
345{
346 struct lconv *locale_data = localeconv();
347 const char *grouping = locale_data->grouping;
348 const char *thousands_sep = locale_data->thousands_sep;
349 size_t thousands_sep_len = strlen(thousands_sep);
350 const char *decimal_point = locale_data->decimal_point;
351 char *pend = buffer + strlen(buffer); /* current end of buffer */
352 char *pmax = buffer + buf_size; /* max of buffer */
353 char current_grouping;
354
355 /* Find the decimal point, if any. We're only concerned
356 about the characters to the left of the decimal when
357 adding grouping. */
358 char *p = strstr(buffer, decimal_point);
359 if (!p) {
360 /* No decimal, use the entire string. */
361
362 /* If any exponent, adjust p. */
363 p = strpbrk(buffer, "eE");
364 if (!p)
365 /* No exponent and no decimal. Use the entire
366 string. */
367 p = pend;
368 }
369 /* At this point, p points just past the right-most character we
370 want to format. We need to add the grouping string for the
371 characters between buffer and p. */
372
373 /* Starting at p and working right-to-left, keep track of
374 what grouping needs to be added and insert that. */
375 current_grouping = *grouping++;
376
377 /* If the first character is 0, perform no grouping at all. */
378 if (current_grouping == 0)
379 return 1;
380
381 while (p - buffer > current_grouping) {
382 /* Always leave buffer and pend valid at the end of this
383 loop, since we might leave with a return statement. */
384
385 /* Is there room to insert thousands_sep_len chars?. */
386 if (pmax - pend <= thousands_sep_len)
387 /* No room. */
388 return 0;
389
390 /* Move the rest of the string down. */
391 p -= current_grouping;
392 memmove(p + thousands_sep_len,
393 p,
394 pend - p + 1);
395 /* Adjust end pointer. */
396 pend += thousands_sep_len;
397 /* Copy the thousands_sep chars into the buffer. */
398 memcpy(p, thousands_sep, thousands_sep_len);
399
400 /* Move to the next grouping character, unless we're
401 repeating (which is designated by a grouping of 0). */
402 if (*grouping != 0) {
403 current_grouping = *grouping++;
404 if (current_grouping == CHAR_MAX)
405 /* We're done. */
406 return 1;
407 }
408 }
409 return 1;
410}
411
/* see FORMATBUFLEN in unicodeobject.c */
#define FLOAT_FORMATBUFLEN 120

/**
 * PyOS_ascii_formatd:
 * @buffer: A buffer to place the resulting string in
 * @buf_size: The length of the buffer.
 * @format: The printf()-style format to use for the
 *          code to use for converting.
 * @d: The double to convert
 *
 * Converts a double to a string, using the '.' as
 * decimal point.  To format the number you pass in
 * a printf()-style format string.  Allowed conversion
 * specifiers are 'e', 'E', 'f', 'F', 'g', 'G', 'n' and 'Z'.
 *
 * 'n' is the same as 'g', except it uses the current locale
 * (locale decimal point and thousands grouping).
 * 'Z' is the same as 'g', except it always has a decimal and
 *     at least one digit after the decimal.
 *
 * The format must start with '%', must end with the conversion
 * specifier, and must not contain a single quote, a lowercase 'l' or
 * another '%' after the first character.
 *
 * Return value: The pointer to the buffer with the converted string,
 * or %NULL if the format is rejected, if an 'n'/'Z' format is too long
 * to be copied internally, or if the thousands grouping for 'n' does
 * not fit in the buffer.
 **/
char *
PyOS_ascii_formatd(char       *buffer,
                   size_t      buf_size,
                   const char *format,
                   double      d)
{
    char format_char;
    size_t format_len = strlen(format);

    /* For type 'n', we need to make a copy of the format string, because
       we're going to modify 'n' -> 'g', and format is const char*, so we
       can't modify it directly.  FLOAT_FORMATBUFLEN should be longer than
       we ever need this to be.  There's an upcoming check to ensure it's
       big enough. */
    /* Issue 2264: code 'Z' requires copying the format.  'Z' is 'g', but
       also with at least one character past the decimal. */
    char tmp_format[FLOAT_FORMATBUFLEN];

    /* The format must start with '%'.  Doing this check first also
       rejects the empty string, so that format_len - 1 below can never
       index before the start of the format. */
    if (format[0] != '%')
        return NULL;

    /* The last character in the format string must be the format char */
    format_char = format[format_len - 1];

    /* Reject a single quote, a lowercase l, or a further percent
       anywhere after the leading '%'.  Presumably this keeps the
       format down to a single plain double conversion, matching the
       single argument passed to PyOS_snprintf below. */
    if (strpbrk(format + 1, "'l%"))
        return NULL;

    /* Also curious about this function is that it accepts format strings
       like "%xg", which are invalid for floats.  In general, the
       interface to this function is not very good, but changing it is
       difficult because it's a public API. */

    if (!(format_char == 'e' || format_char == 'E' ||
          format_char == 'f' || format_char == 'F' ||
          format_char == 'g' || format_char == 'G' ||
          format_char == 'n' || format_char == 'Z'))
        return NULL;

    /* Map 'n' or 'Z' format_char to 'g', by copying the format string and
       replacing the final char with a 'g' */
    if (format_char == 'n' || format_char == 'Z') {
        if (format_len + 1 >= sizeof(tmp_format)) {
            /* The format won't fit in our copy.  Error out.  In
               practice, this will never happen and will be
               detected by returning NULL */
            return NULL;
        }
        strcpy(tmp_format, format);
        tmp_format[format_len - 1] = 'g';
        format = tmp_format;
    }


    /* Have PyOS_snprintf do the hard work */
    PyOS_snprintf(buffer, buf_size, format, d);

    /* Do various fixups on the return string */

    /* Get the current locale, and find the decimal point string.
       Convert that string back to a dot.  Do not do this if using the
       'n' (number) format code, since we want to keep the localized
       decimal point in that case. */
    if (format_char != 'n')
        change_decimal_from_locale_to_dot(buffer);

    /* If an exponent exists, ensure that the exponent is at least
       MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
       for the extra zeros.  Also, if there are more than
       MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
       back to MIN_EXPONENT_DIGITS */
    ensure_minumim_exponent_length(buffer, buf_size);

    /* If format_char is 'Z', make sure we have at least one character
       after the decimal point (and make sure we have a decimal point). */
    if (format_char == 'Z')
        ensure_decimal_point(buffer, buf_size);

    /* If format_char is 'n', add the thousands grouping. */
    if (format_char == 'n')
        if (!add_thousands_grouping(buffer, buf_size))
            return NULL;

    return buffer;
}
522
/* Locale-independent counterpart of atof(): convert the leading part
   of nptr with PyOS_ascii_strtod(), without reporting where the
   conversion stopped. */
double
PyOS_ascii_atof(const char *nptr)
{
    char **no_end_pointer = NULL;

    return PyOS_ascii_strtod(nptr, no_end_pointer);
}