Blame - Python/pystrtod.c - platform/external/python/cpython2

blob: 79f63e2603538d2f72964613eb28a4d15cf7fcf0 [file] [log] [blame]

Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	1	/* -*- Mode: C; c-file-style: "python" -*- */
				2
				3	#include <Python.h>
				4	#include <locale.h>
				5
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	6	/**
				7	* PyOS_ascii_strtod:
				8	* @nptr: the string to convert to a numeric value.
				9	* @endptr: if non-%NULL, it returns the character after
				10	* the last character used in the conversion.
				11	*
				12	* Converts a string to a #gdouble value.
				13	* This function behaves like the standard strtod() function
				14	* does in the C locale. It does this without actually
				15	* changing the current locale, since that would not be
				16	* thread-safe.
				17	*
				18	* This function is typically used when reading configuration
				19	* files or other non-user input that should be locale independent.
				20	* To handle input from the user you should normally use the
				21	* locale-sensitive system strtod() function.
				22	*
				23	* If the correct value would cause overflow, plus or minus %HUGE_VAL
				24	* is returned (according to the sign of the value), and %ERANGE is
				25	* stored in %errno. If the correct value would cause underflow,
				26	* zero is returned and %ERANGE is stored in %errno.
Georg Brandl	b569ee4	2006-05-29 14:28:05 +0000	[diff] [blame]	27	* If memory allocation fails, %ENOMEM is stored in %errno.
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	28	*
				29	* This function resets %errno before calling strtod() so that
				30	* you can reliably detect overflow and underflow.
				31	*
				32	* Return value: the #gdouble value.
				33	**/
Eric Smith	aca19e6	2009-04-22 13:29:05 +0000	[diff] [blame]	34
				35	/*
				36	Use system strtod; since strtod is locale aware, we may
				37	have to first fix the decimal separator.
				38
				39	Note that unlike _Py_dg_strtod, the system strtod may not always give
				40	correctly rounded results.
				41	*/
				42
/* Case-insensitive prefix match used for nan and inf detection.

   s is the text being examined; t is the pattern and must already be
   lower-case.  Each character of s is lower-cased with Py_TOLOWER before
   being compared with the corresponding character of t.

   Returns 1 if all of t was matched at the start of s, 0 otherwise. */

static int
case_insensitive_match(const char *s, const char *t)
{
    /* Stop at the end of the pattern or at the first mismatch. */
    while (*t && Py_TOLOWER(*s) == *t) {
        s++;
        t++;
    }
    /* The match succeeded only if the whole pattern was consumed. */
    return *t ? 0 : 1;
}
				55
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	56	double
Neal Norwitz	e7214a1	2005-12-18 05:03:17 +0000	[diff] [blame]	57	PyOS_ascii_strtod(const char nptr, char *endptr)
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	58	{
				59	char *fail_pos;
Neal Norwitz	0e7a0ed	2005-12-18 05:37:36 +0000	[diff] [blame]	60	double val = -1.0;
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	61	struct lconv *locale_data;
				62	const char *decimal_point;
Neal Norwitz	d39d861	2006-01-08 01:03:36 +0000	[diff] [blame]	63	size_t decimal_point_len;
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	64	const char p, decimal_point_pos;
				65	const char end = NULL; / Silence gcc */
Guido van Rossum	3b83549	2008-01-05 00:59:59 +0000	[diff] [blame]	66	const char *digits_pos = NULL;
				67	int negate = 0;
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	68
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	69	assert(nptr != NULL);
				70
				71	fail_pos = NULL;
				72
				73	locale_data = localeconv();
				74	decimal_point = locale_data->decimal_point;
				75	decimal_point_len = strlen(decimal_point);
				76
				77	assert(decimal_point_len != 0);
				78
				79	decimal_point_pos = NULL;
Guido van Rossum	3b83549	2008-01-05 00:59:59 +0000	[diff] [blame]	80
Mark Dickinson	6d6b220	2009-04-26 16:04:05 +0000	[diff] [blame]	81	/* Set errno to zero, so that we can distinguish zero results
				82	and underflows */
				83	errno = 0;
				84
Guido van Rossum	3b83549	2008-01-05 00:59:59 +0000	[diff] [blame]	85	/* We process any leading whitespace and the optional sign manually,
				86	then pass the remainder to the system strtod. This ensures that
				87	the result of an underflow has the correct sign. (bug #1725) */
				88
				89	p = nptr;
				90	/* Skip leading space */
Eric Smith	cac7af6	2009-04-27 19:04:37 +0000	[diff] [blame]	91	while (Py_ISSPACE(*p))
Guido van Rossum	3b83549	2008-01-05 00:59:59 +0000	[diff] [blame]	92	p++;
				93
				94	/* Process leading sign, if present */
				95	if (*p == '-') {
				96	negate = 1;
				97	p++;
Mark Dickinson	6d6b220	2009-04-26 16:04:05 +0000	[diff] [blame]	98	}
				99	else if (*p == '+') {
Guido van Rossum	3b83549	2008-01-05 00:59:59 +0000	[diff] [blame]	100	p++;
				101	}
				102
Mark Dickinson	6d6b220	2009-04-26 16:04:05 +0000	[diff] [blame]	103	/* Parse infinities and nans */
				104	if (p == 'i' \|\| p == 'I') {
Mark Dickinson	01fce5a	2009-05-03 22:33:34 +0000	[diff] [blame]	105	if (case_insensitive_match(p+1, "nf")) {
Mark Dickinson	6d6b220	2009-04-26 16:04:05 +0000	[diff] [blame]	106	val = Py_HUGE_VAL;
Mark Dickinson	01fce5a	2009-05-03 22:33:34 +0000	[diff] [blame]	107	if (case_insensitive_match(p+3, "inity"))
Mark Dickinson	6d6b220	2009-04-26 16:04:05 +0000	[diff] [blame]	108	fail_pos = (char *)p+8;
				109	else
				110	fail_pos = (char *)p+3;
				111	goto got_val;
				112	}
				113	else
				114	goto invalid_string;
Guido van Rossum	3b83549	2008-01-05 00:59:59 +0000	[diff] [blame]	115	}
Mark Dickinson	6d6b220	2009-04-26 16:04:05 +0000	[diff] [blame]	116	#ifdef Py_NAN
				117	if (p == 'n' \|\| p == 'N') {
Mark Dickinson	01fce5a	2009-05-03 22:33:34 +0000	[diff] [blame]	118	if (case_insensitive_match(p+1, "an")) {
Mark Dickinson	6d6b220	2009-04-26 16:04:05 +0000	[diff] [blame]	119	val = Py_NAN;
				120	fail_pos = (char *)p+3;
				121	goto got_val;
				122	}
				123	else
				124	goto invalid_string;
				125	}
				126	#endif
Guido van Rossum	3b83549	2008-01-05 00:59:59 +0000	[diff] [blame]	127
Mark Dickinson	6d6b220	2009-04-26 16:04:05 +0000	[diff] [blame]	128	/* Some platform strtods accept hex floats; Python shouldn't (at the
				129	moment), so we check explicitly for strings starting with '0x'. */
				130	if (p == '0' && ((p+1) == 'x' \|\| *(p+1) == 'X'))
				131	goto invalid_string;
				132
				133	/* Check that what's left begins with a digit or decimal point */
Eric Smith	cac7af6	2009-04-27 19:04:37 +0000	[diff] [blame]	134	if (!Py_ISDIGIT(p) && p != '.')
Mark Dickinson	6d6b220	2009-04-26 16:04:05 +0000	[diff] [blame]	135	goto invalid_string;
				136
				137	digits_pos = p;
				138	if (decimal_point[0] != '.' \|\|
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	139	decimal_point[1] != 0)
				140	{
Mark Dickinson	6d6b220	2009-04-26 16:04:05 +0000	[diff] [blame]	141	/* Look for a '.' in the input; if present, it'll need to be
				142	swapped for the current locale's decimal point before we
				143	call strtod. On the other hand, if we find the current
				144	locale's decimal point then the input is invalid. */
Eric Smith	cac7af6	2009-04-27 19:04:37 +0000	[diff] [blame]	145	while (Py_ISDIGIT(*p))
Neal Norwitz	e7214a1	2005-12-18 05:03:17 +0000	[diff] [blame]	146	p++;
				147
				148	if (*p == '.')
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	149	{
Neal Norwitz	e7214a1	2005-12-18 05:03:17 +0000	[diff] [blame]	150	decimal_point_pos = p++;
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	151
Mark Dickinson	6d6b220	2009-04-26 16:04:05 +0000	[diff] [blame]	152	/* locate end of number */
Eric Smith	cac7af6	2009-04-27 19:04:37 +0000	[diff] [blame]	153	while (Py_ISDIGIT(*p))
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	154	p++;
				155
Neal Norwitz	e7214a1	2005-12-18 05:03:17 +0000	[diff] [blame]	156	if (p == 'e' \|\| p == 'E')
				157	p++;
				158	if (p == '+' \|\| p == '-')
				159	p++;
Eric Smith	cac7af6	2009-04-27 19:04:37 +0000	[diff] [blame]	160	while (Py_ISDIGIT(*p))
Neal Norwitz	e7214a1	2005-12-18 05:03:17 +0000	[diff] [blame]	161	p++;
				162	end = p;
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	163	}
Martin v. Löwis	fcfff0a	2006-07-03 12:19:50 +0000	[diff] [blame]	164	else if (strncmp(p, decimal_point, decimal_point_len) == 0)
Martin v. Löwis	fcfff0a	2006-07-03 12:19:50 +0000	[diff] [blame]	165	/* Python bug #1417699 */
Mark Dickinson	6d6b220	2009-04-26 16:04:05 +0000	[diff] [blame]	166	goto invalid_string;
Eric Smith	5c35a9d	2008-03-17 12:14:29 +0000	[diff] [blame]	167	/* For the other cases, we need not convert the decimal
				168	point */
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	169	}
				170
Mark Dickinson	6d6b220	2009-04-26 16:04:05 +0000	[diff] [blame]	171	if (decimal_point_pos) {
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	172	char copy, c;
Mark Dickinson	6d6b220	2009-04-26 16:04:05 +0000	[diff] [blame]	173	/* Create a copy of the input, with the '.' converted to the
				174	locale-specific decimal point */
Guido van Rossum	3b83549	2008-01-05 00:59:59 +0000	[diff] [blame]	175	copy = (char *)PyMem_MALLOC(end - digits_pos +
				176	1 + decimal_point_len);
Georg Brandl	b569ee4	2006-05-29 14:28:05 +0000	[diff] [blame]	177	if (copy == NULL) {
				178	if (endptr)
Georg Brandl	80181e2	2006-05-29 14:33:55 +0000	[diff] [blame]	179	endptr = (char )nptr;
Georg Brandl	b569ee4	2006-05-29 14:28:05 +0000	[diff] [blame]	180	errno = ENOMEM;
				181	return val;
				182	}
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	183
				184	c = copy;
Guido van Rossum	3b83549	2008-01-05 00:59:59 +0000	[diff] [blame]	185	memcpy(c, digits_pos, decimal_point_pos - digits_pos);
				186	c += decimal_point_pos - digits_pos;
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	187	memcpy(c, decimal_point, decimal_point_len);
				188	c += decimal_point_len;
Eric Smith	5c35a9d	2008-03-17 12:14:29 +0000	[diff] [blame]	189	memcpy(c, decimal_point_pos + 1,
				190	end - (decimal_point_pos + 1));
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	191	c += end - (decimal_point_pos + 1);
				192	*c = 0;
				193
				194	val = strtod(copy, &fail_pos);
				195
				196	if (fail_pos)
				197	{
				198	if (fail_pos > decimal_point_pos)
Guido van Rossum	3b83549	2008-01-05 00:59:59 +0000	[diff] [blame]	199	fail_pos = (char *)digits_pos +
				200	(fail_pos - copy) -
				201	(decimal_point_len - 1);
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	202	else
Guido van Rossum	3b83549	2008-01-05 00:59:59 +0000	[diff] [blame]	203	fail_pos = (char *)digits_pos +
				204	(fail_pos - copy);
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	205	}
				206
Brett Cannon	0ed0587	2006-05-25 20:44:08 +0000	[diff] [blame]	207	PyMem_FREE(copy);
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	208
				209	}
Neal Norwitz	e7214a1	2005-12-18 05:03:17 +0000	[diff] [blame]	210	else {
Guido van Rossum	3b83549	2008-01-05 00:59:59 +0000	[diff] [blame]	211	val = strtod(digits_pos, &fail_pos);
Neal Norwitz	e7214a1	2005-12-18 05:03:17 +0000	[diff] [blame]	212	}
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	213
Guido van Rossum	3b83549	2008-01-05 00:59:59 +0000	[diff] [blame]	214	if (fail_pos == digits_pos)
Mark Dickinson	6d6b220	2009-04-26 16:04:05 +0000	[diff] [blame]	215	goto invalid_string;
Guido van Rossum	3b83549	2008-01-05 00:59:59 +0000	[diff] [blame]	216
Mark Dickinson	6d6b220	2009-04-26 16:04:05 +0000	[diff] [blame]	217	got_val:
Guido van Rossum	3b83549	2008-01-05 00:59:59 +0000	[diff] [blame]	218	if (negate && fail_pos != nptr)
				219	val = -val;
				220
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	221	if (endptr)
				222	*endptr = fail_pos;
				223
				224	return val;
Mark Dickinson	6d6b220	2009-04-26 16:04:05 +0000	[diff] [blame]	225
				226	invalid_string:
				227	if (endptr)
				228	endptr = (char)nptr;
				229	errno = EINVAL;
				230	return -1.0;
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	231	}
				232
/* Convert nptr to a double by delegating to PyOS_ascii_strtod with a
   NULL end pointer, i.e. without reporting where parsing stopped. */
double
PyOS_ascii_atof(const char *nptr)
{
    const double value = PyOS_ascii_strtod(nptr, NULL);

    return value;
}
				238
				239
Eric Smith	0a95063	2008-04-30 01:09:30 +0000	[diff] [blame]	240	/* Given a string that may have a decimal point in the current
				241	locale, change it back to a dot. Since the string cannot get
				242	longer, no need for a maximum buffer size parameter. */
				243	Py_LOCAL_INLINE(void)
				244	change_decimal_from_locale_to_dot(char* buffer)
				245	{
				246	struct lconv *locale_data = localeconv();
				247	const char *decimal_point = locale_data->decimal_point;
				248
				249	if (decimal_point[0] != '.' \|\| decimal_point[1] != 0) {
				250	size_t decimal_point_len = strlen(decimal_point);
				251
				252	if (buffer == '+' \|\| buffer == '-')
				253	buffer++;
Eric Smith	cac7af6	2009-04-27 19:04:37 +0000	[diff] [blame]	254	while (Py_ISDIGIT(*buffer))
Eric Smith	0a95063	2008-04-30 01:09:30 +0000	[diff] [blame]	255	buffer++;
				256	if (strncmp(buffer, decimal_point, decimal_point_len) == 0) {
				257	*buffer = '.';
				258	buffer++;
				259	if (decimal_point_len > 1) {
				260	/* buffer needs to get smaller */
				261	size_t rest_len = strlen(buffer +
				262	(decimal_point_len - 1));
				263	memmove(buffer,
				264	buffer + (decimal_point_len - 1),
				265	rest_len);
				266	buffer[rest_len] = 0;
				267	}
				268	}
				269	}
				270	}
				271
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	272
Eric Smith	068f065	2009-04-25 21:40:15 +0000	[diff] [blame]	273	Py_LOCAL_INLINE(void)
				274	ensure_sign(char* buffer, size_t buf_size)
				275	{
Eric Smith	94cc00c	2009-04-28 07:33:09 +0000	[diff] [blame]	276	size_t len;
Eric Smith	068f065	2009-04-25 21:40:15 +0000	[diff] [blame]	277
				278	if (buffer[0] == '-')
				279	/* Already have a sign. */
				280	return;
				281
				282	/* Include the trailing 0 byte. */
				283	len = strlen(buffer)+1;
				284	if (len >= buf_size+1)
				285	/* No room for the sign, don't do anything. */
				286	return;
				287
				288	memmove(buffer+1, buffer, len);
				289	buffer[0] = '+';
				290	}
				291
Eric Smith	7ef40bf	2008-02-20 23:34:22 +0000	[diff] [blame]	292	/* From the C99 standard, section 7.19.6:
				293	The exponent always contains at least two digits, and only as many more digits
				294	as necessary to represent the exponent.
				295	*/
				296	#define MIN_EXPONENT_DIGITS 2
				297
Eric Smith	0a95063	2008-04-30 01:09:30 +0000	[diff] [blame]	298	/* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS
				299	in length. */
				300	Py_LOCAL_INLINE(void)
Mark Dickinson	e73cbe7	2009-04-26 19:54:55 +0000	[diff] [blame]	301	ensure_minimum_exponent_length(char* buffer, size_t buf_size)
Eric Smith	0a95063	2008-04-30 01:09:30 +0000	[diff] [blame]	302	{
				303	char *p = strpbrk(buffer, "eE");
				304	if (p && ((p + 1) == '-' \|\| (p + 1) == '+')) {
				305	char *start = p + 2;
				306	int exponent_digit_cnt = 0;
				307	int leading_zero_cnt = 0;
				308	int in_leading_zeros = 1;
				309	int significant_digit_cnt;
				310
				311	/* Skip over the exponent and the sign. */
				312	p += 2;
				313
				314	/* Find the end of the exponent, keeping track of leading
				315	zeros. */
Eric Smith	cac7af6	2009-04-27 19:04:37 +0000	[diff] [blame]	316	while (p && Py_ISDIGIT(p)) {
Eric Smith	0a95063	2008-04-30 01:09:30 +0000	[diff] [blame]	317	if (in_leading_zeros && *p == '0')
				318	++leading_zero_cnt;
				319	if (*p != '0')
				320	in_leading_zeros = 0;
				321	++p;
				322	++exponent_digit_cnt;
				323	}
				324
				325	significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
				326	if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
				327	/* If there are 2 exactly digits, we're done,
				328	regardless of what they contain */
				329	}
				330	else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
				331	int extra_zeros_cnt;
				332
				333	/* There are more than 2 digits in the exponent. See
				334	if we can delete some of the leading zeros */
				335	if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
				336	significant_digit_cnt = MIN_EXPONENT_DIGITS;
				337	extra_zeros_cnt = exponent_digit_cnt -
				338	significant_digit_cnt;
				339
				340	/* Delete extra_zeros_cnt worth of characters from the
				341	front of the exponent */
				342	assert(extra_zeros_cnt >= 0);
				343
				344	/* Add one to significant_digit_cnt to copy the
				345	trailing 0 byte, thus setting the length */
				346	memmove(start,
				347	start + extra_zeros_cnt,
				348	significant_digit_cnt + 1);
				349	}
				350	else {
				351	/* If there are fewer than 2 digits, add zeros
				352	until there are 2, if there's enough room */
				353	int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
				354	if (start + zeros + exponent_digit_cnt + 1
				355	< buffer + buf_size) {
				356	memmove(start + zeros, start,
				357	exponent_digit_cnt + 1);
				358	memset(start, '0', zeros);
				359	}
				360	}
				361	}
				362	}
				363
Mark Dickinson	92fcc9c	2009-04-29 20:41:00 +0000	[diff] [blame]	364	/* Remove trailing zeros after the decimal point from a numeric string; also
				365	remove the decimal point if all digits following it are zero. The numeric
				366	string must end in '\0', and should not have any leading or trailing
				367	whitespace. Assumes that the decimal point is '.'. */
Eric Smith	0a95063	2008-04-30 01:09:30 +0000	[diff] [blame]	368	Py_LOCAL_INLINE(void)
Mark Dickinson	92fcc9c	2009-04-29 20:41:00 +0000	[diff] [blame]	369	remove_trailing_zeros(char *buffer)
Eric Smith	0a95063	2008-04-30 01:09:30 +0000	[diff] [blame]	370	{
Mark Dickinson	92fcc9c	2009-04-29 20:41:00 +0000	[diff] [blame]	371	char old_fraction_end, new_fraction_end, end, p;
				372
				373	p = buffer;
				374	if (p == '-' \|\| p == '+')
				375	/* Skip leading sign, if present */
				376	++p;
				377	while (Py_ISDIGIT(*p))
				378	++p;
				379
				380	/* if there's no decimal point there's nothing to do */
				381	if (*p++ != '.')
				382	return;
				383
				384	/* scan any digits after the point */
				385	while (Py_ISDIGIT(*p))
				386	++p;
				387	old_fraction_end = p;
				388
				389	/* scan up to ending '\0' */
				390	while (*p != '\0')
				391	p++;
				392	/* +1 to make sure that we move the null byte as well */
				393	end = p+1;
				394
				395	/* scan back from fraction_end, looking for removable zeros */
				396	p = old_fraction_end;
				397	while (*(p-1) == '0')
				398	--p;
				399	/* and remove point if we've got that far */
				400	if (*(p-1) == '.')
				401	--p;
				402	new_fraction_end = p;
				403
				404	memmove(new_fraction_end, old_fraction_end, end-old_fraction_end);
				405	}
				406
				407	/* Ensure that buffer has a decimal point in it. The decimal point will not
				408	be in the current locale, it will always be '.'. Don't add a decimal point
				409	if an exponent is present. Also, convert to exponential notation where
				410	adding a '.0' would produce too many significant digits (see issue 5864).
				411
				412	Returns a pointer to the fixed buffer, or NULL on failure.
				413	*/
				414	Py_LOCAL_INLINE(char *)
				415	ensure_decimal_point(char* buffer, size_t buf_size, int precision)
				416	{
				417	int digit_count, insert_count = 0, convert_to_exp = 0;
				418	char* chars_to_insert, *digits_start;
Eric Smith	0a95063	2008-04-30 01:09:30 +0000	[diff] [blame]	419
				420	/* search for the first non-digit character */
				421	char *p = buffer;
Eric Smith	f032a00	2008-07-19 00:24:05 +0000	[diff] [blame]	422	if (p == '-' \|\| p == '+')
				423	/* Skip leading sign, if present. I think this could only
				424	ever be '-', but it can't hurt to check for both. */
				425	++p;
Mark Dickinson	92fcc9c	2009-04-29 20:41:00 +0000	[diff] [blame]	426	digits_start = p;
Eric Smith	cac7af6	2009-04-27 19:04:37 +0000	[diff] [blame]	427	while (p && Py_ISDIGIT(p))
Eric Smith	0a95063	2008-04-30 01:09:30 +0000	[diff] [blame]	428	++p;
Mark Dickinson	92fcc9c	2009-04-29 20:41:00 +0000	[diff] [blame]	429	digit_count = Py_SAFE_DOWNCAST(p - digits_start, Py_ssize_t, int);
Eric Smith	0a95063	2008-04-30 01:09:30 +0000	[diff] [blame]	430
				431	if (*p == '.') {
Eric Smith	cac7af6	2009-04-27 19:04:37 +0000	[diff] [blame]	432	if (Py_ISDIGIT(*(p+1))) {
Eric Smith	0a95063	2008-04-30 01:09:30 +0000	[diff] [blame]	433	/* Nothing to do, we already have a decimal
				434	point and a digit after it */
				435	}
				436	else {
				437	/* We have a decimal point, but no following
				438	digit. Insert a zero after the decimal. */
Mark Dickinson	92fcc9c	2009-04-29 20:41:00 +0000	[diff] [blame]	439	/* can't ever get here via PyOS_double_to_string */
				440	assert(precision == -1);
Eric Smith	0a95063	2008-04-30 01:09:30 +0000	[diff] [blame]	441	++p;
				442	chars_to_insert = "0";
				443	insert_count = 1;
				444	}
				445	}
Eric Smith	aca19e6	2009-04-22 13:29:05 +0000	[diff] [blame]	446	else if (!(p == 'e' \|\| p == 'E')) {
				447	/* Don't add ".0" if we have an exponent. */
Mark Dickinson	92fcc9c	2009-04-29 20:41:00 +0000	[diff] [blame]	448	if (digit_count == precision) {
				449	/* issue 5864: don't add a trailing .0 in the case
				450	where the '%g'-formatted result already has as many
				451	significant digits as were requested. Switch to
				452	exponential notation instead. */
				453	convert_to_exp = 1;
				454	/* no exponent, no point, and we shouldn't land here
				455	for infs and nans, so we must be at the end of the
				456	string. */
				457	assert(*p == '\0');
				458	}
				459	else {
				460	assert(precision == -1 \|\| digit_count < precision);
				461	chars_to_insert = ".0";
				462	insert_count = 2;
				463	}
Eric Smith	0a95063	2008-04-30 01:09:30 +0000	[diff] [blame]	464	}
				465	if (insert_count) {
				466	size_t buf_len = strlen(buffer);
				467	if (buf_len + insert_count + 1 >= buf_size) {
				468	/* If there is not enough room in the buffer
				469	for the additional text, just skip it. It's
				470	not worth generating an error over. */
				471	}
				472	else {
				473	memmove(p + insert_count, p,
				474	buffer + strlen(buffer) - p + 1);
				475	memcpy(p, chars_to_insert, insert_count);
				476	}
				477	}
Mark Dickinson	92fcc9c	2009-04-29 20:41:00 +0000	[diff] [blame]	478	if (convert_to_exp) {
				479	int written;
				480	size_t buf_avail;
				481	p = digits_start;
				482	/* insert decimal point */
				483	assert(digit_count >= 1);
				484	memmove(p+2, p+1, digit_count); /* safe, but overwrites nul */
				485	p[1] = '.';
				486	p += digit_count+1;
				487	assert(p <= buf_size+buffer);
				488	buf_avail = buf_size+buffer-p;
				489	if (buf_avail == 0)
				490	return NULL;
				491	/* Add exponent. It's okay to use lower case 'e': we only
				492	arrive here as a result of using the empty format code or
				493	repr/str builtins and those never want an upper case 'E' */
				494	written = PyOS_snprintf(p, buf_avail, "e%+.02d", digit_count-1);
				495	if (!(0 <= written &&
				496	written < Py_SAFE_DOWNCAST(buf_avail, size_t, int)))
				497	/* output truncated, or something else bad happened */
				498	return NULL;
				499	remove_trailing_zeros(buffer);
				500	}
				501	return buffer;
Eric Smith	0a95063	2008-04-30 01:09:30 +0000	[diff] [blame]	502	}
				503
Eric Smith	7ef40bf	2008-02-20 23:34:22 +0000	[diff] [blame]	504	/* see FORMATBUFLEN in unicodeobject.c */
				505	#define FLOAT_FORMATBUFLEN 120
				506
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	507	/**
Eric Smith	068f065	2009-04-25 21:40:15 +0000	[diff] [blame]	508	* _PyOS_ascii_formatd:
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	509	* @buffer: A buffer to place the resulting string in
Eric Smith	8113ca6	2008-03-17 11:01:01 +0000	[diff] [blame]	510	* @buf_size: The length of the buffer.
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	511	* @format: The printf()-style format to use for the
				512	* code to use for converting.
				513	* @d: The #gdouble to convert
				514	*
				515	* Converts a #gdouble to a string, using the '.' as
				516	* decimal point. To format the number you pass in
				517	* a printf()-style format string. Allowed conversion
Eric Smith	aca19e6	2009-04-22 13:29:05 +0000	[diff] [blame]	518	* specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'Z'.
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	519	*
Eric Smith	8113ca6	2008-03-17 11:01:01 +0000	[diff] [blame]	520	* 'Z' is the same as 'g', except it always has a decimal and
				521	* at least one digit after the decimal.
Eric Smith	7ef40bf	2008-02-20 23:34:22 +0000	[diff] [blame]	522	*
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	523	* Return value: The pointer to the buffer with the converted string.
Mark Dickinson	92fcc9c	2009-04-29 20:41:00 +0000	[diff] [blame]	524	* On failure returns NULL but does not set any Python exception.
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	525	**/
Eric Smith	068f065	2009-04-25 21:40:15 +0000	[diff] [blame]	526	/* DEPRECATED, will be deleted in 2.8 and 3.2 */
				527	PyAPI_FUNC(char *)
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	528	PyOS_ascii_formatd(char *buffer,
Eric Smith	8113ca6	2008-03-17 11:01:01 +0000	[diff] [blame]	529	size_t buf_size,
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	530	const char *format,
				531	double d)
				532	{
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	533	char format_char;
Eric Smith	7ef40bf	2008-02-20 23:34:22 +0000	[diff] [blame]	534	size_t format_len = strlen(format);
				535
Eric Smith	8113ca6	2008-03-17 11:01:01 +0000	[diff] [blame]	536	/* Issue 2264: code 'Z' requires copying the format. 'Z' is 'g', but
				537	also with at least one character past the decimal. */
Eric Smith	7ef40bf	2008-02-20 23:34:22 +0000	[diff] [blame]	538	char tmp_format[FLOAT_FORMATBUFLEN];
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	539
Eric Smith	068f065	2009-04-25 21:40:15 +0000	[diff] [blame]	540	if (PyErr_WarnEx(PyExc_DeprecationWarning,
				541	"PyOS_ascii_formatd is deprecated, "
				542	"use PyOS_double_to_string instead", 1) < 0)
				543	return NULL;
				544
Eric Smith	7ef40bf	2008-02-20 23:34:22 +0000	[diff] [blame]	545	/* The last character in the format string must be the format char */
				546	format_char = format[format_len - 1];
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	547
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	548	if (format[0] != '%')
				549	return NULL;
				550
Eric Smith	7ef40bf	2008-02-20 23:34:22 +0000	[diff] [blame]	551	/* I'm not sure why this test is here. It's ensuring that the format
				552	string after the first character doesn't have a single quote, a
				553	lowercase l, or a percent. This is the reverse of the commented-out
				554	test about 10 lines ago. */
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	555	if (strpbrk(format + 1, "'l%"))
				556	return NULL;
				557
Eric Smith	8113ca6	2008-03-17 11:01:01 +0000	[diff] [blame]	558	/* Also curious about this function is that it accepts format strings
				559	like "%xg", which are invalid for floats. In general, the
				560	interface to this function is not very good, but changing it is
				561	difficult because it's a public API. */
				562
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	563	if (!(format_char == 'e' \|\| format_char == 'E' \|\|
				564	format_char == 'f' \|\| format_char == 'F' \|\|
Eric Smith	7ef40bf	2008-02-20 23:34:22 +0000	[diff] [blame]	565	format_char == 'g' \|\| format_char == 'G' \|\|
Eric Smith	aca19e6	2009-04-22 13:29:05 +0000	[diff] [blame]	566	format_char == 'Z'))
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	567	return NULL;
				568
Eric Smith	aca19e6	2009-04-22 13:29:05 +0000	[diff] [blame]	569	/* Map 'Z' format_char to 'g', by copying the format string and
Eric Smith	8113ca6	2008-03-17 11:01:01 +0000	[diff] [blame]	570	replacing the final char with a 'g' */
Eric Smith	aca19e6	2009-04-22 13:29:05 +0000	[diff] [blame]	571	if (format_char == 'Z') {
Eric Smith	7ef40bf	2008-02-20 23:34:22 +0000	[diff] [blame]	572	if (format_len + 1 >= sizeof(tmp_format)) {
				573	/* The format won't fit in our copy. Error out. In
Eric Smith	5c35a9d	2008-03-17 12:14:29 +0000	[diff] [blame]	574	practice, this will never happen and will be
				575	detected by returning NULL */
Eric Smith	7ef40bf	2008-02-20 23:34:22 +0000	[diff] [blame]	576	return NULL;
				577	}
				578	strcpy(tmp_format, format);
				579	tmp_format[format_len - 1] = 'g';
				580	format = tmp_format;
				581	}
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	582
Eric Smith	8113ca6	2008-03-17 11:01:01 +0000	[diff] [blame]	583
Eric Smith	7ef40bf	2008-02-20 23:34:22 +0000	[diff] [blame]	584	/* Have PyOS_snprintf do the hard work */
Eric Smith	8113ca6	2008-03-17 11:01:01 +0000	[diff] [blame]	585	PyOS_snprintf(buffer, buf_size, format, d);
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	586
Eric Smith	0a95063	2008-04-30 01:09:30 +0000	[diff] [blame]	587	/* Do various fixups on the return string */
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	588
Eric Smith	0a95063	2008-04-30 01:09:30 +0000	[diff] [blame]	589	/* Get the current locale, and find the decimal point string.
Eric Smith	aca19e6	2009-04-22 13:29:05 +0000	[diff] [blame]	590	Convert that string back to a dot. */
				591	change_decimal_from_locale_to_dot(buffer);
Eric Smith	7ef40bf	2008-02-20 23:34:22 +0000	[diff] [blame]	592
				593	/* If an exponent exists, ensure that the exponent is at least
				594	MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
				595	for the extra zeros. Also, if there are more than
				596	MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
				597	back to MIN_EXPONENT_DIGITS */
Mark Dickinson	e73cbe7	2009-04-26 19:54:55 +0000	[diff] [blame]	598	ensure_minimum_exponent_length(buffer, buf_size);
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	599
Eric Smith	8113ca6	2008-03-17 11:01:01 +0000	[diff] [blame]	600	/* If format_char is 'Z', make sure we have at least one character
Mark Dickinson	92fcc9c	2009-04-29 20:41:00 +0000	[diff] [blame]	601	after the decimal point (and make sure we have a decimal point);
				602	also switch to exponential notation in some edge cases where the
				603	extra character would produce more significant digits that we
				604	really want. */
Eric Smith	0a95063	2008-04-30 01:09:30 +0000	[diff] [blame]	605	if (format_char == 'Z')
Mark Dickinson	92fcc9c	2009-04-29 20:41:00 +0000	[diff] [blame]	606	buffer = ensure_decimal_point(buffer, buf_size, -1);
Eric Smith	8113ca6	2008-03-17 11:01:01 +0000	[diff] [blame]	607
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	608	return buffer;
				609	}
				610
Mark Dickinson	df108ca	2009-04-29 21:56:53 +0000	[diff] [blame]	611	/* Precisions used by repr() and str(), respectively.
				612
				613	The repr() precision (17 significant decimal digits) is the minimal number
				614	that is guaranteed to have enough precision so that if the number is read
				615	back in the exact same binary value is recreated. This is true for IEEE
				616	floating point by design, and also happens to work for all other modern
				617	hardware.
				618
				619	The str() precision (12 significant decimal digits) is chosen so that in
				620	most cases, the rounding noise created by various operations is suppressed,
				621	while giving plenty of precision for practical use.
				622
				623	*/
				624
Eric Smith	068f065	2009-04-25 21:40:15 +0000	[diff] [blame]	625	PyAPI_FUNC(void)
				626	_PyOS_double_to_string(char *buf, size_t buf_len, double val,
				627	char format_code, int precision,
				628	int flags, int *ptype)
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	629	{
Eric Smith	aca19e6	2009-04-22 13:29:05 +0000	[diff] [blame]	630	char format[32];
Eric Smith	aca19e6	2009-04-22 13:29:05 +0000	[diff] [blame]	631	int t;
				632	int upper = 0;
				633
Eric Smith	068f065	2009-04-25 21:40:15 +0000	[diff] [blame]	634	if (buf_len < 1) {
				635	assert(0);
				636	/* There's no way to signal this error. Just return. */
				637	return;
				638	}
				639	buf[0] = 0;
				640
Eric Smith	aca19e6	2009-04-22 13:29:05 +0000	[diff] [blame]	641	/* Validate format_code, and map upper and lower case */
				642	switch (format_code) {
				643	case 'e': /* exponent */
				644	case 'f': /* fixed */
				645	case 'g': /* general */
				646	break;
				647	case 'E':
				648	upper = 1;
				649	format_code = 'e';
				650	break;
				651	case 'F':
				652	upper = 1;
				653	format_code = 'f';
				654	break;
				655	case 'G':
				656	upper = 1;
				657	format_code = 'g';
				658	break;
				659	case 'r': /* repr format */
				660	/* Supplied precision is unused, must be 0. */
Eric Smith	068f065	2009-04-25 21:40:15 +0000	[diff] [blame]	661	if (precision != 0)
				662	return;
Eric Smith	a985a3a	2009-05-05 18:26:08 +0000	[diff] [blame]	663	/* The repr() precision (17 significant decimal digits) is the
				664	minimal number that is guaranteed to have enough precision
				665	so that if the number is read back in the exact same binary
				666	value is recreated. This is true for IEEE floating point
				667	by design, and also happens to work for all other modern
				668	hardware. */
Eric Smith	aca19e6	2009-04-22 13:29:05 +0000	[diff] [blame]	669	precision = 17;
				670	format_code = 'g';
				671	break;
Eric Smith	aca19e6	2009-04-22 13:29:05 +0000	[diff] [blame]	672	default:
Eric Smith	068f065	2009-04-25 21:40:15 +0000	[diff] [blame]	673	assert(0);
				674	return;
				675	}
				676
				677	/* Check for buf too small to fit "-inf". Other buffer too small
				678	conditions are dealt with when converting or formatting finite
				679	numbers. */
				680	if (buf_len < 5) {
				681	assert(0);
				682	return;
Eric Smith	aca19e6	2009-04-22 13:29:05 +0000	[diff] [blame]	683	}
				684
				685	/* Handle nan and inf. */
				686	if (Py_IS_NAN(val)) {
				687	strcpy(buf, "nan");
				688	t = Py_DTST_NAN;
				689	} else if (Py_IS_INFINITY(val)) {
				690	if (copysign(1., val) == 1.)
				691	strcpy(buf, "inf");
				692	else
				693	strcpy(buf, "-inf");
				694	t = Py_DTST_INFINITE;
				695	} else {
				696	t = Py_DTST_FINITE;
				697
Eric Smith	068f065	2009-04-25 21:40:15 +0000	[diff] [blame]	698	/* Build the format string. */
				699	PyOS_snprintf(format, sizeof(format), "%%%s.%i%c",
				700	(flags & Py_DTSF_ALT ? "#" : ""), precision,
				701	format_code);
Eric Smith	aca19e6	2009-04-22 13:29:05 +0000	[diff] [blame]	702
Eric Smith	068f065	2009-04-25 21:40:15 +0000	[diff] [blame]	703	/* Have PyOS_snprintf do the hard work. */
				704	PyOS_snprintf(buf, buf_len, format, val);
				705
				706	/* Do various fixups on the return string */
				707
				708	/* Get the current locale, and find the decimal point string.
				709	Convert that string back to a dot. */
				710	change_decimal_from_locale_to_dot(buf);
				711
				712	/* If an exponent exists, ensure that the exponent is at least
				713	MIN_EXPONENT_DIGITS digits, providing the buffer is large
				714	enough for the extra zeros. Also, if there are more than
				715	MIN_EXPONENT_DIGITS, remove as many zeros as possible until
				716	we get back to MIN_EXPONENT_DIGITS */
Mark Dickinson	e73cbe7	2009-04-26 19:54:55 +0000	[diff] [blame]	717	ensure_minimum_exponent_length(buf, buf_len);
Eric Smith	068f065	2009-04-25 21:40:15 +0000	[diff] [blame]	718
				719	/* Possibly make sure we have at least one character after the
				720	decimal point (and make sure we have a decimal point). */
Eric Smith	aca19e6	2009-04-22 13:29:05 +0000	[diff] [blame]	721	if (flags & Py_DTSF_ADD_DOT_0)
Mark Dickinson	92fcc9c	2009-04-29 20:41:00 +0000	[diff] [blame]	722	buf = ensure_decimal_point(buf, buf_len, precision);
Eric Smith	aca19e6	2009-04-22 13:29:05 +0000	[diff] [blame]	723	}
				724
Eric Smith	068f065	2009-04-25 21:40:15 +0000	[diff] [blame]	725	/* Add the sign if asked and the result isn't negative. */
				726	if (flags & Py_DTSF_SIGN && buf[0] != '-')
				727	ensure_sign(buf, buf_len);
Eric Smith	aca19e6	2009-04-22 13:29:05 +0000	[diff] [blame]	728
Eric Smith	068f065	2009-04-25 21:40:15 +0000	[diff] [blame]	729	if (upper) {
				730	/* Convert to upper case. */
				731	char *p;
				732	for (p = buf; *p; p++)
Mark Dickinson	777e4ff	2009-05-03 20:59:48 +0000	[diff] [blame]	733	p = Py_TOUPPER(p);
Eric Smith	068f065	2009-04-25 21:40:15 +0000	[diff] [blame]	734	}
				735
				736	if (ptype)
				737	*ptype = t;
				738	}
				739
				740
				741	PyAPI_FUNC(char *) PyOS_double_to_string(double val,
				742	char format_code,
				743	int precision,
				744	int flags,
				745	int *ptype)
				746	{
				747	char buf[128];
				748	Py_ssize_t len;
				749	char *result;
				750
				751	_PyOS_double_to_string(buf, sizeof(buf), val, format_code, precision,
				752	flags, ptype);
				753	len = strlen(buf);
				754	if (len == 0) {
				755	PyErr_BadInternalCall();
				756	return NULL;
				757	}
				758
				759	/* Add 1 for the trailing 0 byte. */
				760	result = PyMem_Malloc(len + 1);
Eric Smith	aca19e6	2009-04-22 13:29:05 +0000	[diff] [blame]	761	if (result == NULL) {
				762	PyErr_NoMemory();
				763	return NULL;
				764	}
Eric Smith	068f065	2009-04-25 21:40:15 +0000	[diff] [blame]	765	strcpy(result, buf);
Eric Smith	aca19e6	2009-04-22 13:29:05 +0000	[diff] [blame]	766
Eric Smith	aca19e6	2009-04-22 13:29:05 +0000	[diff] [blame]	767	return result;
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	768	}