Blame - Python/pystrtod.c - platform/external/python/cpython2

blob: 30dd4e539418a0e3085923a99d3897a1629e5f56 [file] [log] [blame]

/* -*- Mode: C; c-file-style: "python" -*- */
				2
				3	#include <Python.h>
				4	#include <locale.h>
				5
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	6	/**
				7	* PyOS_ascii_strtod:
				8	* @nptr: the string to convert to a numeric value.
				9	* @endptr: if non-%NULL, it returns the character after
				10	* the last character used in the conversion.
				11	*
				12	* Converts a string to a #gdouble value.
				13	* This function behaves like the standard strtod() function
				14	* does in the C locale. It does this without actually
				15	* changing the current locale, since that would not be
				16	* thread-safe.
				17	*
				18	* This function is typically used when reading configuration
				19	* files or other non-user input that should be locale independent.
				20	* To handle input from the user you should normally use the
				21	* locale-sensitive system strtod() function.
				22	*
				23	* If the correct value would cause overflow, plus or minus %HUGE_VAL
				24	* is returned (according to the sign of the value), and %ERANGE is
				25	* stored in %errno. If the correct value would cause underflow,
				26	* zero is returned and %ERANGE is stored in %errno.
Georg Brandl	b569ee4	2006-05-29 14:28:05 +0000	[diff] [blame]	27	* If memory allocation fails, %ENOMEM is stored in %errno.
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	28	*
				29	* This function resets %errno before calling strtod() so that
				30	* you can reliably detect overflow and underflow.
				31	*
				32	* Return value: the #gdouble value.
				33	**/
Eric Smith	aca19e6	2009-04-22 13:29:05 +0000	[diff] [blame]	34
				35	/*
				36	Use system strtod; since strtod is locale aware, we may
				37	have to first fix the decimal separator.
				38
				39	Note that unlike _Py_dg_strtod, the system strtod may not always give
				40	correctly rounded results.
				41	*/
				42
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	43	double
Neal Norwitz	e7214a1	2005-12-18 05:03:17 +0000	[diff] [blame]	44	PyOS_ascii_strtod(const char nptr, char *endptr)
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	45	{
				46	char *fail_pos;
Neal Norwitz	0e7a0ed	2005-12-18 05:37:36 +0000	[diff] [blame]	47	double val = -1.0;
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	48	struct lconv *locale_data;
				49	const char *decimal_point;
Neal Norwitz	d39d861	2006-01-08 01:03:36 +0000	[diff] [blame]	50	size_t decimal_point_len;
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	51	const char p, decimal_point_pos;
				52	const char end = NULL; / Silence gcc */
Guido van Rossum	3b83549	2008-01-05 00:59:59 +0000	[diff] [blame]	53	const char *digits_pos = NULL;
				54	int negate = 0;
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	55
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	56	assert(nptr != NULL);
				57
				58	fail_pos = NULL;
				59
				60	locale_data = localeconv();
				61	decimal_point = locale_data->decimal_point;
				62	decimal_point_len = strlen(decimal_point);
				63
				64	assert(decimal_point_len != 0);
				65
				66	decimal_point_pos = NULL;
Guido van Rossum	3b83549	2008-01-05 00:59:59 +0000	[diff] [blame]	67
Mark Dickinson	6d6b220	2009-04-26 16:04:05 +0000	[diff] [blame]	68	/* Set errno to zero, so that we can distinguish zero results
				69	and underflows */
				70	errno = 0;
				71
Guido van Rossum	3b83549	2008-01-05 00:59:59 +0000	[diff] [blame]	72	/* We process any leading whitespace and the optional sign manually,
				73	then pass the remainder to the system strtod. This ensures that
				74	the result of an underflow has the correct sign. (bug #1725) */
				75
				76	p = nptr;
				77	/* Skip leading space */
Eric Smith	cac7af6	2009-04-27 19:04:37 +0000	[diff] [blame]	78	while (Py_ISSPACE(*p))
Guido van Rossum	3b83549	2008-01-05 00:59:59 +0000	[diff] [blame]	79	p++;
				80
				81	/* Process leading sign, if present */
				82	if (*p == '-') {
				83	negate = 1;
				84	p++;
Mark Dickinson	6d6b220	2009-04-26 16:04:05 +0000	[diff] [blame]	85	}
				86	else if (*p == '+') {
Guido van Rossum	3b83549	2008-01-05 00:59:59 +0000	[diff] [blame]	87	p++;
				88	}
				89
Mark Dickinson	6d6b220	2009-04-26 16:04:05 +0000	[diff] [blame]	90	/* Parse infinities and nans */
				91	if (p == 'i' \|\| p == 'I') {
				92	if (PyOS_strnicmp(p, "inf", 3) == 0) {
				93	val = Py_HUGE_VAL;
				94	if (PyOS_strnicmp(p+3, "inity", 5) == 0)
				95	fail_pos = (char *)p+8;
				96	else
				97	fail_pos = (char *)p+3;
				98	goto got_val;
				99	}
				100	else
				101	goto invalid_string;
Guido van Rossum	3b83549	2008-01-05 00:59:59 +0000	[diff] [blame]	102	}
Mark Dickinson	6d6b220	2009-04-26 16:04:05 +0000	[diff] [blame]	103	#ifdef Py_NAN
				104	if (p == 'n' \|\| p == 'N') {
				105	if (PyOS_strnicmp(p, "nan", 3) == 0) {
				106	val = Py_NAN;
				107	fail_pos = (char *)p+3;
				108	goto got_val;
				109	}
				110	else
				111	goto invalid_string;
				112	}
				113	#endif
Guido van Rossum	3b83549	2008-01-05 00:59:59 +0000	[diff] [blame]	114
Mark Dickinson	6d6b220	2009-04-26 16:04:05 +0000	[diff] [blame]	115	/* Some platform strtods accept hex floats; Python shouldn't (at the
				116	moment), so we check explicitly for strings starting with '0x'. */
				117	if (p == '0' && ((p+1) == 'x' \|\| *(p+1) == 'X'))
				118	goto invalid_string;
				119
				120	/* Check that what's left begins with a digit or decimal point */
Eric Smith	cac7af6	2009-04-27 19:04:37 +0000	[diff] [blame]	121	if (!Py_ISDIGIT(p) && p != '.')
Mark Dickinson	6d6b220	2009-04-26 16:04:05 +0000	[diff] [blame]	122	goto invalid_string;
				123
				124	digits_pos = p;
				125	if (decimal_point[0] != '.' \|\|
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	126	decimal_point[1] != 0)
				127	{
Mark Dickinson	6d6b220	2009-04-26 16:04:05 +0000	[diff] [blame]	128	/* Look for a '.' in the input; if present, it'll need to be
				129	swapped for the current locale's decimal point before we
				130	call strtod. On the other hand, if we find the current
				131	locale's decimal point then the input is invalid. */
Eric Smith	cac7af6	2009-04-27 19:04:37 +0000	[diff] [blame]	132	while (Py_ISDIGIT(*p))
Neal Norwitz	e7214a1	2005-12-18 05:03:17 +0000	[diff] [blame]	133	p++;
				134
				135	if (*p == '.')
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	136	{
Neal Norwitz	e7214a1	2005-12-18 05:03:17 +0000	[diff] [blame]	137	decimal_point_pos = p++;
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	138
Mark Dickinson	6d6b220	2009-04-26 16:04:05 +0000	[diff] [blame]	139	/* locate end of number */
Eric Smith	cac7af6	2009-04-27 19:04:37 +0000	[diff] [blame]	140	while (Py_ISDIGIT(*p))
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	141	p++;
				142
Neal Norwitz	e7214a1	2005-12-18 05:03:17 +0000	[diff] [blame]	143	if (p == 'e' \|\| p == 'E')
				144	p++;
				145	if (p == '+' \|\| p == '-')
				146	p++;
Eric Smith	cac7af6	2009-04-27 19:04:37 +0000	[diff] [blame]	147	while (Py_ISDIGIT(*p))
Neal Norwitz	e7214a1	2005-12-18 05:03:17 +0000	[diff] [blame]	148	p++;
				149	end = p;
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	150	}
Martin v. Löwis	fcfff0a	2006-07-03 12:19:50 +0000	[diff] [blame]	151	else if (strncmp(p, decimal_point, decimal_point_len) == 0)
Martin v. Löwis	fcfff0a	2006-07-03 12:19:50 +0000	[diff] [blame]	152	/* Python bug #1417699 */
Mark Dickinson	6d6b220	2009-04-26 16:04:05 +0000	[diff] [blame]	153	goto invalid_string;
Eric Smith	5c35a9d	2008-03-17 12:14:29 +0000	[diff] [blame]	154	/* For the other cases, we need not convert the decimal
				155	point */
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	156	}
				157
Mark Dickinson	6d6b220	2009-04-26 16:04:05 +0000	[diff] [blame]	158	if (decimal_point_pos) {
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	159	char copy, c;
Mark Dickinson	6d6b220	2009-04-26 16:04:05 +0000	[diff] [blame]	160	/* Create a copy of the input, with the '.' converted to the
				161	locale-specific decimal point */
Guido van Rossum	3b83549	2008-01-05 00:59:59 +0000	[diff] [blame]	162	copy = (char *)PyMem_MALLOC(end - digits_pos +
				163	1 + decimal_point_len);
Georg Brandl	b569ee4	2006-05-29 14:28:05 +0000	[diff] [blame]	164	if (copy == NULL) {
				165	if (endptr)
Georg Brandl	80181e2	2006-05-29 14:33:55 +0000	[diff] [blame]	166	endptr = (char )nptr;
Georg Brandl	b569ee4	2006-05-29 14:28:05 +0000	[diff] [blame]	167	errno = ENOMEM;
				168	return val;
				169	}
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	170
				171	c = copy;
Guido van Rossum	3b83549	2008-01-05 00:59:59 +0000	[diff] [blame]	172	memcpy(c, digits_pos, decimal_point_pos - digits_pos);
				173	c += decimal_point_pos - digits_pos;
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	174	memcpy(c, decimal_point, decimal_point_len);
				175	c += decimal_point_len;
Eric Smith	5c35a9d	2008-03-17 12:14:29 +0000	[diff] [blame]	176	memcpy(c, decimal_point_pos + 1,
				177	end - (decimal_point_pos + 1));
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	178	c += end - (decimal_point_pos + 1);
				179	*c = 0;
				180
				181	val = strtod(copy, &fail_pos);
				182
				183	if (fail_pos)
				184	{
				185	if (fail_pos > decimal_point_pos)
Guido van Rossum	3b83549	2008-01-05 00:59:59 +0000	[diff] [blame]	186	fail_pos = (char *)digits_pos +
				187	(fail_pos - copy) -
				188	(decimal_point_len - 1);
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	189	else
Guido van Rossum	3b83549	2008-01-05 00:59:59 +0000	[diff] [blame]	190	fail_pos = (char *)digits_pos +
				191	(fail_pos - copy);
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	192	}
				193
Brett Cannon	0ed0587	2006-05-25 20:44:08 +0000	[diff] [blame]	194	PyMem_FREE(copy);
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	195
				196	}
Neal Norwitz	e7214a1	2005-12-18 05:03:17 +0000	[diff] [blame]	197	else {
Guido van Rossum	3b83549	2008-01-05 00:59:59 +0000	[diff] [blame]	198	val = strtod(digits_pos, &fail_pos);
Neal Norwitz	e7214a1	2005-12-18 05:03:17 +0000	[diff] [blame]	199	}
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	200
Guido van Rossum	3b83549	2008-01-05 00:59:59 +0000	[diff] [blame]	201	if (fail_pos == digits_pos)
Mark Dickinson	6d6b220	2009-04-26 16:04:05 +0000	[diff] [blame]	202	goto invalid_string;
Guido van Rossum	3b83549	2008-01-05 00:59:59 +0000	[diff] [blame]	203
Mark Dickinson	6d6b220	2009-04-26 16:04:05 +0000	[diff] [blame]	204	got_val:
Guido van Rossum	3b83549	2008-01-05 00:59:59 +0000	[diff] [blame]	205	if (negate && fail_pos != nptr)
				206	val = -val;
				207
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	208	if (endptr)
				209	*endptr = fail_pos;
				210
				211	return val;
Mark Dickinson	6d6b220	2009-04-26 16:04:05 +0000	[diff] [blame]	212
				213	invalid_string:
				214	if (endptr)
				215	endptr = (char)nptr;
				216	errno = EINVAL;
				217	return -1.0;
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	218	}
				219
/* Locale-independent atof(): parse nptr with PyOS_ascii_strtod and
   discard the end-of-number position. */
double
PyOS_ascii_atof(const char *nptr)
{
    char **no_endptr = NULL;

    return PyOS_ascii_strtod(nptr, no_endptr);
}
				225
				226
/* Given a string that may have a decimal point in the current
   locale, change it back to a dot.  Since the string cannot get
   longer, no need for a maximum buffer size parameter.

   Only the position right after an optional sign and a run of digits is
   examined; the buffer is modified in place.  Nothing is done when the
   locale's decimal point already is ".". */
Py_LOCAL_INLINE(void)
change_decimal_from_locale_to_dot(char* buffer)
{
    struct lconv *locale_data = localeconv();
    const char *decimal_point = locale_data->decimal_point;

    if (decimal_point[0] != '.' || decimal_point[1] != 0) {
        size_t decimal_point_len = strlen(decimal_point);

        if (*buffer == '+' || *buffer == '-')
            buffer++;
        while (Py_ISDIGIT(*buffer))
            buffer++;
        /* An empty decimal point would make strncmp() report a match
           on anything, and the store below would then clobber an
           unrelated byte (possibly the terminating nul). */
        if (decimal_point_len != 0 &&
            strncmp(buffer, decimal_point, decimal_point_len) == 0) {
            *buffer = '.';
            buffer++;
            if (decimal_point_len > 1) {
                /* buffer needs to get smaller */
                size_t rest_len = strlen(buffer +
                                         (decimal_point_len - 1));
                memmove(buffer,
                        buffer + (decimal_point_len - 1),
                        rest_len);
                buffer[rest_len] = 0;
            }
        }
    }
}
				258
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	259
/* Make sure the numeric string in buffer starts with a sign: if it does
   not already begin with '-', shift it right by one and prepend '+'.
   buf_size is the total size of buffer in bytes; when the signed string
   (including its terminating nul) would not fit, buffer is left
   untouched. */
Py_LOCAL_INLINE(void)
ensure_sign(char* buffer, size_t buf_size)
{
    size_t len;

    if (buffer[0] == '-')
        /* Already have a sign. */
        return;

    /* Include the trailing 0 byte. */
    len = strlen(buffer)+1;
    /* After the shift the string occupies len+1 bytes, so we need
       len+1 <= buf_size. */
    if (len >= buf_size)
        /* No room for the sign, don't do anything. */
        return;

    memmove(buffer+1, buffer, len);
    buffer[0] = '+';
}
				278
/* From the C99 standard, section 7.19.6:
The exponent always contains at least two digits, and only as many more digits
as necessary to represent the exponent.
*/
#define MIN_EXPONENT_DIGITS 2

/* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS
   in length.

   buffer is edited in place and buf_size is its total size in bytes.
   Only an exponent of the form 'e' or 'E' followed by an explicit sign
   is handled.  Surplus leading zeros are removed from longer exponents;
   shorter exponents are zero-padded, but only if the buffer has room. */
Py_LOCAL_INLINE(void)
ensure_minimum_exponent_length(char* buffer, size_t buf_size)
{
    char *p = strpbrk(buffer, "eE");
    if (p && (*(p + 1) == '-' || *(p + 1) == '+')) {
        char *start = p + 2;
        int exponent_digit_cnt = 0;
        int leading_zero_cnt = 0;
        int in_leading_zeros = 1;
        int significant_digit_cnt;

        /* Skip over the exponent and the sign. */
        p += 2;

        /* Find the end of the exponent, keeping track of leading
           zeros. */
        while (*p && Py_ISDIGIT(*p)) {
            if (in_leading_zeros && *p == '0')
                ++leading_zero_cnt;
            if (*p != '0')
                in_leading_zeros = 0;
            ++p;
            ++exponent_digit_cnt;
        }

        significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
        if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
            /* If there are exactly 2 digits, we're done,
               regardless of what they contain */
        }
        else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
            int extra_zeros_cnt;

            /* There are more than 2 digits in the exponent.  See
               if we can delete some of the leading zeros */
            if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
                significant_digit_cnt = MIN_EXPONENT_DIGITS;
            extra_zeros_cnt = exponent_digit_cnt -
                significant_digit_cnt;

            /* Delete extra_zeros_cnt worth of characters from the
               front of the exponent */
            assert(extra_zeros_cnt >= 0);

            /* Add one to significant_digit_cnt to copy the
               trailing 0 byte, thus setting the length */
            memmove(start,
                    start + extra_zeros_cnt,
                    significant_digit_cnt + 1);
        }
        else {
            /* If there are fewer than 2 digits, add zeros
               until there are 2, if there's enough room */
            int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
            if (start + zeros + exponent_digit_cnt + 1
                  < buffer + buf_size) {
                memmove(start + zeros, start,
                        exponent_digit_cnt + 1);
                memset(start, '0', zeros);
            }
        }
    }
}
				350
/* Remove trailing zeros after the decimal point from a numeric string; also
   remove the decimal point if all digits following it are zero.  The numeric
   string must end in '\0', and should not have any leading or trailing
   whitespace.  Assumes that the decimal point is '.'.

   The string is edited in place; anything following the fractional
   digits (for example an exponent) is kept and moved up. */
Py_LOCAL_INLINE(void)
remove_trailing_zeros(char *buffer)
{
    char *old_fraction_end, *new_fraction_end, *end, *p;

    p = buffer;
    if (*p == '-' || *p == '+')
        /* Skip leading sign, if present */
        ++p;
    while (Py_ISDIGIT(*p))
        ++p;

    /* if there's no decimal point there's nothing to do */
    if (*p++ != '.')
        return;

    /* scan any digits after the point */
    while (Py_ISDIGIT(*p))
        ++p;
    old_fraction_end = p;

    /* scan up to ending '\0' */
    while (*p != '\0')
        p++;
    /* +1 to make sure that we move the null byte as well */
    end = p+1;

    /* scan back from fraction_end, looking for removable zeros;
       the '.' found above guarantees that this loop stops */
    p = old_fraction_end;
    while (*(p-1) == '0')
        --p;
    /* and remove point if we've got that far */
    if (*(p-1) == '.')
        --p;
    new_fraction_end = p;

    memmove(new_fraction_end, old_fraction_end, end-old_fraction_end);
}
				393
				394	/* Ensure that buffer has a decimal point in it. The decimal point will not
				395	be in the current locale, it will always be '.'. Don't add a decimal point
				396	if an exponent is present. Also, convert to exponential notation where
				397	adding a '.0' would produce too many significant digits (see issue 5864).
				398
				399	Returns a pointer to the fixed buffer, or NULL on failure.
				400	*/
				401	Py_LOCAL_INLINE(char *)
				402	ensure_decimal_point(char* buffer, size_t buf_size, int precision)
				403	{
				404	int digit_count, insert_count = 0, convert_to_exp = 0;
				405	char* chars_to_insert, *digits_start;
Eric Smith	0a95063	2008-04-30 01:09:30 +0000	[diff] [blame]	406
				407	/* search for the first non-digit character */
				408	char *p = buffer;
Eric Smith	f032a00	2008-07-19 00:24:05 +0000	[diff] [blame]	409	if (p == '-' \|\| p == '+')
				410	/* Skip leading sign, if present. I think this could only
				411	ever be '-', but it can't hurt to check for both. */
				412	++p;
Mark Dickinson	92fcc9c	2009-04-29 20:41:00 +0000	[diff] [blame]	413	digits_start = p;
Eric Smith	cac7af6	2009-04-27 19:04:37 +0000	[diff] [blame]	414	while (p && Py_ISDIGIT(p))
Eric Smith	0a95063	2008-04-30 01:09:30 +0000	[diff] [blame]	415	++p;
Mark Dickinson	92fcc9c	2009-04-29 20:41:00 +0000	[diff] [blame]	416	digit_count = Py_SAFE_DOWNCAST(p - digits_start, Py_ssize_t, int);
Eric Smith	0a95063	2008-04-30 01:09:30 +0000	[diff] [blame]	417
				418	if (*p == '.') {
Eric Smith	cac7af6	2009-04-27 19:04:37 +0000	[diff] [blame]	419	if (Py_ISDIGIT(*(p+1))) {
Eric Smith	0a95063	2008-04-30 01:09:30 +0000	[diff] [blame]	420	/* Nothing to do, we already have a decimal
				421	point and a digit after it */
				422	}
				423	else {
				424	/* We have a decimal point, but no following
				425	digit. Insert a zero after the decimal. */
Mark Dickinson	92fcc9c	2009-04-29 20:41:00 +0000	[diff] [blame]	426	/* can't ever get here via PyOS_double_to_string */
				427	assert(precision == -1);
Eric Smith	0a95063	2008-04-30 01:09:30 +0000	[diff] [blame]	428	++p;
				429	chars_to_insert = "0";
				430	insert_count = 1;
				431	}
				432	}
Eric Smith	aca19e6	2009-04-22 13:29:05 +0000	[diff] [blame]	433	else if (!(p == 'e' \|\| p == 'E')) {
				434	/* Don't add ".0" if we have an exponent. */
Mark Dickinson	92fcc9c	2009-04-29 20:41:00 +0000	[diff] [blame]	435	if (digit_count == precision) {
				436	/* issue 5864: don't add a trailing .0 in the case
				437	where the '%g'-formatted result already has as many
				438	significant digits as were requested. Switch to
				439	exponential notation instead. */
				440	convert_to_exp = 1;
				441	/* no exponent, no point, and we shouldn't land here
				442	for infs and nans, so we must be at the end of the
				443	string. */
				444	assert(*p == '\0');
				445	}
				446	else {
				447	assert(precision == -1 \|\| digit_count < precision);
				448	chars_to_insert = ".0";
				449	insert_count = 2;
				450	}
Eric Smith	0a95063	2008-04-30 01:09:30 +0000	[diff] [blame]	451	}
				452	if (insert_count) {
				453	size_t buf_len = strlen(buffer);
				454	if (buf_len + insert_count + 1 >= buf_size) {
				455	/* If there is not enough room in the buffer
				456	for the additional text, just skip it. It's
				457	not worth generating an error over. */
				458	}
				459	else {
				460	memmove(p + insert_count, p,
				461	buffer + strlen(buffer) - p + 1);
				462	memcpy(p, chars_to_insert, insert_count);
				463	}
				464	}
Mark Dickinson	92fcc9c	2009-04-29 20:41:00 +0000	[diff] [blame]	465	if (convert_to_exp) {
				466	int written;
				467	size_t buf_avail;
				468	p = digits_start;
				469	/* insert decimal point */
				470	assert(digit_count >= 1);
				471	memmove(p+2, p+1, digit_count); /* safe, but overwrites nul */
				472	p[1] = '.';
				473	p += digit_count+1;
				474	assert(p <= buf_size+buffer);
				475	buf_avail = buf_size+buffer-p;
				476	if (buf_avail == 0)
				477	return NULL;
				478	/* Add exponent. It's okay to use lower case 'e': we only
				479	arrive here as a result of using the empty format code or
				480	repr/str builtins and those never want an upper case 'E' */
				481	written = PyOS_snprintf(p, buf_avail, "e%+.02d", digit_count-1);
				482	if (!(0 <= written &&
				483	written < Py_SAFE_DOWNCAST(buf_avail, size_t, int)))
				484	/* output truncated, or something else bad happened */
				485	return NULL;
				486	remove_trailing_zeros(buffer);
				487	}
				488	return buffer;
Eric Smith	0a95063	2008-04-30 01:09:30 +0000	[diff] [blame]	489	}
				490
Eric Smith	7ef40bf	2008-02-20 23:34:22 +0000	[diff] [blame]	491	/* see FORMATBUFLEN in unicodeobject.c */
				492	#define FLOAT_FORMATBUFLEN 120
				493
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	494	/**
Eric Smith	068f065	2009-04-25 21:40:15 +0000	[diff] [blame]	495	* _PyOS_ascii_formatd:
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	496	* @buffer: A buffer to place the resulting string in
Eric Smith	8113ca6	2008-03-17 11:01:01 +0000	[diff] [blame]	497	* @buf_size: The length of the buffer.
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	498	* @format: The printf()-style format to use for the
				499	* code to use for converting.
				500	* @d: The #gdouble to convert
				501	*
				502	* Converts a #gdouble to a string, using the '.' as
				503	* decimal point. To format the number you pass in
				504	* a printf()-style format string. Allowed conversion
Eric Smith	aca19e6	2009-04-22 13:29:05 +0000	[diff] [blame]	505	* specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'Z'.
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	506	*
Eric Smith	8113ca6	2008-03-17 11:01:01 +0000	[diff] [blame]	507	* 'Z' is the same as 'g', except it always has a decimal and
				508	* at least one digit after the decimal.
Eric Smith	7ef40bf	2008-02-20 23:34:22 +0000	[diff] [blame]	509	*
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	510	* Return value: The pointer to the buffer with the converted string.
Mark Dickinson	92fcc9c	2009-04-29 20:41:00 +0000	[diff] [blame]	511	* On failure returns NULL but does not set any Python exception.
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	512	**/
Eric Smith	068f065	2009-04-25 21:40:15 +0000	[diff] [blame]	513	/* DEPRECATED, will be deleted in 2.8 and 3.2 */
				514	PyAPI_FUNC(char *)
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	515	PyOS_ascii_formatd(char *buffer,
Eric Smith	8113ca6	2008-03-17 11:01:01 +0000	[diff] [blame]	516	size_t buf_size,
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	517	const char *format,
				518	double d)
				519	{
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	520	char format_char;
Eric Smith	7ef40bf	2008-02-20 23:34:22 +0000	[diff] [blame]	521	size_t format_len = strlen(format);
				522
Eric Smith	8113ca6	2008-03-17 11:01:01 +0000	[diff] [blame]	523	/* Issue 2264: code 'Z' requires copying the format. 'Z' is 'g', but
				524	also with at least one character past the decimal. */
Eric Smith	7ef40bf	2008-02-20 23:34:22 +0000	[diff] [blame]	525	char tmp_format[FLOAT_FORMATBUFLEN];
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	526
Eric Smith	068f065	2009-04-25 21:40:15 +0000	[diff] [blame]	527	if (PyErr_WarnEx(PyExc_DeprecationWarning,
				528	"PyOS_ascii_formatd is deprecated, "
				529	"use PyOS_double_to_string instead", 1) < 0)
				530	return NULL;
				531
Eric Smith	7ef40bf	2008-02-20 23:34:22 +0000	[diff] [blame]	532	/* The last character in the format string must be the format char */
				533	format_char = format[format_len - 1];
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	534
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	535	if (format[0] != '%')
				536	return NULL;
				537
Eric Smith	7ef40bf	2008-02-20 23:34:22 +0000	[diff] [blame]	538	/* I'm not sure why this test is here. It's ensuring that the format
				539	string after the first character doesn't have a single quote, a
				540	lowercase l, or a percent. This is the reverse of the commented-out
				541	test about 10 lines ago. */
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	542	if (strpbrk(format + 1, "'l%"))
				543	return NULL;
				544
Eric Smith	8113ca6	2008-03-17 11:01:01 +0000	[diff] [blame]	545	/* Also curious about this function is that it accepts format strings
				546	like "%xg", which are invalid for floats. In general, the
				547	interface to this function is not very good, but changing it is
				548	difficult because it's a public API. */
				549
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	550	if (!(format_char == 'e' \|\| format_char == 'E' \|\|
				551	format_char == 'f' \|\| format_char == 'F' \|\|
Eric Smith	7ef40bf	2008-02-20 23:34:22 +0000	[diff] [blame]	552	format_char == 'g' \|\| format_char == 'G' \|\|
Eric Smith	aca19e6	2009-04-22 13:29:05 +0000	[diff] [blame]	553	format_char == 'Z'))
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	554	return NULL;
				555
Eric Smith	aca19e6	2009-04-22 13:29:05 +0000	[diff] [blame]	556	/* Map 'Z' format_char to 'g', by copying the format string and
Eric Smith	8113ca6	2008-03-17 11:01:01 +0000	[diff] [blame]	557	replacing the final char with a 'g' */
Eric Smith	aca19e6	2009-04-22 13:29:05 +0000	[diff] [blame]	558	if (format_char == 'Z') {
Eric Smith	7ef40bf	2008-02-20 23:34:22 +0000	[diff] [blame]	559	if (format_len + 1 >= sizeof(tmp_format)) {
				560	/* The format won't fit in our copy. Error out. In
Eric Smith	5c35a9d	2008-03-17 12:14:29 +0000	[diff] [blame]	561	practice, this will never happen and will be
				562	detected by returning NULL */
Eric Smith	7ef40bf	2008-02-20 23:34:22 +0000	[diff] [blame]	563	return NULL;
				564	}
				565	strcpy(tmp_format, format);
				566	tmp_format[format_len - 1] = 'g';
				567	format = tmp_format;
				568	}
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	569
Eric Smith	8113ca6	2008-03-17 11:01:01 +0000	[diff] [blame]	570
Eric Smith	7ef40bf	2008-02-20 23:34:22 +0000	[diff] [blame]	571	/* Have PyOS_snprintf do the hard work */
Eric Smith	8113ca6	2008-03-17 11:01:01 +0000	[diff] [blame]	572	PyOS_snprintf(buffer, buf_size, format, d);
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	573
Eric Smith	0a95063	2008-04-30 01:09:30 +0000	[diff] [blame]	574	/* Do various fixups on the return string */
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	575
Eric Smith	0a95063	2008-04-30 01:09:30 +0000	[diff] [blame]	576	/* Get the current locale, and find the decimal point string.
Eric Smith	aca19e6	2009-04-22 13:29:05 +0000	[diff] [blame]	577	Convert that string back to a dot. */
				578	change_decimal_from_locale_to_dot(buffer);
Eric Smith	7ef40bf	2008-02-20 23:34:22 +0000	[diff] [blame]	579
				580	/* If an exponent exists, ensure that the exponent is at least
				581	MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
				582	for the extra zeros. Also, if there are more than
				583	MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
				584	back to MIN_EXPONENT_DIGITS */
Mark Dickinson	e73cbe7	2009-04-26 19:54:55 +0000	[diff] [blame]	585	ensure_minimum_exponent_length(buffer, buf_size);
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	586
Eric Smith	8113ca6	2008-03-17 11:01:01 +0000	[diff] [blame]	587	/* If format_char is 'Z', make sure we have at least one character
Mark Dickinson	92fcc9c	2009-04-29 20:41:00 +0000	[diff] [blame]	588	after the decimal point (and make sure we have a decimal point);
				589	also switch to exponential notation in some edge cases where the
				590	extra character would produce more significant digits that we
				591	really want. */
Eric Smith	0a95063	2008-04-30 01:09:30 +0000	[diff] [blame]	592	if (format_char == 'Z')
Mark Dickinson	92fcc9c	2009-04-29 20:41:00 +0000	[diff] [blame]	593	buffer = ensure_decimal_point(buffer, buf_size, -1);
Eric Smith	8113ca6	2008-03-17 11:01:01 +0000	[diff] [blame]	594
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	595	return buffer;
				596	}
				597
Mark Dickinson	df108ca	2009-04-29 21:56:53 +0000	[diff] [blame]	598	/* Precisions used by repr() and str(), respectively.
				599
				600	The repr() precision (17 significant decimal digits) is the minimal number
				601	that is guaranteed to have enough precision so that if the number is read
				602	back in the exact same binary value is recreated. This is true for IEEE
				603	floating point by design, and also happens to work for all other modern
				604	hardware.
				605
				606	The str() precision (12 significant decimal digits) is chosen so that in
				607	most cases, the rounding noise created by various operations is suppressed,
				608	while giving plenty of precision for practical use.
				609
				610	*/
				611
Eric Smith	068f065	2009-04-25 21:40:15 +0000	[diff] [blame]	612	PyAPI_FUNC(void)
				613	_PyOS_double_to_string(char *buf, size_t buf_len, double val,
				614	char format_code, int precision,
				615	int flags, int *ptype)
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	616	{
Eric Smith	aca19e6	2009-04-22 13:29:05 +0000	[diff] [blame]	617	char format[32];
Eric Smith	aca19e6	2009-04-22 13:29:05 +0000	[diff] [blame]	618	int t;
				619	int upper = 0;
				620
Eric Smith	068f065	2009-04-25 21:40:15 +0000	[diff] [blame]	621	if (buf_len < 1) {
				622	assert(0);
				623	/* There's no way to signal this error. Just return. */
				624	return;
				625	}
				626	buf[0] = 0;
				627
Eric Smith	aca19e6	2009-04-22 13:29:05 +0000	[diff] [blame]	628	/* Validate format_code, and map upper and lower case */
				629	switch (format_code) {
				630	case 'e': /* exponent */
				631	case 'f': /* fixed */
				632	case 'g': /* general */
				633	break;
				634	case 'E':
				635	upper = 1;
				636	format_code = 'e';
				637	break;
				638	case 'F':
				639	upper = 1;
				640	format_code = 'f';
				641	break;
				642	case 'G':
				643	upper = 1;
				644	format_code = 'g';
				645	break;
				646	case 'r': /* repr format */
				647	/* Supplied precision is unused, must be 0. */
Eric Smith	068f065	2009-04-25 21:40:15 +0000	[diff] [blame]	648	if (precision != 0)
				649	return;
Eric Smith	aca19e6	2009-04-22 13:29:05 +0000	[diff] [blame]	650	precision = 17;
				651	format_code = 'g';
				652	break;
				653	case 's': /* str format */
				654	/* Supplied precision is unused, must be 0. */
Eric Smith	068f065	2009-04-25 21:40:15 +0000	[diff] [blame]	655	if (precision != 0)
				656	return;
Eric Smith	aca19e6	2009-04-22 13:29:05 +0000	[diff] [blame]	657	precision = 12;
				658	format_code = 'g';
				659	break;
				660	default:
Eric Smith	068f065	2009-04-25 21:40:15 +0000	[diff] [blame]	661	assert(0);
				662	return;
				663	}
				664
				665	/* Check for buf too small to fit "-inf". Other buffer too small
				666	conditions are dealt with when converting or formatting finite
				667	numbers. */
				668	if (buf_len < 5) {
				669	assert(0);
				670	return;
Eric Smith	aca19e6	2009-04-22 13:29:05 +0000	[diff] [blame]	671	}
				672
				673	/* Handle nan and inf. */
				674	if (Py_IS_NAN(val)) {
				675	strcpy(buf, "nan");
				676	t = Py_DTST_NAN;
				677	} else if (Py_IS_INFINITY(val)) {
				678	if (copysign(1., val) == 1.)
				679	strcpy(buf, "inf");
				680	else
				681	strcpy(buf, "-inf");
				682	t = Py_DTST_INFINITE;
				683	} else {
				684	t = Py_DTST_FINITE;
				685
Eric Smith	068f065	2009-04-25 21:40:15 +0000	[diff] [blame]	686	/* Build the format string. */
				687	PyOS_snprintf(format, sizeof(format), "%%%s.%i%c",
				688	(flags & Py_DTSF_ALT ? "#" : ""), precision,
				689	format_code);
Eric Smith	aca19e6	2009-04-22 13:29:05 +0000	[diff] [blame]	690
Eric Smith	068f065	2009-04-25 21:40:15 +0000	[diff] [blame]	691	/* Have PyOS_snprintf do the hard work. */
				692	PyOS_snprintf(buf, buf_len, format, val);
				693
				694	/* Do various fixups on the return string */
				695
				696	/* Get the current locale, and find the decimal point string.
				697	Convert that string back to a dot. */
				698	change_decimal_from_locale_to_dot(buf);
				699
				700	/* If an exponent exists, ensure that the exponent is at least
				701	MIN_EXPONENT_DIGITS digits, providing the buffer is large
				702	enough for the extra zeros. Also, if there are more than
				703	MIN_EXPONENT_DIGITS, remove as many zeros as possible until
				704	we get back to MIN_EXPONENT_DIGITS */
Mark Dickinson	e73cbe7	2009-04-26 19:54:55 +0000	[diff] [blame]	705	ensure_minimum_exponent_length(buf, buf_len);
Eric Smith	068f065	2009-04-25 21:40:15 +0000	[diff] [blame]	706
				707	/* Possibly make sure we have at least one character after the
				708	decimal point (and make sure we have a decimal point). */
Eric Smith	aca19e6	2009-04-22 13:29:05 +0000	[diff] [blame]	709	if (flags & Py_DTSF_ADD_DOT_0)
Mark Dickinson	92fcc9c	2009-04-29 20:41:00 +0000	[diff] [blame]	710	buf = ensure_decimal_point(buf, buf_len, precision);
Eric Smith	aca19e6	2009-04-22 13:29:05 +0000	[diff] [blame]	711	}
				712
Eric Smith	068f065	2009-04-25 21:40:15 +0000	[diff] [blame]	713	/* Add the sign if asked and the result isn't negative. */
				714	if (flags & Py_DTSF_SIGN && buf[0] != '-')
				715	ensure_sign(buf, buf_len);
Eric Smith	aca19e6	2009-04-22 13:29:05 +0000	[diff] [blame]	716
Eric Smith	068f065	2009-04-25 21:40:15 +0000	[diff] [blame]	717	if (upper) {
				718	/* Convert to upper case. */
				719	char *p;
				720	for (p = buf; *p; p++)
				721	p = toupper(p);
				722	}
				723
				724	if (ptype)
				725	*ptype = t;
				726	}
				727
				728
				729	PyAPI_FUNC(char *) PyOS_double_to_string(double val,
				730	char format_code,
				731	int precision,
				732	int flags,
				733	int *ptype)
				734	{
				735	char buf[128];
				736	Py_ssize_t len;
				737	char *result;
				738
				739	_PyOS_double_to_string(buf, sizeof(buf), val, format_code, precision,
				740	flags, ptype);
				741	len = strlen(buf);
				742	if (len == 0) {
				743	PyErr_BadInternalCall();
				744	return NULL;
				745	}
				746
				747	/* Add 1 for the trailing 0 byte. */
				748	result = PyMem_Malloc(len + 1);
Eric Smith	aca19e6	2009-04-22 13:29:05 +0000	[diff] [blame]	749	if (result == NULL) {
				750	PyErr_NoMemory();
				751	return NULL;
				752	}
Eric Smith	068f065	2009-04-25 21:40:15 +0000	[diff] [blame]	753	strcpy(result, buf);
Eric Smith	aca19e6	2009-04-22 13:29:05 +0000	[diff] [blame]	754
Eric Smith	aca19e6	2009-04-22 13:29:05 +0000	[diff] [blame]	755	return result;
Martin v. Löwis	737ea82	2004-06-08 18:52:54 +0000	[diff] [blame]	756	}