Blame - Python/formatter_unicode.c - platform/external/python/cpython3

blob: db6364f5135dc7ff8b090019843ff384843a8766 [file] [log] [blame]

/* implements the unicode (as opposed to string) version of the
   built-in formatters for string, int, float.  that is, the versions
   of int.__format__, etc., that take and return unicode objects */
				4
				5	#include "Python.h"
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	6	#include <locale.h>
				7
				8	/* Raises an exception about an unknown presentation type for this
				9	* type. */
				10
				11	static void
				12	unknown_presentation_type(Py_UCS4 presentation_type,
				13	const char* type_name)
				14	{
				15	/* %c might be out-of-range, hence the two cases. */
				16	if (presentation_type > 32 && presentation_type < 128)
				17	PyErr_Format(PyExc_ValueError,
				18	"Unknown format code '%c' "
				19	"for object of type '%.200s'",
				20	(char)presentation_type,
				21	type_name);
				22	else
				23	PyErr_Format(PyExc_ValueError,
				24	"Unknown format code '\\x%x' "
				25	"for object of type '%.200s'",
				26	(unsigned int)presentation_type,
				27	type_name);
				28	}
				29
				30	static void
				31	invalid_comma_type(Py_UCS4 presentation_type)
				32	{
				33	if (presentation_type > 32 && presentation_type < 128)
				34	PyErr_Format(PyExc_ValueError,
				35	"Cannot specify ',' with '%c'.",
				36	(char)presentation_type);
				37	else
				38	PyErr_Format(PyExc_ValueError,
				39	"Cannot specify ',' with '\\x%x'.",
				40	(unsigned int)presentation_type);
				41	}
				42
				43	/*
				44	get_integer consumes 0 or more decimal digit characters from an
				45	input string, updates *result with the corresponding positive
				46	integer, and returns the number of digits consumed.
				47
				48	returns -1 on error.
				49	*/
				50	static int
				51	get_integer(PyObject str, Py_ssize_t pos, Py_ssize_t end,
				52	Py_ssize_t *result)
				53	{
Mark Dickinson	47862d4	2011-12-01 15:27:04 +0000	[diff] [blame]	54	Py_ssize_t accumulator, digitval;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	55	int numdigits;
				56	accumulator = numdigits = 0;
				57	for (;;(*pos)++, numdigits++) {
				58	if (*pos >= end)
				59	break;
				60	digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ_CHAR(str, *pos));
				61	if (digitval < 0)
				62	break;
				63	/*
Mark Dickinson	47862d4	2011-12-01 15:27:04 +0000	[diff] [blame]	64	Detect possible overflow before it happens:
				65
				66	accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
				67	accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	68	*/
Mark Dickinson	47862d4	2011-12-01 15:27:04 +0000	[diff] [blame]	69	if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	70	PyErr_Format(PyExc_ValueError,
				71	"Too many decimal digits in format string");
				72	return -1;
				73	}
Mark Dickinson	47862d4	2011-12-01 15:27:04 +0000	[diff] [blame]	74	accumulator = accumulator * 10 + digitval;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	75	}
				76	*result = accumulator;
				77	return numdigits;
				78	}
				79
				80	/************************************************************************/
				81	/********* standard format specifier parsing ************************/
				82	/************************************************************************/
				83
				84	/* returns true if this character is a specifier alignment token */
				85	Py_LOCAL_INLINE(int)
				86	is_alignment_token(Py_UCS4 c)
				87	{
				88	switch (c) {
				89	case '<': case '>': case '=': case '^':
				90	return 1;
				91	default:
				92	return 0;
				93	}
				94	}
				95
				96	/* returns true if this character is a sign element */
				97	Py_LOCAL_INLINE(int)
				98	is_sign_element(Py_UCS4 c)
				99	{
				100	switch (c) {
				101	case ' ': case '+': case '-':
				102	return 1;
				103	default:
				104	return 0;
				105	}
				106	}
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	107
Eric Smith	4a7d76d	2008-05-30 18:10:19 +0000	[diff] [blame]	108
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	109	typedef struct {
				110	Py_UCS4 fill_char;
				111	Py_UCS4 align;
				112	int alternate;
				113	Py_UCS4 sign;
				114	Py_ssize_t width;
				115	int thousands_separators;
				116	Py_ssize_t precision;
				117	Py_UCS4 type;
				118	} InternalFormatSpec;
Eric Smith	4a7d76d	2008-05-30 18:10:19 +0000	[diff] [blame]	119
#if 0
/* Occasionally useful for debugging. Should normally be commented out.
   Dumps every field of *format to stdout, one field per line. */
static void
DEBUG_PRINT_FORMAT_SPEC(InternalFormatSpec *format)
{
    /* The Py_UCS4 fields are cast explicitly so that each argument matches
       its conversion specifier. */
    printf("internal format spec: fill_char %u\n",
           (unsigned int)format->fill_char);
    printf("internal format spec: align %u\n",
           (unsigned int)format->align);
    printf("internal format spec: alternate %d\n", format->alternate);
    printf("internal format spec: sign %u\n",
           (unsigned int)format->sign);
    printf("internal format spec: width %zd\n", format->width);
    printf("internal format spec: thousands_separators %d\n",
           format->thousands_separators);
    printf("internal format spec: precision %zd\n", format->precision);
    printf("internal format spec: type %c\n", (int)format->type);
    printf("\n");
}
#endif
				137
				138
				139	/*
				140	ptr points to the start of the format_spec, end points just past its end.
				141	fills in format with the parsed information.
				142	returns 1 on success, 0 on failure.
				143	if failure, sets the exception
				144	*/
				145	static int
				146	parse_internal_render_format_spec(PyObject *format_spec,
				147	Py_ssize_t start, Py_ssize_t end,
				148	InternalFormatSpec *format,
				149	char default_type,
				150	char default_align)
				151	{
				152	Py_ssize_t pos = start;
				153	/* end-pos is used throughout this code to specify the length of
				154	the input string */
				155	#define READ_spec(index) PyUnicode_READ_CHAR(format_spec, index)
				156
				157	Py_ssize_t consumed;
				158	int align_specified = 0;
				159
				160	format->fill_char = '\0';
				161	format->align = default_align;
				162	format->alternate = 0;
				163	format->sign = '\0';
				164	format->width = -1;
				165	format->thousands_separators = 0;
				166	format->precision = -1;
				167	format->type = default_type;
				168
				169	/* If the second char is an alignment token,
				170	then parse the fill char */
				171	if (end-pos >= 2 && is_alignment_token(READ_spec(pos+1))) {
				172	format->align = READ_spec(pos+1);
				173	format->fill_char = READ_spec(pos);
				174	align_specified = 1;
				175	pos += 2;
				176	}
				177	else if (end-pos >= 1 && is_alignment_token(READ_spec(pos))) {
				178	format->align = READ_spec(pos);
				179	align_specified = 1;
				180	++pos;
				181	}
				182
				183	/* Parse the various sign options */
				184	if (end-pos >= 1 && is_sign_element(READ_spec(pos))) {
				185	format->sign = READ_spec(pos);
				186	++pos;
				187	}
				188
				189	/* If the next character is #, we're in alternate mode. This only
				190	applies to integers. */
				191	if (end-pos >= 1 && READ_spec(pos) == '#') {
				192	format->alternate = 1;
				193	++pos;
				194	}
				195
				196	/* The special case for 0-padding (backwards compat) */
				197	if (format->fill_char == '\0' && end-pos >= 1 && READ_spec(pos) == '0') {
				198	format->fill_char = '0';
				199	if (!align_specified) {
				200	format->align = '=';
				201	}
				202	++pos;
				203	}
				204
				205	consumed = get_integer(format_spec, &pos, end, &format->width);
				206	if (consumed == -1)
				207	/* Overflow error. Exception already set. */
				208	return 0;
				209
				210	/* If consumed is 0, we didn't consume any characters for the
				211	width. In that case, reset the width to -1, because
				212	get_integer() will have set it to zero. -1 is how we record
				213	that the width wasn't specified. */
				214	if (consumed == 0)
				215	format->width = -1;
				216
				217	/* Comma signifies add thousands separators */
				218	if (end-pos && READ_spec(pos) == ',') {
				219	format->thousands_separators = 1;
				220	++pos;
				221	}
				222
				223	/* Parse field precision */
				224	if (end-pos && READ_spec(pos) == '.') {
				225	++pos;
				226
				227	consumed = get_integer(format_spec, &pos, end, &format->precision);
				228	if (consumed == -1)
				229	/* Overflow error. Exception already set. */
				230	return 0;
				231
				232	/* Not having a precision after a dot is an error. */
				233	if (consumed == 0) {
				234	PyErr_Format(PyExc_ValueError,
				235	"Format specifier missing precision");
				236	return 0;
				237	}
				238
				239	}
				240
				241	/* Finally, parse the type field. */
				242
				243	if (end-pos > 1) {
Eric V. Smith	d25cfe6	2012-01-19 20:04:28 -0500	[diff] [blame^]	244	/* More than one char remain, invalid format specifier. */
				245	PyErr_Format(PyExc_ValueError, "Invalid format specifier");
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	246	return 0;
				247	}
				248
				249	if (end-pos == 1) {
				250	format->type = READ_spec(pos);
				251	++pos;
				252	}
				253
				254	/* Do as much validating as we can, just by looking at the format
				255	specifier. Do not take into account what type of formatting
				256	we're doing (int, float, string). */
				257
				258	if (format->thousands_separators) {
				259	switch (format->type) {
				260	case 'd':
				261	case 'e':
				262	case 'f':
				263	case 'g':
				264	case 'E':
				265	case 'G':
				266	case '%':
				267	case 'F':
				268	case '\0':
				269	/* These are allowed. See PEP 378.*/
				270	break;
				271	default:
				272	invalid_comma_type(format->type);
				273	return 0;
				274	}
				275	}
				276
				277	if (format->fill_char > 127 \|\| format->align > 127 \|\|
				278	format->sign > 127) {
				279	PyErr_SetString(PyExc_ValueError, "fill character too large");
				280	return 0;
				281	}
				282
				283	return 1;
				284	}
				285
				286	/* Calculate the padding needed. */
				287	static void
				288	calc_padding(Py_ssize_t nchars, Py_ssize_t width, Py_UCS4 align,
				289	Py_ssize_t n_lpadding, Py_ssize_t n_rpadding,
				290	Py_ssize_t *n_total)
				291	{
				292	if (width >= 0) {
				293	if (nchars > width)
				294	*n_total = nchars;
				295	else
				296	*n_total = width;
				297	}
				298	else {
				299	/* not specified, use all of the chars and no more */
				300	*n_total = nchars;
				301	}
				302
				303	/* Figure out how much leading space we need, based on the
				304	aligning */
				305	if (align == '>')
				306	n_lpadding = n_total - nchars;
				307	else if (align == '^')
				308	n_lpadding = (n_total - nchars) / 2;
				309	else if (align == '<' \|\| align == '=')
				310	*n_lpadding = 0;
				311	else {
				312	/* We should never have an unspecified alignment. */
				313	*n_lpadding = 0;
				314	assert(0);
				315	}
				316
				317	n_rpadding = n_total - nchars - *n_lpadding;
				318	}
				319
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	320	/* Do the padding, and return a pointer to where the caller-supplied
				321	content goes. */
				322	static Py_ssize_t
				323	fill_padding(PyObject *s, Py_ssize_t start, Py_ssize_t nchars,
				324	Py_UCS4 fill_char, Py_ssize_t n_lpadding,
				325	Py_ssize_t n_rpadding)
				326	{
				327	/* Pad on left. */
				328	if (n_lpadding)
Victor Stinner	3fe5531	2012-01-04 00:33:50 +0100	[diff] [blame]	329	PyUnicode_Fill(s, start, start + n_lpadding, fill_char);
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	330
				331	/* Pad on right. */
				332	if (n_rpadding)
Victor Stinner	3fe5531	2012-01-04 00:33:50 +0100	[diff] [blame]	333	PyUnicode_Fill(s, start + nchars + n_lpadding,
				334	start + nchars + n_lpadding + n_rpadding, fill_char);
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	335
				336	/* Pointer to the user content. */
				337	return start + n_lpadding;
				338	}
				339
				340	/************************************************************************/
				341	/********* common routines for numeric formatting *******************/
				342	/************************************************************************/
				343
				344	/* Locale type codes. */
				345	#define LT_CURRENT_LOCALE 0
				346	#define LT_DEFAULT_LOCALE 1
				347	#define LT_NO_LOCALE 2
				348
				349	/* Locale info needed for formatting integers and the part of floats
				350	before and including the decimal. Note that locales only support
				351	8-bit chars, not unicode. */
				352	typedef struct {
				353	char *decimal_point;
				354	char *thousands_sep;
				355	char *grouping;
				356	} LocaleInfo;
				357
				358	/* describes the layout for an integer, see the comment in
				359	calc_number_widths() for details */
				360	typedef struct {
				361	Py_ssize_t n_lpadding;
				362	Py_ssize_t n_prefix;
				363	Py_ssize_t n_spadding;
				364	Py_ssize_t n_rpadding;
				365	char sign;
				366	Py_ssize_t n_sign; /* number of digits needed for sign (0/1) */
				367	Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including
				368	any grouping chars. */
				369	Py_ssize_t n_decimal; /* 0 if only an integer */
				370	Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part,
				371	excluding the decimal itself, if
				372	present. */
				373
				374	/* These 2 are not the widths of fields, but are needed by
				375	STRINGLIB_GROUPING. */
				376	Py_ssize_t n_digits; /* The number of digits before a decimal
				377	or exponent. */
				378	Py_ssize_t n_min_width; /* The min_width we used when we computed
				379	the n_grouped_digits width. */
				380	} NumberFieldWidths;
				381
				382
				383	/* Given a number of the form:
				384	digits[remainder]
				385	where ptr points to the start and end points to the end, find where
				386	the integer part ends. This could be a decimal, an exponent, both,
				387	or neither.
				388	If a decimal point is present, set *has_decimal and increment
				389	remainder beyond it.
				390	Results are undefined (but shouldn't crash) for improperly
				391	formatted strings.
				392	*/
				393	static void
				394	parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end,
				395	Py_ssize_t n_remainder, int has_decimal)
				396	{
				397	Py_ssize_t remainder;
				398
				399	while (pos<end && isdigit(PyUnicode_READ_CHAR(s, pos)))
				400	++pos;
				401	remainder = pos;
				402
				403	/* Does remainder start with a decimal point? */
				404	*has_decimal = pos<end && PyUnicode_READ_CHAR(s, remainder) == '.';
				405
				406	/* Skip the decimal point. */
				407	if (*has_decimal)
				408	remainder++;
				409
				410	*n_remainder = end - remainder;
				411	}
				412
				413	/* not all fields of format are used. for example, precision is
				414	unused. should this take discrete params in order to be more clear
				415	about what it does? or is passing a single format parameter easier
				416	and more efficient enough to justify a little obfuscation? */
				417	static Py_ssize_t
				418	calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
				419	Py_UCS4 sign_char, PyObject *number, Py_ssize_t n_start,
				420	Py_ssize_t n_end, Py_ssize_t n_remainder,
				421	int has_decimal, const LocaleInfo *locale,
				422	const InternalFormatSpec *format)
				423	{
				424	Py_ssize_t n_non_digit_non_padding;
				425	Py_ssize_t n_padding;
				426
				427	spec->n_digits = n_end - n_start - n_remainder - (has_decimal?1:0);
				428	spec->n_lpadding = 0;
				429	spec->n_prefix = n_prefix;
				430	spec->n_decimal = has_decimal ? strlen(locale->decimal_point) : 0;
				431	spec->n_remainder = n_remainder;
				432	spec->n_spadding = 0;
				433	spec->n_rpadding = 0;
				434	spec->sign = '\0';
				435	spec->n_sign = 0;
				436
				437	/* the output will look like:
				438	\| \|
				439	\| <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> \|
				440	\| \|
				441
				442	sign is computed from format->sign and the actual
				443	sign of the number
				444
				445	prefix is given (it's for the '0x' prefix)
				446
				447	digits is already known
				448
				449	the total width is either given, or computed from the
				450	actual digits
				451
				452	only one of lpadding, spadding, and rpadding can be non-zero,
				453	and it's calculated from the width and other fields
				454	*/
				455
				456	/* compute the various parts we're going to write */
				457	switch (format->sign) {
				458	case '+':
				459	/* always put a + or - */
				460	spec->n_sign = 1;
				461	spec->sign = (sign_char == '-' ? '-' : '+');
				462	break;
				463	case ' ':
				464	spec->n_sign = 1;
				465	spec->sign = (sign_char == '-' ? '-' : ' ');
				466	break;
				467	default:
				468	/* Not specified, or the default (-) */
				469	if (sign_char == '-') {
				470	spec->n_sign = 1;
				471	spec->sign = '-';
				472	}
				473	}
				474
				475	/* The number of chars used for non-digits and non-padding. */
				476	n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
				477	spec->n_remainder;
				478
				479	/* min_width can go negative, that's okay. format->width == -1 means
				480	we don't care. */
				481	if (format->fill_char == '0' && format->align == '=')
				482	spec->n_min_width = format->width - n_non_digit_non_padding;
				483	else
				484	spec->n_min_width = 0;
				485
				486	if (spec->n_digits == 0)
				487	/* This case only occurs when using 'c' formatting, we need
				488	to special case it because the grouping code always wants
				489	to have at least one character. */
				490	spec->n_grouped_digits = 0;
				491	else
				492	spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping(
Victor Stinner	c3cec78	2011-10-05 21:24:08 +0200	[diff] [blame]	493	NULL, PyUnicode_1BYTE_KIND, NULL, 0, NULL,
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	494	spec->n_digits, spec->n_min_width,
				495	locale->grouping, locale->thousands_sep);
				496
				497	/* Given the desired width and the total of digit and non-digit
				498	space we consume, see if we need any padding. format->width can
				499	be negative (meaning no padding), but this code still works in
				500	that case. */
				501	n_padding = format->width -
				502	(n_non_digit_non_padding + spec->n_grouped_digits);
				503	if (n_padding > 0) {
				504	/* Some padding is needed. Determine if it's left, space, or right. */
				505	switch (format->align) {
				506	case '<':
				507	spec->n_rpadding = n_padding;
				508	break;
				509	case '^':
				510	spec->n_lpadding = n_padding / 2;
				511	spec->n_rpadding = n_padding - spec->n_lpadding;
				512	break;
				513	case '=':
				514	spec->n_spadding = n_padding;
				515	break;
				516	case '>':
				517	spec->n_lpadding = n_padding;
				518	break;
				519	default:
				520	/* Shouldn't get here, but treat it as '>' */
				521	spec->n_lpadding = n_padding;
				522	assert(0);
				523	break;
				524	}
				525	}
				526	return spec->n_lpadding + spec->n_sign + spec->n_prefix +
				527	spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
				528	spec->n_remainder + spec->n_rpadding;
				529	}
				530
				531	/* Fill in the digit parts of a numbers's string representation,
				532	as determined in calc_number_widths().
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	533	Return -1 on error, or 0 on success. */
				534	static int
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	535	fill_number(PyObject out, Py_ssize_t pos, const NumberFieldWidths spec,
				536	PyObject *digits, Py_ssize_t d_start, Py_ssize_t d_end,
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	537	PyObject *prefix, Py_ssize_t p_start,
				538	Py_UCS4 fill_char,
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	539	LocaleInfo *locale, int toupper)
				540	{
				541	/* Used to keep track of digits, decimal, and remainder. */
				542	Py_ssize_t d_pos = d_start;
				543	unsigned int kind = PyUnicode_KIND(out);
				544	void *data = PyUnicode_DATA(out);
				545
				546	#ifndef NDEBUG
				547	Py_ssize_t r;
				548	#endif
				549
				550	if (spec->n_lpadding) {
Victor Stinner	3fe5531	2012-01-04 00:33:50 +0100	[diff] [blame]	551	PyUnicode_Fill(out, pos, pos + spec->n_lpadding, fill_char);
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	552	pos += spec->n_lpadding;
				553	}
				554	if (spec->n_sign == 1) {
				555	PyUnicode_WRITE(kind, data, pos++, spec->sign);
				556	}
				557	if (spec->n_prefix) {
Victor Stinner	fd85c3a	2011-09-28 21:53:49 +0200	[diff] [blame]	558	if (PyUnicode_CopyCharacters(out, pos,
				559	prefix, p_start,
				560	spec->n_prefix) < 0)
				561	return -1;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	562	if (toupper) {
				563	Py_ssize_t t;
Benjamin Peterson	21e0da2	2012-01-11 21:00:42 -0500	[diff] [blame]	564	for (t = 0; t < spec->n_prefix; t++) {
				565	Py_UCS4 c = PyUnicode_READ(kind, data, pos + t);
				566	if (c > 127) {
				567	PyErr_SetString(PyExc_SystemError, "prefix not ASCII");
				568	return -1;
				569	}
				570	PyUnicode_WRITE(kind, data, pos + t, Py_TOUPPER(c));
				571	}
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	572	}
				573	pos += spec->n_prefix;
				574	}
				575	if (spec->n_spadding) {
Victor Stinner	3fe5531	2012-01-04 00:33:50 +0100	[diff] [blame]	576	PyUnicode_Fill(out, pos, pos + spec->n_spadding, fill_char);
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	577	pos += spec->n_spadding;
				578	}
				579
				580	/* Only for type 'c' special case, it has no digits. */
				581	if (spec->n_digits != 0) {
				582	/* Fill the digits with InsertThousandsGrouping. */
Victor Stinner	dba2dee	2011-09-28 21:50:42 +0200	[diff] [blame]	583	char *pdigits;
				584	if (PyUnicode_READY(digits))
				585	return -1;
				586	pdigits = PyUnicode_DATA(digits);
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	587	if (PyUnicode_KIND(digits) < kind) {
				588	pdigits = _PyUnicode_AsKind(digits, kind);
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	589	if (pdigits == NULL)
				590	return -1;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	591	}
				592	#ifndef NDEBUG
				593	r =
				594	#endif
				595	_PyUnicode_InsertThousandsGrouping(
Victor Stinner	c3cec78	2011-10-05 21:24:08 +0200	[diff] [blame]	596	out, kind,
Martin v. Löwis	c47adb0	2011-10-07 20:55:35 +0200	[diff] [blame]	597	(char)data + kind pos,
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	598	spec->n_grouped_digits,
Martin v. Löwis	c47adb0	2011-10-07 20:55:35 +0200	[diff] [blame]	599	pdigits + kind * d_pos,
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	600	spec->n_digits, spec->n_min_width,
				601	locale->grouping, locale->thousands_sep);
				602	#ifndef NDEBUG
				603	assert(r == spec->n_grouped_digits);
				604	#endif
				605	if (PyUnicode_KIND(digits) < kind)
				606	PyMem_Free(pdigits);
				607	d_pos += spec->n_digits;
				608	}
				609	if (toupper) {
				610	Py_ssize_t t;
Benjamin Peterson	21e0da2	2012-01-11 21:00:42 -0500	[diff] [blame]	611	for (t = 0; t < spec->n_grouped_digits; t++) {
				612	Py_UCS4 c = PyUnicode_READ(kind, data, pos + t);
				613	if (c > 127) {
				614	PyErr_SetString(PyExc_SystemError, "non-ascii grouped digit");
				615	return -1;
				616	}
				617	PyUnicode_WRITE(kind, data, pos + t, Py_TOUPPER(c));
				618	}
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	619	}
				620	pos += spec->n_grouped_digits;
				621
				622	if (spec->n_decimal) {
				623	Py_ssize_t t;
				624	for (t = 0; t < spec->n_decimal; ++t)
				625	PyUnicode_WRITE(kind, data, pos + t,
				626	locale->decimal_point[t]);
				627	pos += spec->n_decimal;
				628	d_pos += 1;
				629	}
				630
				631	if (spec->n_remainder) {
Victor Stinner	fd85c3a	2011-09-28 21:53:49 +0200	[diff] [blame]	632	if (PyUnicode_CopyCharacters(out, pos, digits, d_pos, spec->n_remainder) < 0)
				633	return -1;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	634	pos += spec->n_remainder;
				635	d_pos += spec->n_remainder;
				636	}
				637
				638	if (spec->n_rpadding) {
Victor Stinner	3fe5531	2012-01-04 00:33:50 +0100	[diff] [blame]	639	PyUnicode_Fill(out, pos, pos + spec->n_rpadding, fill_char);
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	640	pos += spec->n_rpadding;
				641	}
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	642	return 0;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	643	}
				644
				645	static char no_grouping[1] = {CHAR_MAX};
				646
				647	/* Find the decimal point character(s?), thousands_separator(s?), and
				648	grouping description, either for the current locale if type is
				649	LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE, or
				650	none if LT_NO_LOCALE. */
				651	static void
				652	get_locale_info(int type, LocaleInfo *locale_info)
				653	{
				654	switch (type) {
				655	case LT_CURRENT_LOCALE: {
				656	struct lconv *locale_data = localeconv();
				657	locale_info->decimal_point = locale_data->decimal_point;
				658	locale_info->thousands_sep = locale_data->thousands_sep;
				659	locale_info->grouping = locale_data->grouping;
				660	break;
				661	}
				662	case LT_DEFAULT_LOCALE:
				663	locale_info->decimal_point = ".";
				664	locale_info->thousands_sep = ",";
				665	locale_info->grouping = "\3"; /* Group every 3 characters. The
				666	(implicit) trailing 0 means repeat
				667	infinitely. */
				668	break;
				669	case LT_NO_LOCALE:
				670	locale_info->decimal_point = ".";
				671	locale_info->thousands_sep = "";
				672	locale_info->grouping = no_grouping;
				673	break;
				674	default:
				675	assert(0);
				676	}
				677	}
				678
				679	/************************************************************************/
				680	/********* string formatting ****************************************/
				681	/************************************************************************/
				682
				683	static PyObject *
				684	format_string_internal(PyObject value, const InternalFormatSpec format)
				685	{
				686	Py_ssize_t lpad;
				687	Py_ssize_t rpad;
				688	Py_ssize_t total;
				689	Py_ssize_t pos;
Victor Stinner	c4f281e	2011-10-11 22:11:42 +0200	[diff] [blame]	690	Py_ssize_t len = PyUnicode_GET_LENGTH(value);
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	691	PyObject *result = NULL;
				692	int maxchar = 127;
				693
				694	/* sign is not allowed on strings */
				695	if (format->sign != '\0') {
				696	PyErr_SetString(PyExc_ValueError,
				697	"Sign not allowed in string format specifier");
				698	goto done;
				699	}
				700
				701	/* alternate is not allowed on strings */
				702	if (format->alternate) {
				703	PyErr_SetString(PyExc_ValueError,
				704	"Alternate form (#) not allowed in string format "
				705	"specifier");
				706	goto done;
				707	}
				708
				709	/* '=' alignment not allowed on strings */
				710	if (format->align == '=') {
				711	PyErr_SetString(PyExc_ValueError,
				712	"'=' alignment not allowed "
				713	"in string format specifier");
				714	goto done;
				715	}
				716
				717	/* if precision is specified, output no more that format.precision
				718	characters */
				719	if (format->precision >= 0 && len >= format->precision) {
				720	len = format->precision;
				721	}
				722
				723	calc_padding(len, format->width, format->align, &lpad, &rpad, &total);
				724
				725	/* allocate the resulting string */
				726	result = PyUnicode_New(total, maxchar);
				727	if (result == NULL)
				728	goto done;
				729
				730	/* Write into that space. First the padding. */
				731	pos = fill_padding(result, 0, len,
				732	format->fill_char=='\0'?' ':format->fill_char,
				733	lpad, rpad);
				734
				735	/* Then the source string. */
Victor Stinner	fd85c3a	2011-09-28 21:53:49 +0200	[diff] [blame]	736	if (PyUnicode_CopyCharacters(result, pos, value, 0, len) < 0)
				737	Py_CLEAR(result);
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	738
				739	done:
				740	return result;
				741	}
				742
				743
				744	/************************************************************************/
				745	/********* long formatting ******************************************/
				746	/************************************************************************/
				747
				748	typedef PyObject*
				749	(IntOrLongToString)(PyObject value, int base);
				750
				751	static PyObject *
				752	format_int_or_long_internal(PyObject value, const InternalFormatSpec format,
				753	IntOrLongToString tostring)
				754	{
				755	PyObject *result = NULL;
				756	int maxchar = 127;
				757	PyObject *tmp = NULL;
				758	Py_ssize_t inumeric_chars;
				759	Py_UCS4 sign_char = '\0';
				760	Py_ssize_t n_digits; /* count of digits need from the computed
				761	string */
				762	Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which
				763	produces non-digits */
				764	Py_ssize_t n_prefix = 0; /* Count of prefix chars, (e.g., '0x') */
				765	Py_ssize_t n_total;
				766	Py_ssize_t prefix;
				767	NumberFieldWidths spec;
				768	long x;
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	769	int err;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	770
				771	/* Locale settings, either from the actual locale or
				772	from a hard-code pseudo-locale */
				773	LocaleInfo locale;
				774
				775	/* no precision allowed on integers */
				776	if (format->precision != -1) {
				777	PyErr_SetString(PyExc_ValueError,
				778	"Precision not allowed in integer format specifier");
				779	goto done;
				780	}
				781
				782	/* special case for character formatting */
				783	if (format->type == 'c') {
				784	/* error to specify a sign */
				785	if (format->sign != '\0') {
				786	PyErr_SetString(PyExc_ValueError,
				787	"Sign not allowed with integer"
				788	" format specifier 'c'");
				789	goto done;
				790	}
				791
				792	/* taken from unicodeobject.c formatchar() */
				793	/* Integer input truncated to a character */
				794	/* XXX: won't work for int */
				795	x = PyLong_AsLong(value);
				796	if (x == -1 && PyErr_Occurred())
				797	goto done;
				798	if (x < 0 \|\| x > 0x10ffff) {
				799	PyErr_SetString(PyExc_OverflowError,
				800	"%c arg not in range(0x110000) "
				801	"(wide Python build)");
				802	goto done;
				803	}
				804	tmp = PyUnicode_FromOrdinal(x);
				805	inumeric_chars = 0;
				806	n_digits = 1;
				807	if (x > maxchar)
				808	maxchar = x;
				809
				810	/* As a sort-of hack, we tell calc_number_widths that we only
				811	have "remainder" characters. calc_number_widths thinks
				812	these are characters that don't get formatted, only copied
				813	into the output string. We do this for 'c' formatting,
				814	because the characters are likely to be non-digits. */
				815	n_remainder = 1;
				816	}
				817	else {
				818	int base;
				819	int leading_chars_to_skip = 0; /* Number of characters added by
				820	PyNumber_ToBase that we want to
				821	skip over. */
				822
				823	/* Compute the base and how many characters will be added by
				824	PyNumber_ToBase */
				825	switch (format->type) {
				826	case 'b':
				827	base = 2;
				828	leading_chars_to_skip = 2; /* 0b */
				829	break;
				830	case 'o':
				831	base = 8;
				832	leading_chars_to_skip = 2; /* 0o */
				833	break;
				834	case 'x':
				835	case 'X':
				836	base = 16;
				837	leading_chars_to_skip = 2; /* 0x */
				838	break;
				839	default: /* shouldn't be needed, but stops a compiler warning */
				840	case 'd':
				841	case 'n':
				842	base = 10;
				843	break;
				844	}
				845
				846	/* The number of prefix chars is the same as the leading
				847	chars to skip */
				848	if (format->alternate)
				849	n_prefix = leading_chars_to_skip;
				850
				851	/* Do the hard part, converting to a string in a given base */
				852	tmp = tostring(value, base);
				853	if (tmp == NULL \|\| PyUnicode_READY(tmp) == -1)
				854	goto done;
				855
				856	inumeric_chars = 0;
				857	n_digits = PyUnicode_GET_LENGTH(tmp);
				858
				859	prefix = inumeric_chars;
				860
				861	/* Is a sign character present in the output? If so, remember it
				862	and skip it */
				863	if (PyUnicode_READ_CHAR(tmp, inumeric_chars) == '-') {
				864	sign_char = '-';
				865	++prefix;
				866	++leading_chars_to_skip;
				867	}
				868
				869	/* Skip over the leading chars (0x, 0b, etc.) */
				870	n_digits -= leading_chars_to_skip;
				871	inumeric_chars += leading_chars_to_skip;
				872	}
				873
				874	/* Determine the grouping, separator, and decimal point, if any. */
				875	get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
				876	(format->thousands_separators ?
				877	LT_DEFAULT_LOCALE :
				878	LT_NO_LOCALE),
				879	&locale);
				880
				881	/* Calculate how much memory we'll need. */
				882	n_total = calc_number_widths(&spec, n_prefix, sign_char, tmp, inumeric_chars,
				883	inumeric_chars + n_digits, n_remainder, 0, &locale, format);
				884
				885	/* Allocate the memory. */
				886	result = PyUnicode_New(n_total, maxchar);
				887	if (!result)
				888	goto done;
				889
				890	/* Populate the memory. */
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	891	err = fill_number(result, 0, &spec,
				892	tmp, inumeric_chars, inumeric_chars + n_digits,
				893	tmp, prefix,
				894	format->fill_char == '\0' ? ' ' : format->fill_char,
				895	&locale, format->type == 'X');
				896	if (err)
				897	Py_CLEAR(result);
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	898
				899	done:
				900	Py_XDECREF(tmp);
				901	return result;
				902	}
				903
				904	/************************************************************************/
				905	/********* float formatting *****************************************/
				906	/************************************************************************/
				907
				908	static PyObject*
				909	strtounicode(char *charbuffer, Py_ssize_t len)
				910	{
				911	return PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, charbuffer, len);
				912	}
				913
				914	/* much of this is taken from unicodeobject.c */
				915	static PyObject *
				916	format_float_internal(PyObject *value,
				917	const InternalFormatSpec *format)
				918	{
				919	char buf = NULL; / buffer returned from PyOS_double_to_string */
				920	Py_ssize_t n_digits;
				921	Py_ssize_t n_remainder;
				922	Py_ssize_t n_total;
				923	int has_decimal;
				924	double val;
				925	Py_ssize_t precision = format->precision;
				926	Py_ssize_t default_precision = 6;
				927	Py_UCS4 type = format->type;
				928	int add_pct = 0;
				929	Py_ssize_t index;
				930	NumberFieldWidths spec;
				931	int flags = 0;
				932	PyObject *result = NULL;
				933	int maxchar = 127;
				934	Py_UCS4 sign_char = '\0';
				935	int float_type; /* Used to see if we have a nan, inf, or regular float. */
				936	PyObject *unicode_tmp = NULL;
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	937	int err;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	938
				939	/* Locale settings, either from the actual locale or
				940	from a hard-code pseudo-locale */
				941	LocaleInfo locale;
				942
				943	if (format->alternate)
				944	flags \|= Py_DTSF_ALT;
				945
				946	if (type == '\0') {
				947	/* Omitted type specifier. Behaves in the same way as repr(x)
				948	and str(x) if no precision is given, else like 'g', but with
				949	at least one digit after the decimal point. */
				950	flags \|= Py_DTSF_ADD_DOT_0;
				951	type = 'r';
				952	default_precision = 0;
				953	}
				954
				955	if (type == 'n')
				956	/* 'n' is the same as 'g', except for the locale used to
				957	format the result. We take care of that later. */
				958	type = 'g';
				959
				960	val = PyFloat_AsDouble(value);
				961	if (val == -1.0 && PyErr_Occurred())
				962	goto done;
				963
				964	if (type == '%') {
				965	type = 'f';
				966	val *= 100;
				967	add_pct = 1;
				968	}
				969
				970	if (precision < 0)
				971	precision = default_precision;
				972	else if (type == 'r')
				973	type = 'g';
				974
				975	/* Cast "type", because if we're in unicode we need to pass a
				976	8-bit char. This is safe, because we've restricted what "type"
				977	can be. */
				978	buf = PyOS_double_to_string(val, (char)type, precision, flags,
				979	&float_type);
				980	if (buf == NULL)
				981	goto done;
				982	n_digits = strlen(buf);
				983
				984	if (add_pct) {
				985	/* We know that buf has a trailing zero (since we just called
				986	strlen() on it), and we don't use that fact any more. So we
				987	can just write over the trailing zero. */
				988	buf[n_digits] = '%';
				989	n_digits += 1;
				990	}
				991
				992	/* Since there is no unicode version of PyOS_double_to_string,
				993	just use the 8 bit version and then convert to unicode. */
				994	unicode_tmp = strtounicode(buf, n_digits);
				995	if (unicode_tmp == NULL)
				996	goto done;
				997	index = 0;
				998
				999	/* Is a sign character present in the output? If so, remember it
				1000	and skip it */
				1001	if (PyUnicode_READ_CHAR(unicode_tmp, index) == '-') {
				1002	sign_char = '-';
				1003	++index;
				1004	--n_digits;
				1005	}
				1006
				1007	/* Determine if we have any "remainder" (after the digits, might include
				1008	decimal or exponent or both (or neither)) */
				1009	parse_number(unicode_tmp, index, index + n_digits, &n_remainder, &has_decimal);
				1010
				1011	/* Determine the grouping, separator, and decimal point, if any. */
				1012	get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
				1013	(format->thousands_separators ?
				1014	LT_DEFAULT_LOCALE :
				1015	LT_NO_LOCALE),
				1016	&locale);
				1017
				1018	/* Calculate how much memory we'll need. */
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	1019	n_total = calc_number_widths(&spec, 0, sign_char, unicode_tmp, index,
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1020	index + n_digits, n_remainder, has_decimal,
				1021	&locale, format);
				1022
				1023	/* Allocate the memory. */
				1024	result = PyUnicode_New(n_total, maxchar);
				1025	if (result == NULL)
				1026	goto done;
				1027
				1028	/* Populate the memory. */
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	1029	err = fill_number(result, 0, &spec,
				1030	unicode_tmp, index, index + n_digits,
				1031	NULL, 0,
				1032	format->fill_char == '\0' ? ' ' : format->fill_char,
				1033	&locale, 0);
				1034	if (err)
				1035	Py_CLEAR(result);
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1036
				1037	done:
				1038	PyMem_Free(buf);
				1039	Py_DECREF(unicode_tmp);
				1040	return result;
				1041	}
				1042
				1043	/************************************************************************/
				1044	/********* complex formatting ***************************************/
				1045	/************************************************************************/
				1046
				1047	static PyObject *
				1048	format_complex_internal(PyObject *value,
				1049	const InternalFormatSpec *format)
				1050	{
				1051	double re;
				1052	double im;
				1053	char re_buf = NULL; / buffer returned from PyOS_double_to_string */
				1054	char im_buf = NULL; / buffer returned from PyOS_double_to_string */
				1055
				1056	InternalFormatSpec tmp_format = *format;
				1057	Py_ssize_t n_re_digits;
				1058	Py_ssize_t n_im_digits;
				1059	Py_ssize_t n_re_remainder;
				1060	Py_ssize_t n_im_remainder;
				1061	Py_ssize_t n_re_total;
				1062	Py_ssize_t n_im_total;
				1063	int re_has_decimal;
				1064	int im_has_decimal;
				1065	Py_ssize_t precision = format->precision;
				1066	Py_ssize_t default_precision = 6;
				1067	Py_UCS4 type = format->type;
				1068	Py_ssize_t i_re;
				1069	Py_ssize_t i_im;
				1070	NumberFieldWidths re_spec;
				1071	NumberFieldWidths im_spec;
				1072	int flags = 0;
				1073	PyObject *result = NULL;
				1074	int maxchar = 127;
				1075	int rkind;
				1076	void *rdata;
				1077	Py_ssize_t index;
				1078	Py_UCS4 re_sign_char = '\0';
				1079	Py_UCS4 im_sign_char = '\0';
				1080	int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
				1081	int im_float_type;
				1082	int add_parens = 0;
				1083	int skip_re = 0;
				1084	Py_ssize_t lpad;
				1085	Py_ssize_t rpad;
				1086	Py_ssize_t total;
				1087	PyObject *re_unicode_tmp = NULL;
				1088	PyObject *im_unicode_tmp = NULL;
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	1089	int err;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1090
				1091	/* Locale settings, either from the actual locale or
				1092	from a hard-code pseudo-locale */
				1093	LocaleInfo locale;
				1094
				1095	/* Zero padding is not allowed. */
				1096	if (format->fill_char == '0') {
				1097	PyErr_SetString(PyExc_ValueError,
				1098	"Zero padding is not allowed in complex format "
				1099	"specifier");
				1100	goto done;
				1101	}
				1102
				1103	/* Neither is '=' alignment . */
				1104	if (format->align == '=') {
				1105	PyErr_SetString(PyExc_ValueError,
				1106	"'=' alignment flag is not allowed in complex format "
				1107	"specifier");
				1108	goto done;
				1109	}
				1110
				1111	re = PyComplex_RealAsDouble(value);
				1112	if (re == -1.0 && PyErr_Occurred())
				1113	goto done;
				1114	im = PyComplex_ImagAsDouble(value);
				1115	if (im == -1.0 && PyErr_Occurred())
				1116	goto done;
				1117
				1118	if (format->alternate)
				1119	flags \|= Py_DTSF_ALT;
				1120
				1121	if (type == '\0') {
				1122	/* Omitted type specifier. Should be like str(self). */
				1123	type = 'r';
				1124	default_precision = 0;
				1125	if (re == 0.0 && copysign(1.0, re) == 1.0)
				1126	skip_re = 1;
				1127	else
				1128	add_parens = 1;
				1129	}
				1130
				1131	if (type == 'n')
				1132	/* 'n' is the same as 'g', except for the locale used to
				1133	format the result. We take care of that later. */
				1134	type = 'g';
				1135
				1136	if (precision < 0)
				1137	precision = default_precision;
				1138	else if (type == 'r')
				1139	type = 'g';
				1140
				1141	/* Cast "type", because if we're in unicode we need to pass a
				1142	8-bit char. This is safe, because we've restricted what "type"
				1143	can be. */
				1144	re_buf = PyOS_double_to_string(re, (char)type, precision, flags,
				1145	&re_float_type);
				1146	if (re_buf == NULL)
				1147	goto done;
				1148	im_buf = PyOS_double_to_string(im, (char)type, precision, flags,
				1149	&im_float_type);
				1150	if (im_buf == NULL)
				1151	goto done;
				1152
				1153	n_re_digits = strlen(re_buf);
				1154	n_im_digits = strlen(im_buf);
				1155
				1156	/* Since there is no unicode version of PyOS_double_to_string,
				1157	just use the 8 bit version and then convert to unicode. */
				1158	re_unicode_tmp = strtounicode(re_buf, n_re_digits);
				1159	if (re_unicode_tmp == NULL)
				1160	goto done;
				1161	i_re = 0;
				1162
				1163	im_unicode_tmp = strtounicode(im_buf, n_im_digits);
				1164	if (im_unicode_tmp == NULL)
				1165	goto done;
				1166	i_im = 0;
				1167
				1168	/* Is a sign character present in the output? If so, remember it
				1169	and skip it */
				1170	if (PyUnicode_READ_CHAR(re_unicode_tmp, i_re) == '-') {
				1171	re_sign_char = '-';
				1172	++i_re;
				1173	--n_re_digits;
				1174	}
				1175	if (PyUnicode_READ_CHAR(im_unicode_tmp, i_im) == '-') {
				1176	im_sign_char = '-';
				1177	++i_im;
				1178	--n_im_digits;
				1179	}
				1180
				1181	/* Determine if we have any "remainder" (after the digits, might include
				1182	decimal or exponent or both (or neither)) */
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	1183	parse_number(re_unicode_tmp, i_re, i_re + n_re_digits,
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1184	&n_re_remainder, &re_has_decimal);
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	1185	parse_number(im_unicode_tmp, i_im, i_im + n_im_digits,
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1186	&n_im_remainder, &im_has_decimal);
				1187
				1188	/* Determine the grouping, separator, and decimal point, if any. */
				1189	get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
				1190	(format->thousands_separators ?
				1191	LT_DEFAULT_LOCALE :
				1192	LT_NO_LOCALE),
				1193	&locale);
				1194
				1195	/* Turn off any padding. We'll do it later after we've composed
				1196	the numbers without padding. */
				1197	tmp_format.fill_char = '\0';
				1198	tmp_format.align = '<';
				1199	tmp_format.width = -1;
				1200
				1201	/* Calculate how much memory we'll need. */
				1202	n_re_total = calc_number_widths(&re_spec, 0, re_sign_char, re_unicode_tmp,
				1203	i_re, i_re + n_re_digits, n_re_remainder,
				1204	re_has_decimal, &locale, &tmp_format);
				1205
				1206	/* Same formatting, but always include a sign, unless the real part is
				1207	* going to be omitted, in which case we use whatever sign convention was
				1208	* requested by the original format. */
				1209	if (!skip_re)
				1210	tmp_format.sign = '+';
				1211	n_im_total = calc_number_widths(&im_spec, 0, im_sign_char, im_unicode_tmp,
				1212	i_im, i_im + n_im_digits, n_im_remainder,
				1213	im_has_decimal, &locale, &tmp_format);
				1214
				1215	if (skip_re)
				1216	n_re_total = 0;
				1217
				1218	/* Add 1 for the 'j', and optionally 2 for parens. */
				1219	calc_padding(n_re_total + n_im_total + 1 + add_parens * 2,
				1220	format->width, format->align, &lpad, &rpad, &total);
				1221
				1222	result = PyUnicode_New(total, maxchar);
				1223	if (result == NULL)
				1224	goto done;
				1225	rkind = PyUnicode_KIND(result);
				1226	rdata = PyUnicode_DATA(result);
				1227
				1228	/* Populate the memory. First, the padding. */
				1229	index = fill_padding(result, 0,
				1230	n_re_total + n_im_total + 1 + add_parens * 2,
				1231	format->fill_char=='\0' ? ' ' : format->fill_char,
				1232	lpad, rpad);
				1233
				1234	if (add_parens)
				1235	PyUnicode_WRITE(rkind, rdata, index++, '(');
				1236
				1237	if (!skip_re) {
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	1238	err = fill_number(result, index, &re_spec,
				1239	re_unicode_tmp, i_re, i_re + n_re_digits,
				1240	NULL, 0,
				1241	0,
				1242	&locale, 0);
				1243	if (err) {
				1244	Py_CLEAR(result);
				1245	goto done;
				1246	}
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1247	index += n_re_total;
				1248	}
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	1249	err = fill_number(result, index, &im_spec,
				1250	im_unicode_tmp, i_im, i_im + n_im_digits,
				1251	NULL, 0,
				1252	0,
				1253	&locale, 0);
				1254	if (err) {
				1255	Py_CLEAR(result);
				1256	goto done;
				1257	}
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1258	index += n_im_total;
				1259	PyUnicode_WRITE(rkind, rdata, index++, 'j');
				1260
				1261	if (add_parens)
				1262	PyUnicode_WRITE(rkind, rdata, index++, ')');
				1263
				1264	done:
				1265	PyMem_Free(re_buf);
				1266	PyMem_Free(im_buf);
				1267	Py_XDECREF(re_unicode_tmp);
				1268	Py_XDECREF(im_unicode_tmp);
				1269	return result;
				1270	}
				1271
				1272	/************************************************************************/
				1273	/********* built in formatters **************************************/
				1274	/************************************************************************/
				1275	PyObject *
				1276	_PyUnicode_FormatAdvanced(PyObject *obj,
				1277	PyObject *format_spec,
				1278	Py_ssize_t start, Py_ssize_t end)
				1279	{
				1280	InternalFormatSpec format;
Victor Stinner	fb9ea8c	2011-10-06 01:45:57 +0200	[diff] [blame]	1281	PyObject *result;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1282
				1283	/* check for the special case of zero length format spec, make
				1284	it equivalent to str(obj) */
Victor Stinner	fb9ea8c	2011-10-06 01:45:57 +0200	[diff] [blame]	1285	if (start == end)
				1286	return PyObject_Str(obj);
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1287
				1288	/* parse the format_spec */
				1289	if (!parse_internal_render_format_spec(format_spec, start, end,
				1290	&format, 's', '<'))
Victor Stinner	fb9ea8c	2011-10-06 01:45:57 +0200	[diff] [blame]	1291	return NULL;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1292
				1293	/* type conversion? */
				1294	switch (format.type) {
				1295	case 's':
				1296	/* no type conversion needed, already a string. do the formatting */
				1297	result = format_string_internal(obj, &format);
Victor Stinner	fb9ea8c	2011-10-06 01:45:57 +0200	[diff] [blame]	1298	if (result != NULL)
				1299	assert(_PyUnicode_CheckConsistency(result, 1));
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1300	break;
				1301	default:
				1302	/* unknown */
				1303	unknown_presentation_type(format.type, obj->ob_type->tp_name);
Victor Stinner	fb9ea8c	2011-10-06 01:45:57 +0200	[diff] [blame]	1304	result = NULL;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1305	}
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1306	return result;
				1307	}
				1308
				1309	static PyObject*
				1310	format_int_or_long(PyObject* obj, PyObject* format_spec,
				1311	Py_ssize_t start, Py_ssize_t end,
				1312	IntOrLongToString tostring)
				1313	{
				1314	PyObject *result = NULL;
				1315	PyObject *tmp = NULL;
				1316	InternalFormatSpec format;
				1317
				1318	/* check for the special case of zero length format spec, make
				1319	it equivalent to str(obj) */
				1320	if (start == end) {
				1321	result = PyObject_Str(obj);
				1322	goto done;
				1323	}
				1324
				1325	/* parse the format_spec */
				1326	if (!parse_internal_render_format_spec(format_spec, start, end,
				1327	&format, 'd', '>'))
				1328	goto done;
				1329
				1330	/* type conversion? */
				1331	switch (format.type) {
				1332	case 'b':
				1333	case 'c':
				1334	case 'd':
				1335	case 'o':
				1336	case 'x':
				1337	case 'X':
				1338	case 'n':
				1339	/* no type conversion needed, already an int (or long). do
				1340	the formatting */
				1341	result = format_int_or_long_internal(obj, &format, tostring);
				1342	break;
				1343
				1344	case 'e':
				1345	case 'E':
				1346	case 'f':
				1347	case 'F':
				1348	case 'g':
				1349	case 'G':
				1350	case '%':
				1351	/* convert to float */
				1352	tmp = PyNumber_Float(obj);
				1353	if (tmp == NULL)
				1354	goto done;
				1355	result = format_float_internal(tmp, &format);
				1356	break;
				1357
				1358	default:
				1359	/* unknown */
				1360	unknown_presentation_type(format.type, obj->ob_type->tp_name);
				1361	goto done;
				1362	}
				1363
				1364	done:
				1365	Py_XDECREF(tmp);
				1366	return result;
				1367	}
				1368
				1369	/* Need to define long_format as a function that will convert a long
				1370	to a string. In 3.0, _PyLong_Format has the correct signature. */
				1371	#define long_format _PyLong_Format
				1372
				1373	PyObject *
				1374	_PyLong_FormatAdvanced(PyObject *obj,
				1375	PyObject *format_spec,
				1376	Py_ssize_t start, Py_ssize_t end)
				1377	{
				1378	return format_int_or_long(obj, format_spec, start, end,
				1379	long_format);
				1380	}
				1381
				1382	PyObject *
				1383	_PyFloat_FormatAdvanced(PyObject *obj,
				1384	PyObject *format_spec,
				1385	Py_ssize_t start, Py_ssize_t end)
				1386	{
				1387	PyObject *result = NULL;
				1388	InternalFormatSpec format;
				1389
				1390	/* check for the special case of zero length format spec, make
				1391	it equivalent to str(obj) */
				1392	if (start == end) {
				1393	result = PyObject_Str(obj);
				1394	goto done;
				1395	}
				1396
				1397	/* parse the format_spec */
				1398	if (!parse_internal_render_format_spec(format_spec, start, end,
				1399	&format, '\0', '>'))
				1400	goto done;
				1401
				1402	/* type conversion? */
				1403	switch (format.type) {
				1404	case '\0': /* No format code: like 'g', but with at least one decimal. */
				1405	case 'e':
				1406	case 'E':
				1407	case 'f':
				1408	case 'F':
				1409	case 'g':
				1410	case 'G':
				1411	case 'n':
				1412	case '%':
				1413	/* no conversion, already a float. do the formatting */
				1414	result = format_float_internal(obj, &format);
				1415	break;
				1416
				1417	default:
				1418	/* unknown */
				1419	unknown_presentation_type(format.type, obj->ob_type->tp_name);
				1420	goto done;
				1421	}
				1422
				1423	done:
				1424	return result;
				1425	}
				1426
				1427	PyObject *
				1428	_PyComplex_FormatAdvanced(PyObject *obj,
				1429	PyObject *format_spec,
				1430	Py_ssize_t start, Py_ssize_t end)
				1431	{
				1432	PyObject *result = NULL;
				1433	InternalFormatSpec format;
				1434
				1435	/* check for the special case of zero length format spec, make
				1436	it equivalent to str(obj) */
				1437	if (start == end) {
				1438	result = PyObject_Str(obj);
				1439	goto done;
				1440	}
				1441
				1442	/* parse the format_spec */
				1443	if (!parse_internal_render_format_spec(format_spec, start, end,
				1444	&format, '\0', '>'))
				1445	goto done;
				1446
				1447	/* type conversion? */
				1448	switch (format.type) {
				1449	case '\0': /* No format code: like 'g', but with at least one decimal. */
				1450	case 'e':
				1451	case 'E':
				1452	case 'f':
				1453	case 'F':
				1454	case 'g':
				1455	case 'G':
				1456	case 'n':
				1457	/* no conversion, already a complex. do the formatting */
				1458	result = format_complex_internal(obj, &format);
				1459	break;
				1460
				1461	default:
				1462	/* unknown */
				1463	unknown_presentation_type(format.type, obj->ob_type->tp_name);
				1464	goto done;
				1465	}
				1466
				1467	done:
				1468	return result;
				1469	}