Blame - Python/formatter_unicode.c - platform/external/python/cpython3

blob: ef0151192b76925476917b8f5b6812f9b914d42e [file] [log] [blame]

/* implements the unicode (as opposed to string) version of the
   built-in formatters for string, int, float.  that is, the versions
   of int.__format__, etc., that take and return unicode objects */
				4
				5	#include "Python.h"
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	6	#include <locale.h>
				7
				8	/* Raises an exception about an unknown presentation type for this
				9	* type. */
				10
				11	static void
				12	unknown_presentation_type(Py_UCS4 presentation_type,
				13	const char* type_name)
				14	{
				15	/* %c might be out-of-range, hence the two cases. */
				16	if (presentation_type > 32 && presentation_type < 128)
				17	PyErr_Format(PyExc_ValueError,
				18	"Unknown format code '%c' "
				19	"for object of type '%.200s'",
				20	(char)presentation_type,
				21	type_name);
				22	else
				23	PyErr_Format(PyExc_ValueError,
				24	"Unknown format code '\\x%x' "
				25	"for object of type '%.200s'",
				26	(unsigned int)presentation_type,
				27	type_name);
				28	}
				29
				30	static void
				31	invalid_comma_type(Py_UCS4 presentation_type)
				32	{
				33	if (presentation_type > 32 && presentation_type < 128)
				34	PyErr_Format(PyExc_ValueError,
				35	"Cannot specify ',' with '%c'.",
				36	(char)presentation_type);
				37	else
				38	PyErr_Format(PyExc_ValueError,
				39	"Cannot specify ',' with '\\x%x'.",
				40	(unsigned int)presentation_type);
				41	}
				42
				43	/*
				44	get_integer consumes 0 or more decimal digit characters from an
				45	input string, updates *result with the corresponding positive
				46	integer, and returns the number of digits consumed.
				47
				48	returns -1 on error.
				49	*/
				50	static int
				51	get_integer(PyObject str, Py_ssize_t pos, Py_ssize_t end,
				52	Py_ssize_t *result)
				53	{
Mark Dickinson	47862d4	2011-12-01 15:27:04 +0000	[diff] [blame]	54	Py_ssize_t accumulator, digitval;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	55	int numdigits;
				56	accumulator = numdigits = 0;
				57	for (;;(*pos)++, numdigits++) {
				58	if (*pos >= end)
				59	break;
				60	digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ_CHAR(str, *pos));
				61	if (digitval < 0)
				62	break;
				63	/*
Mark Dickinson	47862d4	2011-12-01 15:27:04 +0000	[diff] [blame]	64	Detect possible overflow before it happens:
				65
				66	accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
				67	accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	68	*/
Mark Dickinson	47862d4	2011-12-01 15:27:04 +0000	[diff] [blame]	69	if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	70	PyErr_Format(PyExc_ValueError,
				71	"Too many decimal digits in format string");
				72	return -1;
				73	}
Mark Dickinson	47862d4	2011-12-01 15:27:04 +0000	[diff] [blame]	74	accumulator = accumulator * 10 + digitval;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	75	}
				76	*result = accumulator;
				77	return numdigits;
				78	}
				79
				80	/************************************************************************/
				81	/********* standard format specifier parsing ************************/
				82	/************************************************************************/
				83
				84	/* returns true if this character is a specifier alignment token */
				85	Py_LOCAL_INLINE(int)
				86	is_alignment_token(Py_UCS4 c)
				87	{
				88	switch (c) {
				89	case '<': case '>': case '=': case '^':
				90	return 1;
				91	default:
				92	return 0;
				93	}
				94	}
				95
				96	/* returns true if this character is a sign element */
				97	Py_LOCAL_INLINE(int)
				98	is_sign_element(Py_UCS4 c)
				99	{
				100	switch (c) {
				101	case ' ': case '+': case '-':
				102	return 1;
				103	default:
				104	return 0;
				105	}
				106	}
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	107
Eric Smith	4a7d76d	2008-05-30 18:10:19 +0000	[diff] [blame]	108
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	109	typedef struct {
				110	Py_UCS4 fill_char;
				111	Py_UCS4 align;
				112	int alternate;
				113	Py_UCS4 sign;
				114	Py_ssize_t width;
				115	int thousands_separators;
				116	Py_ssize_t precision;
				117	Py_UCS4 type;
				118	} InternalFormatSpec;
Eric Smith	4a7d76d	2008-05-30 18:10:19 +0000	[diff] [blame]	119
#if 0
/* Occasionally useful for debugging. Should normally be commented out. */
static void
DEBUG_PRINT_FORMAT_SPEC(InternalFormatSpec *format)
{
    /* The Py_UCS4 fields are cast to int so that the argument types
       match the %d and %c conversions. */
    printf("internal format spec: fill_char %d\n", (int)format->fill_char);
    printf("internal format spec: align %d\n", (int)format->align);
    printf("internal format spec: alternate %d\n", format->alternate);
    printf("internal format spec: sign %d\n", (int)format->sign);
    printf("internal format spec: width %zd\n", format->width);
    printf("internal format spec: thousands_separators %d\n",
           format->thousands_separators);
    printf("internal format spec: precision %zd\n", format->precision);
    printf("internal format spec: type %c\n", (int)format->type);
    printf("\n");
}
#endif
				137
				138
				139	/*
				140	ptr points to the start of the format_spec, end points just past its end.
				141	fills in format with the parsed information.
				142	returns 1 on success, 0 on failure.
				143	if failure, sets the exception
				144	*/
				145	static int
				146	parse_internal_render_format_spec(PyObject *format_spec,
				147	Py_ssize_t start, Py_ssize_t end,
				148	InternalFormatSpec *format,
				149	char default_type,
				150	char default_align)
				151	{
				152	Py_ssize_t pos = start;
				153	/* end-pos is used throughout this code to specify the length of
				154	the input string */
				155	#define READ_spec(index) PyUnicode_READ_CHAR(format_spec, index)
				156
				157	Py_ssize_t consumed;
				158	int align_specified = 0;
				159
				160	format->fill_char = '\0';
				161	format->align = default_align;
				162	format->alternate = 0;
				163	format->sign = '\0';
				164	format->width = -1;
				165	format->thousands_separators = 0;
				166	format->precision = -1;
				167	format->type = default_type;
				168
				169	/* If the second char is an alignment token,
				170	then parse the fill char */
				171	if (end-pos >= 2 && is_alignment_token(READ_spec(pos+1))) {
				172	format->align = READ_spec(pos+1);
				173	format->fill_char = READ_spec(pos);
				174	align_specified = 1;
				175	pos += 2;
				176	}
				177	else if (end-pos >= 1 && is_alignment_token(READ_spec(pos))) {
				178	format->align = READ_spec(pos);
				179	align_specified = 1;
				180	++pos;
				181	}
				182
				183	/* Parse the various sign options */
				184	if (end-pos >= 1 && is_sign_element(READ_spec(pos))) {
				185	format->sign = READ_spec(pos);
				186	++pos;
				187	}
				188
				189	/* If the next character is #, we're in alternate mode. This only
				190	applies to integers. */
				191	if (end-pos >= 1 && READ_spec(pos) == '#') {
				192	format->alternate = 1;
				193	++pos;
				194	}
				195
				196	/* The special case for 0-padding (backwards compat) */
				197	if (format->fill_char == '\0' && end-pos >= 1 && READ_spec(pos) == '0') {
				198	format->fill_char = '0';
				199	if (!align_specified) {
				200	format->align = '=';
				201	}
				202	++pos;
				203	}
				204
				205	consumed = get_integer(format_spec, &pos, end, &format->width);
				206	if (consumed == -1)
				207	/* Overflow error. Exception already set. */
				208	return 0;
				209
				210	/* If consumed is 0, we didn't consume any characters for the
				211	width. In that case, reset the width to -1, because
				212	get_integer() will have set it to zero. -1 is how we record
				213	that the width wasn't specified. */
				214	if (consumed == 0)
				215	format->width = -1;
				216
				217	/* Comma signifies add thousands separators */
				218	if (end-pos && READ_spec(pos) == ',') {
				219	format->thousands_separators = 1;
				220	++pos;
				221	}
				222
				223	/* Parse field precision */
				224	if (end-pos && READ_spec(pos) == '.') {
				225	++pos;
				226
				227	consumed = get_integer(format_spec, &pos, end, &format->precision);
				228	if (consumed == -1)
				229	/* Overflow error. Exception already set. */
				230	return 0;
				231
				232	/* Not having a precision after a dot is an error. */
				233	if (consumed == 0) {
				234	PyErr_Format(PyExc_ValueError,
				235	"Format specifier missing precision");
				236	return 0;
				237	}
				238
				239	}
				240
				241	/* Finally, parse the type field. */
				242
				243	if (end-pos > 1) {
				244	/* More than one char remain, invalid conversion spec. */
				245	PyErr_Format(PyExc_ValueError, "Invalid conversion specification");
				246	return 0;
				247	}
				248
				249	if (end-pos == 1) {
				250	format->type = READ_spec(pos);
				251	++pos;
				252	}
				253
				254	/* Do as much validating as we can, just by looking at the format
				255	specifier. Do not take into account what type of formatting
				256	we're doing (int, float, string). */
				257
				258	if (format->thousands_separators) {
				259	switch (format->type) {
				260	case 'd':
				261	case 'e':
				262	case 'f':
				263	case 'g':
				264	case 'E':
				265	case 'G':
				266	case '%':
				267	case 'F':
				268	case '\0':
				269	/* These are allowed. See PEP 378.*/
				270	break;
				271	default:
				272	invalid_comma_type(format->type);
				273	return 0;
				274	}
				275	}
				276
				277	if (format->fill_char > 127 \|\| format->align > 127 \|\|
				278	format->sign > 127) {
				279	PyErr_SetString(PyExc_ValueError, "fill character too large");
				280	return 0;
				281	}
				282
				283	return 1;
				284	}
				285
				286	/* Calculate the padding needed. */
				287	static void
				288	calc_padding(Py_ssize_t nchars, Py_ssize_t width, Py_UCS4 align,
				289	Py_ssize_t n_lpadding, Py_ssize_t n_rpadding,
				290	Py_ssize_t *n_total)
				291	{
				292	if (width >= 0) {
				293	if (nchars > width)
				294	*n_total = nchars;
				295	else
				296	*n_total = width;
				297	}
				298	else {
				299	/* not specified, use all of the chars and no more */
				300	*n_total = nchars;
				301	}
				302
				303	/* Figure out how much leading space we need, based on the
				304	aligning */
				305	if (align == '>')
				306	n_lpadding = n_total - nchars;
				307	else if (align == '^')
				308	n_lpadding = (n_total - nchars) / 2;
				309	else if (align == '<' \|\| align == '=')
				310	*n_lpadding = 0;
				311	else {
				312	/* We should never have an unspecified alignment. */
				313	*n_lpadding = 0;
				314	assert(0);
				315	}
				316
				317	n_rpadding = n_total - nchars - *n_lpadding;
				318	}
				319
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	320	/* Do the padding, and return a pointer to where the caller-supplied
				321	content goes. */
				322	static Py_ssize_t
				323	fill_padding(PyObject *s, Py_ssize_t start, Py_ssize_t nchars,
				324	Py_UCS4 fill_char, Py_ssize_t n_lpadding,
				325	Py_ssize_t n_rpadding)
				326	{
				327	/* Pad on left. */
				328	if (n_lpadding)
Victor Stinner	3fe5531	2012-01-04 00:33:50 +0100	[diff] [blame]	329	PyUnicode_Fill(s, start, start + n_lpadding, fill_char);
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	330
				331	/* Pad on right. */
				332	if (n_rpadding)
Victor Stinner	3fe5531	2012-01-04 00:33:50 +0100	[diff] [blame]	333	PyUnicode_Fill(s, start + nchars + n_lpadding,
				334	start + nchars + n_lpadding + n_rpadding, fill_char);
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	335
				336	/* Pointer to the user content. */
				337	return start + n_lpadding;
				338	}
				339
				340	/************************************************************************/
				341	/********* common routines for numeric formatting *******************/
				342	/************************************************************************/
				343
				344	/* Locale type codes. */
				345	#define LT_CURRENT_LOCALE 0
				346	#define LT_DEFAULT_LOCALE 1
				347	#define LT_NO_LOCALE 2
				348
				349	/* Locale info needed for formatting integers and the part of floats
				350	before and including the decimal. Note that locales only support
				351	8-bit chars, not unicode. */
				352	typedef struct {
				353	char *decimal_point;
				354	char *thousands_sep;
				355	char *grouping;
				356	} LocaleInfo;
				357
				358	/* describes the layout for an integer, see the comment in
				359	calc_number_widths() for details */
				360	typedef struct {
				361	Py_ssize_t n_lpadding;
				362	Py_ssize_t n_prefix;
				363	Py_ssize_t n_spadding;
				364	Py_ssize_t n_rpadding;
				365	char sign;
				366	Py_ssize_t n_sign; /* number of digits needed for sign (0/1) */
				367	Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including
				368	any grouping chars. */
				369	Py_ssize_t n_decimal; /* 0 if only an integer */
				370	Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part,
				371	excluding the decimal itself, if
				372	present. */
				373
				374	/* These 2 are not the widths of fields, but are needed by
				375	STRINGLIB_GROUPING. */
				376	Py_ssize_t n_digits; /* The number of digits before a decimal
				377	or exponent. */
				378	Py_ssize_t n_min_width; /* The min_width we used when we computed
				379	the n_grouped_digits width. */
				380	} NumberFieldWidths;
				381
				382
				383	/* Given a number of the form:
				384	digits[remainder]
				385	where ptr points to the start and end points to the end, find where
				386	the integer part ends. This could be a decimal, an exponent, both,
				387	or neither.
				388	If a decimal point is present, set *has_decimal and increment
				389	remainder beyond it.
				390	Results are undefined (but shouldn't crash) for improperly
				391	formatted strings.
				392	*/
				393	static void
				394	parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end,
				395	Py_ssize_t n_remainder, int has_decimal)
				396	{
				397	Py_ssize_t remainder;
				398
				399	while (pos<end && isdigit(PyUnicode_READ_CHAR(s, pos)))
				400	++pos;
				401	remainder = pos;
				402
				403	/* Does remainder start with a decimal point? */
				404	*has_decimal = pos<end && PyUnicode_READ_CHAR(s, remainder) == '.';
				405
				406	/* Skip the decimal point. */
				407	if (*has_decimal)
				408	remainder++;
				409
				410	*n_remainder = end - remainder;
				411	}
				412
				413	/* not all fields of format are used. for example, precision is
				414	unused. should this take discrete params in order to be more clear
				415	about what it does? or is passing a single format parameter easier
				416	and more efficient enough to justify a little obfuscation? */
				417	static Py_ssize_t
				418	calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
				419	Py_UCS4 sign_char, PyObject *number, Py_ssize_t n_start,
				420	Py_ssize_t n_end, Py_ssize_t n_remainder,
				421	int has_decimal, const LocaleInfo *locale,
				422	const InternalFormatSpec *format)
				423	{
				424	Py_ssize_t n_non_digit_non_padding;
				425	Py_ssize_t n_padding;
				426
				427	spec->n_digits = n_end - n_start - n_remainder - (has_decimal?1:0);
				428	spec->n_lpadding = 0;
				429	spec->n_prefix = n_prefix;
				430	spec->n_decimal = has_decimal ? strlen(locale->decimal_point) : 0;
				431	spec->n_remainder = n_remainder;
				432	spec->n_spadding = 0;
				433	spec->n_rpadding = 0;
				434	spec->sign = '\0';
				435	spec->n_sign = 0;
				436
				437	/* the output will look like:
				438	\| \|
				439	\| <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> \|
				440	\| \|
				441
				442	sign is computed from format->sign and the actual
				443	sign of the number
				444
				445	prefix is given (it's for the '0x' prefix)
				446
				447	digits is already known
				448
				449	the total width is either given, or computed from the
				450	actual digits
				451
				452	only one of lpadding, spadding, and rpadding can be non-zero,
				453	and it's calculated from the width and other fields
				454	*/
				455
				456	/* compute the various parts we're going to write */
				457	switch (format->sign) {
				458	case '+':
				459	/* always put a + or - */
				460	spec->n_sign = 1;
				461	spec->sign = (sign_char == '-' ? '-' : '+');
				462	break;
				463	case ' ':
				464	spec->n_sign = 1;
				465	spec->sign = (sign_char == '-' ? '-' : ' ');
				466	break;
				467	default:
				468	/* Not specified, or the default (-) */
				469	if (sign_char == '-') {
				470	spec->n_sign = 1;
				471	spec->sign = '-';
				472	}
				473	}
				474
				475	/* The number of chars used for non-digits and non-padding. */
				476	n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
				477	spec->n_remainder;
				478
				479	/* min_width can go negative, that's okay. format->width == -1 means
				480	we don't care. */
				481	if (format->fill_char == '0' && format->align == '=')
				482	spec->n_min_width = format->width - n_non_digit_non_padding;
				483	else
				484	spec->n_min_width = 0;
				485
				486	if (spec->n_digits == 0)
				487	/* This case only occurs when using 'c' formatting, we need
				488	to special case it because the grouping code always wants
				489	to have at least one character. */
				490	spec->n_grouped_digits = 0;
				491	else
				492	spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping(
Victor Stinner	c3cec78	2011-10-05 21:24:08 +0200	[diff] [blame]	493	NULL, PyUnicode_1BYTE_KIND, NULL, 0, NULL,
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	494	spec->n_digits, spec->n_min_width,
				495	locale->grouping, locale->thousands_sep);
				496
				497	/* Given the desired width and the total of digit and non-digit
				498	space we consume, see if we need any padding. format->width can
				499	be negative (meaning no padding), but this code still works in
				500	that case. */
				501	n_padding = format->width -
				502	(n_non_digit_non_padding + spec->n_grouped_digits);
				503	if (n_padding > 0) {
				504	/* Some padding is needed. Determine if it's left, space, or right. */
				505	switch (format->align) {
				506	case '<':
				507	spec->n_rpadding = n_padding;
				508	break;
				509	case '^':
				510	spec->n_lpadding = n_padding / 2;
				511	spec->n_rpadding = n_padding - spec->n_lpadding;
				512	break;
				513	case '=':
				514	spec->n_spadding = n_padding;
				515	break;
				516	case '>':
				517	spec->n_lpadding = n_padding;
				518	break;
				519	default:
				520	/* Shouldn't get here, but treat it as '>' */
				521	spec->n_lpadding = n_padding;
				522	assert(0);
				523	break;
				524	}
				525	}
				526	return spec->n_lpadding + spec->n_sign + spec->n_prefix +
				527	spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
				528	spec->n_remainder + spec->n_rpadding;
				529	}
				530
				531	/* Fill in the digit parts of a numbers's string representation,
				532	as determined in calc_number_widths().
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	533	Return -1 on error, or 0 on success. */
				534	static int
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	535	fill_number(PyObject out, Py_ssize_t pos, const NumberFieldWidths spec,
				536	PyObject *digits, Py_ssize_t d_start, Py_ssize_t d_end,
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	537	PyObject *prefix, Py_ssize_t p_start,
				538	Py_UCS4 fill_char,
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	539	LocaleInfo *locale, int toupper)
				540	{
				541	/* Used to keep track of digits, decimal, and remainder. */
				542	Py_ssize_t d_pos = d_start;
				543	unsigned int kind = PyUnicode_KIND(out);
				544	void *data = PyUnicode_DATA(out);
				545
				546	#ifndef NDEBUG
				547	Py_ssize_t r;
				548	#endif
				549
				550	if (spec->n_lpadding) {
Victor Stinner	3fe5531	2012-01-04 00:33:50 +0100	[diff] [blame]	551	PyUnicode_Fill(out, pos, pos + spec->n_lpadding, fill_char);
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	552	pos += spec->n_lpadding;
				553	}
				554	if (spec->n_sign == 1) {
				555	PyUnicode_WRITE(kind, data, pos++, spec->sign);
				556	}
				557	if (spec->n_prefix) {
Victor Stinner	fd85c3a	2011-09-28 21:53:49 +0200	[diff] [blame]	558	if (PyUnicode_CopyCharacters(out, pos,
				559	prefix, p_start,
				560	spec->n_prefix) < 0)
				561	return -1;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	562	if (toupper) {
				563	Py_ssize_t t;
				564	/* XXX if the upper-case prefix is wider than the target
				565	buffer, the caller should have allocated a wider string,
				566	but currently doesn't. */
				567	for (t = 0; t < spec->n_prefix; ++t)
				568	PyUnicode_WRITE(kind, data, pos + t,
				569	Py_UNICODE_TOUPPER(
				570	PyUnicode_READ(kind, data, pos + t)));
				571	}
				572	pos += spec->n_prefix;
				573	}
				574	if (spec->n_spadding) {
Victor Stinner	3fe5531	2012-01-04 00:33:50 +0100	[diff] [blame]	575	PyUnicode_Fill(out, pos, pos + spec->n_spadding, fill_char);
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	576	pos += spec->n_spadding;
				577	}
				578
				579	/* Only for type 'c' special case, it has no digits. */
				580	if (spec->n_digits != 0) {
				581	/* Fill the digits with InsertThousandsGrouping. */
Victor Stinner	dba2dee	2011-09-28 21:50:42 +0200	[diff] [blame]	582	char *pdigits;
				583	if (PyUnicode_READY(digits))
				584	return -1;
				585	pdigits = PyUnicode_DATA(digits);
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	586	if (PyUnicode_KIND(digits) < kind) {
				587	pdigits = _PyUnicode_AsKind(digits, kind);
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	588	if (pdigits == NULL)
				589	return -1;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	590	}
				591	#ifndef NDEBUG
				592	r =
				593	#endif
				594	_PyUnicode_InsertThousandsGrouping(
Victor Stinner	c3cec78	2011-10-05 21:24:08 +0200	[diff] [blame]	595	out, kind,
Martin v. Löwis	c47adb0	2011-10-07 20:55:35 +0200	[diff] [blame]	596	(char)data + kind pos,
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	597	spec->n_grouped_digits,
Martin v. Löwis	c47adb0	2011-10-07 20:55:35 +0200	[diff] [blame]	598	pdigits + kind * d_pos,
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	599	spec->n_digits, spec->n_min_width,
				600	locale->grouping, locale->thousands_sep);
				601	#ifndef NDEBUG
				602	assert(r == spec->n_grouped_digits);
				603	#endif
				604	if (PyUnicode_KIND(digits) < kind)
				605	PyMem_Free(pdigits);
				606	d_pos += spec->n_digits;
				607	}
				608	if (toupper) {
				609	Py_ssize_t t;
				610	for (t = 0; t < spec->n_grouped_digits; ++t)
				611	PyUnicode_WRITE(kind, data, pos + t,
				612	Py_UNICODE_TOUPPER(
				613	PyUnicode_READ(kind, data, pos + t)));
				614	}
				615	pos += spec->n_grouped_digits;
				616
				617	if (spec->n_decimal) {
				618	Py_ssize_t t;
				619	for (t = 0; t < spec->n_decimal; ++t)
				620	PyUnicode_WRITE(kind, data, pos + t,
				621	locale->decimal_point[t]);
				622	pos += spec->n_decimal;
				623	d_pos += 1;
				624	}
				625
				626	if (spec->n_remainder) {
Victor Stinner	fd85c3a	2011-09-28 21:53:49 +0200	[diff] [blame]	627	if (PyUnicode_CopyCharacters(out, pos, digits, d_pos, spec->n_remainder) < 0)
				628	return -1;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	629	pos += spec->n_remainder;
				630	d_pos += spec->n_remainder;
				631	}
				632
				633	if (spec->n_rpadding) {
Victor Stinner	3fe5531	2012-01-04 00:33:50 +0100	[diff] [blame]	634	PyUnicode_Fill(out, pos, pos + spec->n_rpadding, fill_char);
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	635	pos += spec->n_rpadding;
				636	}
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	637	return 0;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	638	}
				639
				640	static char no_grouping[1] = {CHAR_MAX};
				641
				642	/* Find the decimal point character(s?), thousands_separator(s?), and
				643	grouping description, either for the current locale if type is
				644	LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE, or
				645	none if LT_NO_LOCALE. */
				646	static void
				647	get_locale_info(int type, LocaleInfo *locale_info)
				648	{
				649	switch (type) {
				650	case LT_CURRENT_LOCALE: {
				651	struct lconv *locale_data = localeconv();
				652	locale_info->decimal_point = locale_data->decimal_point;
				653	locale_info->thousands_sep = locale_data->thousands_sep;
				654	locale_info->grouping = locale_data->grouping;
				655	break;
				656	}
				657	case LT_DEFAULT_LOCALE:
				658	locale_info->decimal_point = ".";
				659	locale_info->thousands_sep = ",";
				660	locale_info->grouping = "\3"; /* Group every 3 characters. The
				661	(implicit) trailing 0 means repeat
				662	infinitely. */
				663	break;
				664	case LT_NO_LOCALE:
				665	locale_info->decimal_point = ".";
				666	locale_info->thousands_sep = "";
				667	locale_info->grouping = no_grouping;
				668	break;
				669	default:
				670	assert(0);
				671	}
				672	}
				673
				674	/************************************************************************/
				675	/********* string formatting ****************************************/
				676	/************************************************************************/
				677
				678	static PyObject *
				679	format_string_internal(PyObject value, const InternalFormatSpec format)
				680	{
				681	Py_ssize_t lpad;
				682	Py_ssize_t rpad;
				683	Py_ssize_t total;
				684	Py_ssize_t pos;
Victor Stinner	c4f281e	2011-10-11 22:11:42 +0200	[diff] [blame]	685	Py_ssize_t len = PyUnicode_GET_LENGTH(value);
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	686	PyObject *result = NULL;
				687	int maxchar = 127;
				688
				689	/* sign is not allowed on strings */
				690	if (format->sign != '\0') {
				691	PyErr_SetString(PyExc_ValueError,
				692	"Sign not allowed in string format specifier");
				693	goto done;
				694	}
				695
				696	/* alternate is not allowed on strings */
				697	if (format->alternate) {
				698	PyErr_SetString(PyExc_ValueError,
				699	"Alternate form (#) not allowed in string format "
				700	"specifier");
				701	goto done;
				702	}
				703
				704	/* '=' alignment not allowed on strings */
				705	if (format->align == '=') {
				706	PyErr_SetString(PyExc_ValueError,
				707	"'=' alignment not allowed "
				708	"in string format specifier");
				709	goto done;
				710	}
				711
				712	/* if precision is specified, output no more that format.precision
				713	characters */
				714	if (format->precision >= 0 && len >= format->precision) {
				715	len = format->precision;
				716	}
				717
				718	calc_padding(len, format->width, format->align, &lpad, &rpad, &total);
				719
				720	/* allocate the resulting string */
				721	result = PyUnicode_New(total, maxchar);
				722	if (result == NULL)
				723	goto done;
				724
				725	/* Write into that space. First the padding. */
				726	pos = fill_padding(result, 0, len,
				727	format->fill_char=='\0'?' ':format->fill_char,
				728	lpad, rpad);
				729
				730	/* Then the source string. */
Victor Stinner	fd85c3a	2011-09-28 21:53:49 +0200	[diff] [blame]	731	if (PyUnicode_CopyCharacters(result, pos, value, 0, len) < 0)
				732	Py_CLEAR(result);
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	733
				734	done:
				735	return result;
				736	}
				737
				738
				739	/************************************************************************/
				740	/********* long formatting ******************************************/
				741	/************************************************************************/
				742
				743	typedef PyObject*
				744	(IntOrLongToString)(PyObject value, int base);
				745
				746	static PyObject *
				747	format_int_or_long_internal(PyObject value, const InternalFormatSpec format,
				748	IntOrLongToString tostring)
				749	{
				750	PyObject *result = NULL;
				751	int maxchar = 127;
				752	PyObject *tmp = NULL;
				753	Py_ssize_t inumeric_chars;
				754	Py_UCS4 sign_char = '\0';
				755	Py_ssize_t n_digits; /* count of digits need from the computed
				756	string */
				757	Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which
				758	produces non-digits */
				759	Py_ssize_t n_prefix = 0; /* Count of prefix chars, (e.g., '0x') */
				760	Py_ssize_t n_total;
				761	Py_ssize_t prefix;
				762	NumberFieldWidths spec;
				763	long x;
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	764	int err;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	765
				766	/* Locale settings, either from the actual locale or
				767	from a hard-code pseudo-locale */
				768	LocaleInfo locale;
				769
				770	/* no precision allowed on integers */
				771	if (format->precision != -1) {
				772	PyErr_SetString(PyExc_ValueError,
				773	"Precision not allowed in integer format specifier");
				774	goto done;
				775	}
				776
				777	/* special case for character formatting */
				778	if (format->type == 'c') {
				779	/* error to specify a sign */
				780	if (format->sign != '\0') {
				781	PyErr_SetString(PyExc_ValueError,
				782	"Sign not allowed with integer"
				783	" format specifier 'c'");
				784	goto done;
				785	}
				786
				787	/* taken from unicodeobject.c formatchar() */
				788	/* Integer input truncated to a character */
				789	/* XXX: won't work for int */
				790	x = PyLong_AsLong(value);
				791	if (x == -1 && PyErr_Occurred())
				792	goto done;
				793	if (x < 0 \|\| x > 0x10ffff) {
				794	PyErr_SetString(PyExc_OverflowError,
				795	"%c arg not in range(0x110000) "
				796	"(wide Python build)");
				797	goto done;
				798	}
				799	tmp = PyUnicode_FromOrdinal(x);
				800	inumeric_chars = 0;
				801	n_digits = 1;
				802	if (x > maxchar)
				803	maxchar = x;
				804
				805	/* As a sort-of hack, we tell calc_number_widths that we only
				806	have "remainder" characters. calc_number_widths thinks
				807	these are characters that don't get formatted, only copied
				808	into the output string. We do this for 'c' formatting,
				809	because the characters are likely to be non-digits. */
				810	n_remainder = 1;
				811	}
				812	else {
				813	int base;
				814	int leading_chars_to_skip = 0; /* Number of characters added by
				815	PyNumber_ToBase that we want to
				816	skip over. */
				817
				818	/* Compute the base and how many characters will be added by
				819	PyNumber_ToBase */
				820	switch (format->type) {
				821	case 'b':
				822	base = 2;
				823	leading_chars_to_skip = 2; /* 0b */
				824	break;
				825	case 'o':
				826	base = 8;
				827	leading_chars_to_skip = 2; /* 0o */
				828	break;
				829	case 'x':
				830	case 'X':
				831	base = 16;
				832	leading_chars_to_skip = 2; /* 0x */
				833	break;
				834	default: /* shouldn't be needed, but stops a compiler warning */
				835	case 'd':
				836	case 'n':
				837	base = 10;
				838	break;
				839	}
				840
				841	/* The number of prefix chars is the same as the leading
				842	chars to skip */
				843	if (format->alternate)
				844	n_prefix = leading_chars_to_skip;
				845
				846	/* Do the hard part, converting to a string in a given base */
				847	tmp = tostring(value, base);
				848	if (tmp == NULL \|\| PyUnicode_READY(tmp) == -1)
				849	goto done;
				850
				851	inumeric_chars = 0;
				852	n_digits = PyUnicode_GET_LENGTH(tmp);
				853
				854	prefix = inumeric_chars;
				855
				856	/* Is a sign character present in the output? If so, remember it
				857	and skip it */
				858	if (PyUnicode_READ_CHAR(tmp, inumeric_chars) == '-') {
				859	sign_char = '-';
				860	++prefix;
				861	++leading_chars_to_skip;
				862	}
				863
				864	/* Skip over the leading chars (0x, 0b, etc.) */
				865	n_digits -= leading_chars_to_skip;
				866	inumeric_chars += leading_chars_to_skip;
				867	}
				868
				869	/* Determine the grouping, separator, and decimal point, if any. */
				870	get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
				871	(format->thousands_separators ?
				872	LT_DEFAULT_LOCALE :
				873	LT_NO_LOCALE),
				874	&locale);
				875
				876	/* Calculate how much memory we'll need. */
				877	n_total = calc_number_widths(&spec, n_prefix, sign_char, tmp, inumeric_chars,
				878	inumeric_chars + n_digits, n_remainder, 0, &locale, format);
				879
				880	/* Allocate the memory. */
				881	result = PyUnicode_New(n_total, maxchar);
				882	if (!result)
				883	goto done;
				884
				885	/* Populate the memory. */
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	886	err = fill_number(result, 0, &spec,
				887	tmp, inumeric_chars, inumeric_chars + n_digits,
				888	tmp, prefix,
				889	format->fill_char == '\0' ? ' ' : format->fill_char,
				890	&locale, format->type == 'X');
				891	if (err)
				892	Py_CLEAR(result);
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	893
				894	done:
				895	Py_XDECREF(tmp);
				896	return result;
				897	}
				898
				899	/************************************************************************/
				900	/********* float formatting *****************************************/
				901	/************************************************************************/
				902
				903	static PyObject*
				904	strtounicode(char *charbuffer, Py_ssize_t len)
				905	{
				906	return PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, charbuffer, len);
				907	}
				908
				909	/* much of this is taken from unicodeobject.c */
				910	static PyObject *
				911	format_float_internal(PyObject *value,
				912	const InternalFormatSpec *format)
				913	{
				914	char buf = NULL; / buffer returned from PyOS_double_to_string */
				915	Py_ssize_t n_digits;
				916	Py_ssize_t n_remainder;
				917	Py_ssize_t n_total;
				918	int has_decimal;
				919	double val;
				920	Py_ssize_t precision = format->precision;
				921	Py_ssize_t default_precision = 6;
				922	Py_UCS4 type = format->type;
				923	int add_pct = 0;
				924	Py_ssize_t index;
				925	NumberFieldWidths spec;
				926	int flags = 0;
				927	PyObject *result = NULL;
				928	int maxchar = 127;
				929	Py_UCS4 sign_char = '\0';
				930	int float_type; /* Used to see if we have a nan, inf, or regular float. */
				931	PyObject *unicode_tmp = NULL;
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	932	int err;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	933
				934	/* Locale settings, either from the actual locale or
				935	from a hard-code pseudo-locale */
				936	LocaleInfo locale;
				937
				938	if (format->alternate)
				939	flags \|= Py_DTSF_ALT;
				940
				941	if (type == '\0') {
				942	/* Omitted type specifier. Behaves in the same way as repr(x)
				943	and str(x) if no precision is given, else like 'g', but with
				944	at least one digit after the decimal point. */
				945	flags \|= Py_DTSF_ADD_DOT_0;
				946	type = 'r';
				947	default_precision = 0;
				948	}
				949
				950	if (type == 'n')
				951	/* 'n' is the same as 'g', except for the locale used to
				952	format the result. We take care of that later. */
				953	type = 'g';
				954
				955	val = PyFloat_AsDouble(value);
				956	if (val == -1.0 && PyErr_Occurred())
				957	goto done;
				958
				959	if (type == '%') {
				960	type = 'f';
				961	val *= 100;
				962	add_pct = 1;
				963	}
				964
				965	if (precision < 0)
				966	precision = default_precision;
				967	else if (type == 'r')
				968	type = 'g';
				969
				970	/* Cast "type", because if we're in unicode we need to pass a
				971	8-bit char. This is safe, because we've restricted what "type"
				972	can be. */
				973	buf = PyOS_double_to_string(val, (char)type, precision, flags,
				974	&float_type);
				975	if (buf == NULL)
				976	goto done;
				977	n_digits = strlen(buf);
				978
				979	if (add_pct) {
				980	/* We know that buf has a trailing zero (since we just called
				981	strlen() on it), and we don't use that fact any more. So we
				982	can just write over the trailing zero. */
				983	buf[n_digits] = '%';
				984	n_digits += 1;
				985	}
				986
				987	/* Since there is no unicode version of PyOS_double_to_string,
				988	just use the 8 bit version and then convert to unicode. */
				989	unicode_tmp = strtounicode(buf, n_digits);
				990	if (unicode_tmp == NULL)
				991	goto done;
				992	index = 0;
				993
				994	/* Is a sign character present in the output? If so, remember it
				995	and skip it */
				996	if (PyUnicode_READ_CHAR(unicode_tmp, index) == '-') {
				997	sign_char = '-';
				998	++index;
				999	--n_digits;
				1000	}
				1001
				1002	/* Determine if we have any "remainder" (after the digits, might include
				1003	decimal or exponent or both (or neither)) */
				1004	parse_number(unicode_tmp, index, index + n_digits, &n_remainder, &has_decimal);
				1005
				1006	/* Determine the grouping, separator, and decimal point, if any. */
				1007	get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
				1008	(format->thousands_separators ?
				1009	LT_DEFAULT_LOCALE :
				1010	LT_NO_LOCALE),
				1011	&locale);
				1012
				1013	/* Calculate how much memory we'll need. */
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	1014	n_total = calc_number_widths(&spec, 0, sign_char, unicode_tmp, index,
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1015	index + n_digits, n_remainder, has_decimal,
				1016	&locale, format);
				1017
				1018	/* Allocate the memory. */
				1019	result = PyUnicode_New(n_total, maxchar);
				1020	if (result == NULL)
				1021	goto done;
				1022
				1023	/* Populate the memory. */
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	1024	err = fill_number(result, 0, &spec,
				1025	unicode_tmp, index, index + n_digits,
				1026	NULL, 0,
				1027	format->fill_char == '\0' ? ' ' : format->fill_char,
				1028	&locale, 0);
				1029	if (err)
				1030	Py_CLEAR(result);
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1031
				1032	done:
				1033	PyMem_Free(buf);
				1034	Py_DECREF(unicode_tmp);
				1035	return result;
				1036	}
				1037
				1038	/************************************************************************/
				1039	/********* complex formatting ***************************************/
				1040	/************************************************************************/
				1041
				1042	static PyObject *
				1043	format_complex_internal(PyObject *value,
				1044	const InternalFormatSpec *format)
				1045	{
				1046	double re;
				1047	double im;
				1048	char re_buf = NULL; / buffer returned from PyOS_double_to_string */
				1049	char im_buf = NULL; / buffer returned from PyOS_double_to_string */
				1050
				1051	InternalFormatSpec tmp_format = *format;
				1052	Py_ssize_t n_re_digits;
				1053	Py_ssize_t n_im_digits;
				1054	Py_ssize_t n_re_remainder;
				1055	Py_ssize_t n_im_remainder;
				1056	Py_ssize_t n_re_total;
				1057	Py_ssize_t n_im_total;
				1058	int re_has_decimal;
				1059	int im_has_decimal;
				1060	Py_ssize_t precision = format->precision;
				1061	Py_ssize_t default_precision = 6;
				1062	Py_UCS4 type = format->type;
				1063	Py_ssize_t i_re;
				1064	Py_ssize_t i_im;
				1065	NumberFieldWidths re_spec;
				1066	NumberFieldWidths im_spec;
				1067	int flags = 0;
				1068	PyObject *result = NULL;
				1069	int maxchar = 127;
				1070	int rkind;
				1071	void *rdata;
				1072	Py_ssize_t index;
				1073	Py_UCS4 re_sign_char = '\0';
				1074	Py_UCS4 im_sign_char = '\0';
				1075	int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
				1076	int im_float_type;
				1077	int add_parens = 0;
				1078	int skip_re = 0;
				1079	Py_ssize_t lpad;
				1080	Py_ssize_t rpad;
				1081	Py_ssize_t total;
				1082	PyObject *re_unicode_tmp = NULL;
				1083	PyObject *im_unicode_tmp = NULL;
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	1084	int err;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1085
				1086	/* Locale settings, either from the actual locale or
				1087	from a hard-code pseudo-locale */
				1088	LocaleInfo locale;
				1089
				1090	/* Zero padding is not allowed. */
				1091	if (format->fill_char == '0') {
				1092	PyErr_SetString(PyExc_ValueError,
				1093	"Zero padding is not allowed in complex format "
				1094	"specifier");
				1095	goto done;
				1096	}
				1097
				1098	/* Neither is '=' alignment . */
				1099	if (format->align == '=') {
				1100	PyErr_SetString(PyExc_ValueError,
				1101	"'=' alignment flag is not allowed in complex format "
				1102	"specifier");
				1103	goto done;
				1104	}
				1105
				1106	re = PyComplex_RealAsDouble(value);
				1107	if (re == -1.0 && PyErr_Occurred())
				1108	goto done;
				1109	im = PyComplex_ImagAsDouble(value);
				1110	if (im == -1.0 && PyErr_Occurred())
				1111	goto done;
				1112
				1113	if (format->alternate)
				1114	flags \|= Py_DTSF_ALT;
				1115
				1116	if (type == '\0') {
				1117	/* Omitted type specifier. Should be like str(self). */
				1118	type = 'r';
				1119	default_precision = 0;
				1120	if (re == 0.0 && copysign(1.0, re) == 1.0)
				1121	skip_re = 1;
				1122	else
				1123	add_parens = 1;
				1124	}
				1125
				1126	if (type == 'n')
				1127	/* 'n' is the same as 'g', except for the locale used to
				1128	format the result. We take care of that later. */
				1129	type = 'g';
				1130
				1131	if (precision < 0)
				1132	precision = default_precision;
				1133	else if (type == 'r')
				1134	type = 'g';
				1135
				1136	/* Cast "type", because if we're in unicode we need to pass a
				1137	8-bit char. This is safe, because we've restricted what "type"
				1138	can be. */
				1139	re_buf = PyOS_double_to_string(re, (char)type, precision, flags,
				1140	&re_float_type);
				1141	if (re_buf == NULL)
				1142	goto done;
				1143	im_buf = PyOS_double_to_string(im, (char)type, precision, flags,
				1144	&im_float_type);
				1145	if (im_buf == NULL)
				1146	goto done;
				1147
				1148	n_re_digits = strlen(re_buf);
				1149	n_im_digits = strlen(im_buf);
				1150
				1151	/* Since there is no unicode version of PyOS_double_to_string,
				1152	just use the 8 bit version and then convert to unicode. */
				1153	re_unicode_tmp = strtounicode(re_buf, n_re_digits);
				1154	if (re_unicode_tmp == NULL)
				1155	goto done;
				1156	i_re = 0;
				1157
				1158	im_unicode_tmp = strtounicode(im_buf, n_im_digits);
				1159	if (im_unicode_tmp == NULL)
				1160	goto done;
				1161	i_im = 0;
				1162
				1163	/* Is a sign character present in the output? If so, remember it
				1164	and skip it */
				1165	if (PyUnicode_READ_CHAR(re_unicode_tmp, i_re) == '-') {
				1166	re_sign_char = '-';
				1167	++i_re;
				1168	--n_re_digits;
				1169	}
				1170	if (PyUnicode_READ_CHAR(im_unicode_tmp, i_im) == '-') {
				1171	im_sign_char = '-';
				1172	++i_im;
				1173	--n_im_digits;
				1174	}
				1175
				1176	/* Determine if we have any "remainder" (after the digits, might include
				1177	decimal or exponent or both (or neither)) */
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	1178	parse_number(re_unicode_tmp, i_re, i_re + n_re_digits,
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1179	&n_re_remainder, &re_has_decimal);
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	1180	parse_number(im_unicode_tmp, i_im, i_im + n_im_digits,
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1181	&n_im_remainder, &im_has_decimal);
				1182
				1183	/* Determine the grouping, separator, and decimal point, if any. */
				1184	get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
				1185	(format->thousands_separators ?
				1186	LT_DEFAULT_LOCALE :
				1187	LT_NO_LOCALE),
				1188	&locale);
				1189
				1190	/* Turn off any padding. We'll do it later after we've composed
				1191	the numbers without padding. */
				1192	tmp_format.fill_char = '\0';
				1193	tmp_format.align = '<';
				1194	tmp_format.width = -1;
				1195
				1196	/* Calculate how much memory we'll need. */
				1197	n_re_total = calc_number_widths(&re_spec, 0, re_sign_char, re_unicode_tmp,
				1198	i_re, i_re + n_re_digits, n_re_remainder,
				1199	re_has_decimal, &locale, &tmp_format);
				1200
				1201	/* Same formatting, but always include a sign, unless the real part is
				1202	* going to be omitted, in which case we use whatever sign convention was
				1203	* requested by the original format. */
				1204	if (!skip_re)
				1205	tmp_format.sign = '+';
				1206	n_im_total = calc_number_widths(&im_spec, 0, im_sign_char, im_unicode_tmp,
				1207	i_im, i_im + n_im_digits, n_im_remainder,
				1208	im_has_decimal, &locale, &tmp_format);
				1209
				1210	if (skip_re)
				1211	n_re_total = 0;
				1212
				1213	/* Add 1 for the 'j', and optionally 2 for parens. */
				1214	calc_padding(n_re_total + n_im_total + 1 + add_parens * 2,
				1215	format->width, format->align, &lpad, &rpad, &total);
				1216
				1217	result = PyUnicode_New(total, maxchar);
				1218	if (result == NULL)
				1219	goto done;
				1220	rkind = PyUnicode_KIND(result);
				1221	rdata = PyUnicode_DATA(result);
				1222
				1223	/* Populate the memory. First, the padding. */
				1224	index = fill_padding(result, 0,
				1225	n_re_total + n_im_total + 1 + add_parens * 2,
				1226	format->fill_char=='\0' ? ' ' : format->fill_char,
				1227	lpad, rpad);
				1228
				1229	if (add_parens)
				1230	PyUnicode_WRITE(rkind, rdata, index++, '(');
				1231
				1232	if (!skip_re) {
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	1233	err = fill_number(result, index, &re_spec,
				1234	re_unicode_tmp, i_re, i_re + n_re_digits,
				1235	NULL, 0,
				1236	0,
				1237	&locale, 0);
				1238	if (err) {
				1239	Py_CLEAR(result);
				1240	goto done;
				1241	}
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1242	index += n_re_total;
				1243	}
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	1244	err = fill_number(result, index, &im_spec,
				1245	im_unicode_tmp, i_im, i_im + n_im_digits,
				1246	NULL, 0,
				1247	0,
				1248	&locale, 0);
				1249	if (err) {
				1250	Py_CLEAR(result);
				1251	goto done;
				1252	}
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1253	index += n_im_total;
				1254	PyUnicode_WRITE(rkind, rdata, index++, 'j');
				1255
				1256	if (add_parens)
				1257	PyUnicode_WRITE(rkind, rdata, index++, ')');
				1258
				1259	done:
				1260	PyMem_Free(re_buf);
				1261	PyMem_Free(im_buf);
				1262	Py_XDECREF(re_unicode_tmp);
				1263	Py_XDECREF(im_unicode_tmp);
				1264	return result;
				1265	}
				1266
				1267	/************************************************************************/
				1268	/********* built in formatters **************************************/
				1269	/************************************************************************/
				1270	PyObject *
				1271	_PyUnicode_FormatAdvanced(PyObject *obj,
				1272	PyObject *format_spec,
				1273	Py_ssize_t start, Py_ssize_t end)
				1274	{
				1275	InternalFormatSpec format;
Victor Stinner	fb9ea8c	2011-10-06 01:45:57 +0200	[diff] [blame]	1276	PyObject *result;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1277
				1278	/* check for the special case of zero length format spec, make
				1279	it equivalent to str(obj) */
Victor Stinner	fb9ea8c	2011-10-06 01:45:57 +0200	[diff] [blame]	1280	if (start == end)
				1281	return PyObject_Str(obj);
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1282
				1283	/* parse the format_spec */
				1284	if (!parse_internal_render_format_spec(format_spec, start, end,
				1285	&format, 's', '<'))
Victor Stinner	fb9ea8c	2011-10-06 01:45:57 +0200	[diff] [blame]	1286	return NULL;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1287
				1288	/* type conversion? */
				1289	switch (format.type) {
				1290	case 's':
				1291	/* no type conversion needed, already a string. do the formatting */
				1292	result = format_string_internal(obj, &format);
Victor Stinner	fb9ea8c	2011-10-06 01:45:57 +0200	[diff] [blame]	1293	if (result != NULL)
				1294	assert(_PyUnicode_CheckConsistency(result, 1));
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1295	break;
				1296	default:
				1297	/* unknown */
				1298	unknown_presentation_type(format.type, obj->ob_type->tp_name);
Victor Stinner	fb9ea8c	2011-10-06 01:45:57 +0200	[diff] [blame]	1299	result = NULL;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1300	}
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1301	return result;
				1302	}
				1303
				1304	static PyObject*
				1305	format_int_or_long(PyObject* obj, PyObject* format_spec,
				1306	Py_ssize_t start, Py_ssize_t end,
				1307	IntOrLongToString tostring)
				1308	{
				1309	PyObject *result = NULL;
				1310	PyObject *tmp = NULL;
				1311	InternalFormatSpec format;
				1312
				1313	/* check for the special case of zero length format spec, make
				1314	it equivalent to str(obj) */
				1315	if (start == end) {
				1316	result = PyObject_Str(obj);
				1317	goto done;
				1318	}
				1319
				1320	/* parse the format_spec */
				1321	if (!parse_internal_render_format_spec(format_spec, start, end,
				1322	&format, 'd', '>'))
				1323	goto done;
				1324
				1325	/* type conversion? */
				1326	switch (format.type) {
				1327	case 'b':
				1328	case 'c':
				1329	case 'd':
				1330	case 'o':
				1331	case 'x':
				1332	case 'X':
				1333	case 'n':
				1334	/* no type conversion needed, already an int (or long). do
				1335	the formatting */
				1336	result = format_int_or_long_internal(obj, &format, tostring);
				1337	break;
				1338
				1339	case 'e':
				1340	case 'E':
				1341	case 'f':
				1342	case 'F':
				1343	case 'g':
				1344	case 'G':
				1345	case '%':
				1346	/* convert to float */
				1347	tmp = PyNumber_Float(obj);
				1348	if (tmp == NULL)
				1349	goto done;
				1350	result = format_float_internal(tmp, &format);
				1351	break;
				1352
				1353	default:
				1354	/* unknown */
				1355	unknown_presentation_type(format.type, obj->ob_type->tp_name);
				1356	goto done;
				1357	}
				1358
				1359	done:
				1360	Py_XDECREF(tmp);
				1361	return result;
				1362	}
				1363
				1364	/* Need to define long_format as a function that will convert a long
				1365	to a string. In 3.0, _PyLong_Format has the correct signature. */
				1366	#define long_format _PyLong_Format
				1367
				1368	PyObject *
				1369	_PyLong_FormatAdvanced(PyObject *obj,
				1370	PyObject *format_spec,
				1371	Py_ssize_t start, Py_ssize_t end)
				1372	{
				1373	return format_int_or_long(obj, format_spec, start, end,
				1374	long_format);
				1375	}
				1376
				1377	PyObject *
				1378	_PyFloat_FormatAdvanced(PyObject *obj,
				1379	PyObject *format_spec,
				1380	Py_ssize_t start, Py_ssize_t end)
				1381	{
				1382	PyObject *result = NULL;
				1383	InternalFormatSpec format;
				1384
				1385	/* check for the special case of zero length format spec, make
				1386	it equivalent to str(obj) */
				1387	if (start == end) {
				1388	result = PyObject_Str(obj);
				1389	goto done;
				1390	}
				1391
				1392	/* parse the format_spec */
				1393	if (!parse_internal_render_format_spec(format_spec, start, end,
				1394	&format, '\0', '>'))
				1395	goto done;
				1396
				1397	/* type conversion? */
				1398	switch (format.type) {
				1399	case '\0': /* No format code: like 'g', but with at least one decimal. */
				1400	case 'e':
				1401	case 'E':
				1402	case 'f':
				1403	case 'F':
				1404	case 'g':
				1405	case 'G':
				1406	case 'n':
				1407	case '%':
				1408	/* no conversion, already a float. do the formatting */
				1409	result = format_float_internal(obj, &format);
				1410	break;
				1411
				1412	default:
				1413	/* unknown */
				1414	unknown_presentation_type(format.type, obj->ob_type->tp_name);
				1415	goto done;
				1416	}
				1417
				1418	done:
				1419	return result;
				1420	}
				1421
				1422	PyObject *
				1423	_PyComplex_FormatAdvanced(PyObject *obj,
				1424	PyObject *format_spec,
				1425	Py_ssize_t start, Py_ssize_t end)
				1426	{
				1427	PyObject *result = NULL;
				1428	InternalFormatSpec format;
				1429
				1430	/* check for the special case of zero length format spec, make
				1431	it equivalent to str(obj) */
				1432	if (start == end) {
				1433	result = PyObject_Str(obj);
				1434	goto done;
				1435	}
				1436
				1437	/* parse the format_spec */
				1438	if (!parse_internal_render_format_spec(format_spec, start, end,
				1439	&format, '\0', '>'))
				1440	goto done;
				1441
				1442	/* type conversion? */
				1443	switch (format.type) {
				1444	case '\0': /* No format code: like 'g', but with at least one decimal. */
				1445	case 'e':
				1446	case 'E':
				1447	case 'f':
				1448	case 'F':
				1449	case 'g':
				1450	case 'G':
				1451	case 'n':
				1452	/* no conversion, already a complex. do the formatting */
				1453	result = format_complex_internal(obj, &format);
				1454	break;
				1455
				1456	default:
				1457	/* unknown */
				1458	unknown_presentation_type(format.type, obj->ob_type->tp_name);
				1459	goto done;
				1460	}
				1461
				1462	done:
				1463	return result;
				1464	}