Blame - Python/formatter_unicode.c - platform/external/python/cpython3

blob: 6deb3cd7fe85f239319539ad4783e6622f649df2 [file] [log] [blame]

Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	1	/* implements the unicode (as opposed to string) version of the
				2	built-in formatters for string, int, float. that is, the versions
				3	of int.__format__, etc., that take and return unicode objects */
				4
				5	#include "Python.h"
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	6	#include <locale.h>
				7
				8	/* Raises an exception about an unknown presentation type for this
				9	* type. */
				10
				11	static void
				12	unknown_presentation_type(Py_UCS4 presentation_type,
				13	const char* type_name)
				14	{
				15	/* %c might be out-of-range, hence the two cases. */
				16	if (presentation_type > 32 && presentation_type < 128)
				17	PyErr_Format(PyExc_ValueError,
				18	"Unknown format code '%c' "
				19	"for object of type '%.200s'",
				20	(char)presentation_type,
				21	type_name);
				22	else
				23	PyErr_Format(PyExc_ValueError,
				24	"Unknown format code '\\x%x' "
				25	"for object of type '%.200s'",
				26	(unsigned int)presentation_type,
				27	type_name);
				28	}
				29
				30	static void
				31	invalid_comma_type(Py_UCS4 presentation_type)
				32	{
				33	if (presentation_type > 32 && presentation_type < 128)
				34	PyErr_Format(PyExc_ValueError,
				35	"Cannot specify ',' with '%c'.",
				36	(char)presentation_type);
				37	else
				38	PyErr_Format(PyExc_ValueError,
				39	"Cannot specify ',' with '\\x%x'.",
				40	(unsigned int)presentation_type);
				41	}
				42
				43	/*
				44	get_integer consumes 0 or more decimal digit characters from an
				45	input string, updates *result with the corresponding positive
				46	integer, and returns the number of digits consumed.
				47
				48	returns -1 on error.
				49	*/
				50	static int
				51	get_integer(PyObject str, Py_ssize_t pos, Py_ssize_t end,
				52	Py_ssize_t *result)
				53	{
Mark Dickinson	47862d4	2011-12-01 15:27:04 +0000	[diff] [blame]	54	Py_ssize_t accumulator, digitval;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	55	int numdigits;
				56	accumulator = numdigits = 0;
				57	for (;;(*pos)++, numdigits++) {
				58	if (*pos >= end)
				59	break;
				60	digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ_CHAR(str, *pos));
				61	if (digitval < 0)
				62	break;
				63	/*
Mark Dickinson	47862d4	2011-12-01 15:27:04 +0000	[diff] [blame]	64	Detect possible overflow before it happens:
				65
				66	accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
				67	accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	68	*/
Mark Dickinson	47862d4	2011-12-01 15:27:04 +0000	[diff] [blame]	69	if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	70	PyErr_Format(PyExc_ValueError,
				71	"Too many decimal digits in format string");
				72	return -1;
				73	}
Mark Dickinson	47862d4	2011-12-01 15:27:04 +0000	[diff] [blame]	74	accumulator = accumulator * 10 + digitval;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	75	}
				76	*result = accumulator;
				77	return numdigits;
				78	}
				79
				80	/************************************************************************/
				81	/********* standard format specifier parsing ************************/
				82	/************************************************************************/
				83
				84	/* returns true if this character is a specifier alignment token */
				85	Py_LOCAL_INLINE(int)
				86	is_alignment_token(Py_UCS4 c)
				87	{
				88	switch (c) {
				89	case '<': case '>': case '=': case '^':
				90	return 1;
				91	default:
				92	return 0;
				93	}
				94	}
				95
				96	/* returns true if this character is a sign element */
				97	Py_LOCAL_INLINE(int)
				98	is_sign_element(Py_UCS4 c)
				99	{
				100	switch (c) {
				101	case ' ': case '+': case '-':
				102	return 1;
				103	default:
				104	return 0;
				105	}
				106	}
Eric Smith	8c66326	2007-08-25 02:26:07 +0000	[diff] [blame]	107
Eric Smith	4a7d76d	2008-05-30 18:10:19 +0000	[diff] [blame]	108
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	109	typedef struct {
				110	Py_UCS4 fill_char;
				111	Py_UCS4 align;
				112	int alternate;
				113	Py_UCS4 sign;
				114	Py_ssize_t width;
				115	int thousands_separators;
				116	Py_ssize_t precision;
				117	Py_UCS4 type;
				118	} InternalFormatSpec;
Eric Smith	4a7d76d	2008-05-30 18:10:19 +0000	[diff] [blame]	119
#if 0
/* Occasionally useful for debugging. Should normally be compiled out. */
static void
DEBUG_PRINT_FORMAT_SPEC(InternalFormatSpec *format)
{
    /* One line per field of the parsed specifier, then a blank line. */
    printf("internal format spec: fill_char %d\n"
           "internal format spec: align %d\n"
           "internal format spec: alternate %d\n"
           "internal format spec: sign %d\n"
           "internal format spec: width %zd\n"
           "internal format spec: thousands_separators %d\n"
           "internal format spec: precision %zd\n"
           "internal format spec: type %c\n"
           "\n",
           format->fill_char,
           format->align,
           format->alternate,
           format->sign,
           format->width,
           format->thousands_separators,
           format->precision,
           format->type);
}
#endif
				137
				138
				139	/*
				140	ptr points to the start of the format_spec, end points just past its end.
				141	fills in format with the parsed information.
				142	returns 1 on success, 0 on failure.
				143	if failure, sets the exception
				144	*/
				145	static int
				146	parse_internal_render_format_spec(PyObject *format_spec,
				147	Py_ssize_t start, Py_ssize_t end,
				148	InternalFormatSpec *format,
				149	char default_type,
				150	char default_align)
				151	{
				152	Py_ssize_t pos = start;
				153	/* end-pos is used throughout this code to specify the length of
				154	the input string */
				155	#define READ_spec(index) PyUnicode_READ_CHAR(format_spec, index)
				156
				157	Py_ssize_t consumed;
				158	int align_specified = 0;
				159
				160	format->fill_char = '\0';
				161	format->align = default_align;
				162	format->alternate = 0;
				163	format->sign = '\0';
				164	format->width = -1;
				165	format->thousands_separators = 0;
				166	format->precision = -1;
				167	format->type = default_type;
				168
				169	/* If the second char is an alignment token,
				170	then parse the fill char */
				171	if (end-pos >= 2 && is_alignment_token(READ_spec(pos+1))) {
				172	format->align = READ_spec(pos+1);
				173	format->fill_char = READ_spec(pos);
				174	align_specified = 1;
				175	pos += 2;
				176	}
				177	else if (end-pos >= 1 && is_alignment_token(READ_spec(pos))) {
				178	format->align = READ_spec(pos);
				179	align_specified = 1;
				180	++pos;
				181	}
				182
				183	/* Parse the various sign options */
				184	if (end-pos >= 1 && is_sign_element(READ_spec(pos))) {
				185	format->sign = READ_spec(pos);
				186	++pos;
				187	}
				188
				189	/* If the next character is #, we're in alternate mode. This only
				190	applies to integers. */
				191	if (end-pos >= 1 && READ_spec(pos) == '#') {
				192	format->alternate = 1;
				193	++pos;
				194	}
				195
				196	/* The special case for 0-padding (backwards compat) */
				197	if (format->fill_char == '\0' && end-pos >= 1 && READ_spec(pos) == '0') {
				198	format->fill_char = '0';
				199	if (!align_specified) {
				200	format->align = '=';
				201	}
				202	++pos;
				203	}
				204
				205	consumed = get_integer(format_spec, &pos, end, &format->width);
				206	if (consumed == -1)
				207	/* Overflow error. Exception already set. */
				208	return 0;
				209
				210	/* If consumed is 0, we didn't consume any characters for the
				211	width. In that case, reset the width to -1, because
				212	get_integer() will have set it to zero. -1 is how we record
				213	that the width wasn't specified. */
				214	if (consumed == 0)
				215	format->width = -1;
				216
				217	/* Comma signifies add thousands separators */
				218	if (end-pos && READ_spec(pos) == ',') {
				219	format->thousands_separators = 1;
				220	++pos;
				221	}
				222
				223	/* Parse field precision */
				224	if (end-pos && READ_spec(pos) == '.') {
				225	++pos;
				226
				227	consumed = get_integer(format_spec, &pos, end, &format->precision);
				228	if (consumed == -1)
				229	/* Overflow error. Exception already set. */
				230	return 0;
				231
				232	/* Not having a precision after a dot is an error. */
				233	if (consumed == 0) {
				234	PyErr_Format(PyExc_ValueError,
				235	"Format specifier missing precision");
				236	return 0;
				237	}
				238
				239	}
				240
				241	/* Finally, parse the type field. */
				242
				243	if (end-pos > 1) {
				244	/* More than one char remain, invalid conversion spec. */
				245	PyErr_Format(PyExc_ValueError, "Invalid conversion specification");
				246	return 0;
				247	}
				248
				249	if (end-pos == 1) {
				250	format->type = READ_spec(pos);
				251	++pos;
				252	}
				253
				254	/* Do as much validating as we can, just by looking at the format
				255	specifier. Do not take into account what type of formatting
				256	we're doing (int, float, string). */
				257
				258	if (format->thousands_separators) {
				259	switch (format->type) {
				260	case 'd':
				261	case 'e':
				262	case 'f':
				263	case 'g':
				264	case 'E':
				265	case 'G':
				266	case '%':
				267	case 'F':
				268	case '\0':
				269	/* These are allowed. See PEP 378.*/
				270	break;
				271	default:
				272	invalid_comma_type(format->type);
				273	return 0;
				274	}
				275	}
				276
				277	if (format->fill_char > 127 \|\| format->align > 127 \|\|
				278	format->sign > 127) {
				279	PyErr_SetString(PyExc_ValueError, "fill character too large");
				280	return 0;
				281	}
				282
				283	return 1;
				284	}
				285
				286	/* Calculate the padding needed. */
				287	static void
				288	calc_padding(Py_ssize_t nchars, Py_ssize_t width, Py_UCS4 align,
				289	Py_ssize_t n_lpadding, Py_ssize_t n_rpadding,
				290	Py_ssize_t *n_total)
				291	{
				292	if (width >= 0) {
				293	if (nchars > width)
				294	*n_total = nchars;
				295	else
				296	*n_total = width;
				297	}
				298	else {
				299	/* not specified, use all of the chars and no more */
				300	*n_total = nchars;
				301	}
				302
				303	/* Figure out how much leading space we need, based on the
				304	aligning */
				305	if (align == '>')
				306	n_lpadding = n_total - nchars;
				307	else if (align == '^')
				308	n_lpadding = (n_total - nchars) / 2;
				309	else if (align == '<' \|\| align == '=')
				310	*n_lpadding = 0;
				311	else {
				312	/* We should never have an unspecified alignment. */
				313	*n_lpadding = 0;
				314	assert(0);
				315	}
				316
				317	n_rpadding = n_total - nchars - *n_lpadding;
				318	}
				319
				320	static void
				321	unicode_fill(PyObject *str, Py_ssize_t start, Py_ssize_t end, Py_UCS4 ch)
				322	{
				323	int kind = PyUnicode_KIND(str);
				324	void *data = PyUnicode_DATA(str);
				325	while (start < end)
				326	PyUnicode_WRITE(kind, data, start++, ch);
				327	}
				328
				329	/* Do the padding, and return a pointer to where the caller-supplied
				330	content goes. */
				331	static Py_ssize_t
				332	fill_padding(PyObject *s, Py_ssize_t start, Py_ssize_t nchars,
				333	Py_UCS4 fill_char, Py_ssize_t n_lpadding,
				334	Py_ssize_t n_rpadding)
				335	{
				336	/* Pad on left. */
				337	if (n_lpadding)
				338	unicode_fill(s, start, start + n_lpadding, fill_char);
				339
				340	/* Pad on right. */
				341	if (n_rpadding)
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	342	unicode_fill(s, start + nchars + n_lpadding,
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	343	start + nchars + n_lpadding + n_rpadding, fill_char);
				344
				345	/* Pointer to the user content. */
				346	return start + n_lpadding;
				347	}
				348
				349	/************************************************************************/
				350	/********* common routines for numeric formatting *******************/
				351	/************************************************************************/
				352
				353	/* Locale type codes. */
				354	#define LT_CURRENT_LOCALE 0
				355	#define LT_DEFAULT_LOCALE 1
				356	#define LT_NO_LOCALE 2
				357
				358	/* Locale info needed for formatting integers and the part of floats
				359	before and including the decimal. Note that locales only support
				360	8-bit chars, not unicode. */
				361	typedef struct {
				362	char *decimal_point;
				363	char *thousands_sep;
				364	char *grouping;
				365	} LocaleInfo;
				366
				367	/* describes the layout for an integer, see the comment in
				368	calc_number_widths() for details */
				369	typedef struct {
				370	Py_ssize_t n_lpadding;
				371	Py_ssize_t n_prefix;
				372	Py_ssize_t n_spadding;
				373	Py_ssize_t n_rpadding;
				374	char sign;
				375	Py_ssize_t n_sign; /* number of digits needed for sign (0/1) */
				376	Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including
				377	any grouping chars. */
				378	Py_ssize_t n_decimal; /* 0 if only an integer */
				379	Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part,
				380	excluding the decimal itself, if
				381	present. */
				382
				383	/* These 2 are not the widths of fields, but are needed by
				384	STRINGLIB_GROUPING. */
				385	Py_ssize_t n_digits; /* The number of digits before a decimal
				386	or exponent. */
				387	Py_ssize_t n_min_width; /* The min_width we used when we computed
				388	the n_grouped_digits width. */
				389	} NumberFieldWidths;
				390
				391
				392	/* Given a number of the form:
				393	digits[remainder]
				394	where ptr points to the start and end points to the end, find where
				395	the integer part ends. This could be a decimal, an exponent, both,
				396	or neither.
				397	If a decimal point is present, set *has_decimal and increment
				398	remainder beyond it.
				399	Results are undefined (but shouldn't crash) for improperly
				400	formatted strings.
				401	*/
				402	static void
				403	parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end,
				404	Py_ssize_t n_remainder, int has_decimal)
				405	{
				406	Py_ssize_t remainder;
				407
				408	while (pos<end && isdigit(PyUnicode_READ_CHAR(s, pos)))
				409	++pos;
				410	remainder = pos;
				411
				412	/* Does remainder start with a decimal point? */
				413	*has_decimal = pos<end && PyUnicode_READ_CHAR(s, remainder) == '.';
				414
				415	/* Skip the decimal point. */
				416	if (*has_decimal)
				417	remainder++;
				418
				419	*n_remainder = end - remainder;
				420	}
				421
				422	/* not all fields of format are used. for example, precision is
				423	unused. should this take discrete params in order to be more clear
				424	about what it does? or is passing a single format parameter easier
				425	and more efficient enough to justify a little obfuscation? */
				426	static Py_ssize_t
				427	calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
				428	Py_UCS4 sign_char, PyObject *number, Py_ssize_t n_start,
				429	Py_ssize_t n_end, Py_ssize_t n_remainder,
				430	int has_decimal, const LocaleInfo *locale,
				431	const InternalFormatSpec *format)
				432	{
				433	Py_ssize_t n_non_digit_non_padding;
				434	Py_ssize_t n_padding;
				435
				436	spec->n_digits = n_end - n_start - n_remainder - (has_decimal?1:0);
				437	spec->n_lpadding = 0;
				438	spec->n_prefix = n_prefix;
				439	spec->n_decimal = has_decimal ? strlen(locale->decimal_point) : 0;
				440	spec->n_remainder = n_remainder;
				441	spec->n_spadding = 0;
				442	spec->n_rpadding = 0;
				443	spec->sign = '\0';
				444	spec->n_sign = 0;
				445
				446	/* the output will look like:
				447	\| \|
				448	\| <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> \|
				449	\| \|
				450
				451	sign is computed from format->sign and the actual
				452	sign of the number
				453
				454	prefix is given (it's for the '0x' prefix)
				455
				456	digits is already known
				457
				458	the total width is either given, or computed from the
				459	actual digits
				460
				461	only one of lpadding, spadding, and rpadding can be non-zero,
				462	and it's calculated from the width and other fields
				463	*/
				464
				465	/* compute the various parts we're going to write */
				466	switch (format->sign) {
				467	case '+':
				468	/* always put a + or - */
				469	spec->n_sign = 1;
				470	spec->sign = (sign_char == '-' ? '-' : '+');
				471	break;
				472	case ' ':
				473	spec->n_sign = 1;
				474	spec->sign = (sign_char == '-' ? '-' : ' ');
				475	break;
				476	default:
				477	/* Not specified, or the default (-) */
				478	if (sign_char == '-') {
				479	spec->n_sign = 1;
				480	spec->sign = '-';
				481	}
				482	}
				483
				484	/* The number of chars used for non-digits and non-padding. */
				485	n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
				486	spec->n_remainder;
				487
				488	/* min_width can go negative, that's okay. format->width == -1 means
				489	we don't care. */
				490	if (format->fill_char == '0' && format->align == '=')
				491	spec->n_min_width = format->width - n_non_digit_non_padding;
				492	else
				493	spec->n_min_width = 0;
				494
				495	if (spec->n_digits == 0)
				496	/* This case only occurs when using 'c' formatting, we need
				497	to special case it because the grouping code always wants
				498	to have at least one character. */
				499	spec->n_grouped_digits = 0;
				500	else
				501	spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping(
Victor Stinner	c3cec78	2011-10-05 21:24:08 +0200	[diff] [blame]	502	NULL, PyUnicode_1BYTE_KIND, NULL, 0, NULL,
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	503	spec->n_digits, spec->n_min_width,
				504	locale->grouping, locale->thousands_sep);
				505
				506	/* Given the desired width and the total of digit and non-digit
				507	space we consume, see if we need any padding. format->width can
				508	be negative (meaning no padding), but this code still works in
				509	that case. */
				510	n_padding = format->width -
				511	(n_non_digit_non_padding + spec->n_grouped_digits);
				512	if (n_padding > 0) {
				513	/* Some padding is needed. Determine if it's left, space, or right. */
				514	switch (format->align) {
				515	case '<':
				516	spec->n_rpadding = n_padding;
				517	break;
				518	case '^':
				519	spec->n_lpadding = n_padding / 2;
				520	spec->n_rpadding = n_padding - spec->n_lpadding;
				521	break;
				522	case '=':
				523	spec->n_spadding = n_padding;
				524	break;
				525	case '>':
				526	spec->n_lpadding = n_padding;
				527	break;
				528	default:
				529	/* Shouldn't get here, but treat it as '>' */
				530	spec->n_lpadding = n_padding;
				531	assert(0);
				532	break;
				533	}
				534	}
				535	return spec->n_lpadding + spec->n_sign + spec->n_prefix +
				536	spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
				537	spec->n_remainder + spec->n_rpadding;
				538	}
				539
				540	/* Fill in the digit parts of a numbers's string representation,
				541	as determined in calc_number_widths().
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	542	Return -1 on error, or 0 on success. */
				543	static int
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	544	fill_number(PyObject out, Py_ssize_t pos, const NumberFieldWidths spec,
				545	PyObject *digits, Py_ssize_t d_start, Py_ssize_t d_end,
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	546	PyObject *prefix, Py_ssize_t p_start,
				547	Py_UCS4 fill_char,
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	548	LocaleInfo *locale, int toupper)
				549	{
				550	/* Used to keep track of digits, decimal, and remainder. */
				551	Py_ssize_t d_pos = d_start;
				552	unsigned int kind = PyUnicode_KIND(out);
				553	void *data = PyUnicode_DATA(out);
				554
				555	#ifndef NDEBUG
				556	Py_ssize_t r;
				557	#endif
				558
				559	if (spec->n_lpadding) {
				560	unicode_fill(out, pos, pos + spec->n_lpadding, fill_char);
				561	pos += spec->n_lpadding;
				562	}
				563	if (spec->n_sign == 1) {
				564	PyUnicode_WRITE(kind, data, pos++, spec->sign);
				565	}
				566	if (spec->n_prefix) {
Victor Stinner	fd85c3a	2011-09-28 21:53:49 +0200	[diff] [blame]	567	if (PyUnicode_CopyCharacters(out, pos,
				568	prefix, p_start,
				569	spec->n_prefix) < 0)
				570	return -1;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	571	if (toupper) {
				572	Py_ssize_t t;
				573	/* XXX if the upper-case prefix is wider than the target
				574	buffer, the caller should have allocated a wider string,
				575	but currently doesn't. */
				576	for (t = 0; t < spec->n_prefix; ++t)
				577	PyUnicode_WRITE(kind, data, pos + t,
				578	Py_UNICODE_TOUPPER(
				579	PyUnicode_READ(kind, data, pos + t)));
				580	}
				581	pos += spec->n_prefix;
				582	}
				583	if (spec->n_spadding) {
				584	unicode_fill(out, pos, pos + spec->n_spadding, fill_char);
				585	pos += spec->n_spadding;
				586	}
				587
				588	/* Only for type 'c' special case, it has no digits. */
				589	if (spec->n_digits != 0) {
				590	/* Fill the digits with InsertThousandsGrouping. */
Victor Stinner	dba2dee	2011-09-28 21:50:42 +0200	[diff] [blame]	591	char *pdigits;
				592	if (PyUnicode_READY(digits))
				593	return -1;
				594	pdigits = PyUnicode_DATA(digits);
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	595	if (PyUnicode_KIND(digits) < kind) {
				596	pdigits = _PyUnicode_AsKind(digits, kind);
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	597	if (pdigits == NULL)
				598	return -1;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	599	}
				600	#ifndef NDEBUG
				601	r =
				602	#endif
				603	_PyUnicode_InsertThousandsGrouping(
Victor Stinner	c3cec78	2011-10-05 21:24:08 +0200	[diff] [blame]	604	out, kind,
Martin v. Löwis	c47adb0	2011-10-07 20:55:35 +0200	[diff] [blame]	605	(char)data + kind pos,
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	606	spec->n_grouped_digits,
Martin v. Löwis	c47adb0	2011-10-07 20:55:35 +0200	[diff] [blame]	607	pdigits + kind * d_pos,
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	608	spec->n_digits, spec->n_min_width,
				609	locale->grouping, locale->thousands_sep);
				610	#ifndef NDEBUG
				611	assert(r == spec->n_grouped_digits);
				612	#endif
				613	if (PyUnicode_KIND(digits) < kind)
				614	PyMem_Free(pdigits);
				615	d_pos += spec->n_digits;
				616	}
				617	if (toupper) {
				618	Py_ssize_t t;
				619	for (t = 0; t < spec->n_grouped_digits; ++t)
				620	PyUnicode_WRITE(kind, data, pos + t,
				621	Py_UNICODE_TOUPPER(
				622	PyUnicode_READ(kind, data, pos + t)));
				623	}
				624	pos += spec->n_grouped_digits;
				625
				626	if (spec->n_decimal) {
				627	Py_ssize_t t;
				628	for (t = 0; t < spec->n_decimal; ++t)
				629	PyUnicode_WRITE(kind, data, pos + t,
				630	locale->decimal_point[t]);
				631	pos += spec->n_decimal;
				632	d_pos += 1;
				633	}
				634
				635	if (spec->n_remainder) {
Victor Stinner	fd85c3a	2011-09-28 21:53:49 +0200	[diff] [blame]	636	if (PyUnicode_CopyCharacters(out, pos, digits, d_pos, spec->n_remainder) < 0)
				637	return -1;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	638	pos += spec->n_remainder;
				639	d_pos += spec->n_remainder;
				640	}
				641
				642	if (spec->n_rpadding) {
				643	unicode_fill(out, pos, pos + spec->n_rpadding, fill_char);
				644	pos += spec->n_rpadding;
				645	}
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	646	return 0;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	647	}
				648
				649	static char no_grouping[1] = {CHAR_MAX};
				650
				651	/* Find the decimal point character(s?), thousands_separator(s?), and
				652	grouping description, either for the current locale if type is
				653	LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE, or
				654	none if LT_NO_LOCALE. */
				655	static void
				656	get_locale_info(int type, LocaleInfo *locale_info)
				657	{
				658	switch (type) {
				659	case LT_CURRENT_LOCALE: {
				660	struct lconv *locale_data = localeconv();
				661	locale_info->decimal_point = locale_data->decimal_point;
				662	locale_info->thousands_sep = locale_data->thousands_sep;
				663	locale_info->grouping = locale_data->grouping;
				664	break;
				665	}
				666	case LT_DEFAULT_LOCALE:
				667	locale_info->decimal_point = ".";
				668	locale_info->thousands_sep = ",";
				669	locale_info->grouping = "\3"; /* Group every 3 characters. The
				670	(implicit) trailing 0 means repeat
				671	infinitely. */
				672	break;
				673	case LT_NO_LOCALE:
				674	locale_info->decimal_point = ".";
				675	locale_info->thousands_sep = "";
				676	locale_info->grouping = no_grouping;
				677	break;
				678	default:
				679	assert(0);
				680	}
				681	}
				682
				683	/************************************************************************/
				684	/********* string formatting ****************************************/
				685	/************************************************************************/
				686
				687	static PyObject *
				688	format_string_internal(PyObject value, const InternalFormatSpec format)
				689	{
				690	Py_ssize_t lpad;
				691	Py_ssize_t rpad;
				692	Py_ssize_t total;
				693	Py_ssize_t pos;
Victor Stinner	c4f281e	2011-10-11 22:11:42 +0200	[diff] [blame]	694	Py_ssize_t len = PyUnicode_GET_LENGTH(value);
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	695	PyObject *result = NULL;
				696	int maxchar = 127;
				697
				698	/* sign is not allowed on strings */
				699	if (format->sign != '\0') {
				700	PyErr_SetString(PyExc_ValueError,
				701	"Sign not allowed in string format specifier");
				702	goto done;
				703	}
				704
				705	/* alternate is not allowed on strings */
				706	if (format->alternate) {
				707	PyErr_SetString(PyExc_ValueError,
				708	"Alternate form (#) not allowed in string format "
				709	"specifier");
				710	goto done;
				711	}
				712
				713	/* '=' alignment not allowed on strings */
				714	if (format->align == '=') {
				715	PyErr_SetString(PyExc_ValueError,
				716	"'=' alignment not allowed "
				717	"in string format specifier");
				718	goto done;
				719	}
				720
				721	/* if precision is specified, output no more that format.precision
				722	characters */
				723	if (format->precision >= 0 && len >= format->precision) {
				724	len = format->precision;
				725	}
				726
				727	calc_padding(len, format->width, format->align, &lpad, &rpad, &total);
				728
				729	/* allocate the resulting string */
				730	result = PyUnicode_New(total, maxchar);
				731	if (result == NULL)
				732	goto done;
				733
				734	/* Write into that space. First the padding. */
				735	pos = fill_padding(result, 0, len,
				736	format->fill_char=='\0'?' ':format->fill_char,
				737	lpad, rpad);
				738
				739	/* Then the source string. */
Victor Stinner	fd85c3a	2011-09-28 21:53:49 +0200	[diff] [blame]	740	if (PyUnicode_CopyCharacters(result, pos, value, 0, len) < 0)
				741	Py_CLEAR(result);
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	742
				743	done:
				744	return result;
				745	}
				746
				747
				748	/************************************************************************/
				749	/********* long formatting ******************************************/
				750	/************************************************************************/
				751
				752	typedef PyObject*
				753	(IntOrLongToString)(PyObject value, int base);
				754
				755	static PyObject *
				756	format_int_or_long_internal(PyObject value, const InternalFormatSpec format,
				757	IntOrLongToString tostring)
				758	{
				759	PyObject *result = NULL;
				760	int maxchar = 127;
				761	PyObject *tmp = NULL;
				762	Py_ssize_t inumeric_chars;
				763	Py_UCS4 sign_char = '\0';
				764	Py_ssize_t n_digits; /* count of digits need from the computed
				765	string */
				766	Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which
				767	produces non-digits */
				768	Py_ssize_t n_prefix = 0; /* Count of prefix chars, (e.g., '0x') */
				769	Py_ssize_t n_total;
				770	Py_ssize_t prefix;
				771	NumberFieldWidths spec;
				772	long x;
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	773	int err;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	774
				775	/* Locale settings, either from the actual locale or
				776	from a hard-code pseudo-locale */
				777	LocaleInfo locale;
				778
				779	/* no precision allowed on integers */
				780	if (format->precision != -1) {
				781	PyErr_SetString(PyExc_ValueError,
				782	"Precision not allowed in integer format specifier");
				783	goto done;
				784	}
				785
				786	/* special case for character formatting */
				787	if (format->type == 'c') {
				788	/* error to specify a sign */
				789	if (format->sign != '\0') {
				790	PyErr_SetString(PyExc_ValueError,
				791	"Sign not allowed with integer"
				792	" format specifier 'c'");
				793	goto done;
				794	}
				795
				796	/* taken from unicodeobject.c formatchar() */
				797	/* Integer input truncated to a character */
				798	/* XXX: won't work for int */
				799	x = PyLong_AsLong(value);
				800	if (x == -1 && PyErr_Occurred())
				801	goto done;
				802	if (x < 0 \|\| x > 0x10ffff) {
				803	PyErr_SetString(PyExc_OverflowError,
				804	"%c arg not in range(0x110000) "
				805	"(wide Python build)");
				806	goto done;
				807	}
				808	tmp = PyUnicode_FromOrdinal(x);
				809	inumeric_chars = 0;
				810	n_digits = 1;
				811	if (x > maxchar)
				812	maxchar = x;
				813
				814	/* As a sort-of hack, we tell calc_number_widths that we only
				815	have "remainder" characters. calc_number_widths thinks
				816	these are characters that don't get formatted, only copied
				817	into the output string. We do this for 'c' formatting,
				818	because the characters are likely to be non-digits. */
				819	n_remainder = 1;
				820	}
				821	else {
				822	int base;
				823	int leading_chars_to_skip = 0; /* Number of characters added by
				824	PyNumber_ToBase that we want to
				825	skip over. */
				826
				827	/* Compute the base and how many characters will be added by
				828	PyNumber_ToBase */
				829	switch (format->type) {
				830	case 'b':
				831	base = 2;
				832	leading_chars_to_skip = 2; /* 0b */
				833	break;
				834	case 'o':
				835	base = 8;
				836	leading_chars_to_skip = 2; /* 0o */
				837	break;
				838	case 'x':
				839	case 'X':
				840	base = 16;
				841	leading_chars_to_skip = 2; /* 0x */
				842	break;
				843	default: /* shouldn't be needed, but stops a compiler warning */
				844	case 'd':
				845	case 'n':
				846	base = 10;
				847	break;
				848	}
				849
				850	/* The number of prefix chars is the same as the leading
				851	chars to skip */
				852	if (format->alternate)
				853	n_prefix = leading_chars_to_skip;
				854
				855	/* Do the hard part, converting to a string in a given base */
				856	tmp = tostring(value, base);
				857	if (tmp == NULL \|\| PyUnicode_READY(tmp) == -1)
				858	goto done;
				859
				860	inumeric_chars = 0;
				861	n_digits = PyUnicode_GET_LENGTH(tmp);
				862
				863	prefix = inumeric_chars;
				864
				865	/* Is a sign character present in the output? If so, remember it
				866	and skip it */
				867	if (PyUnicode_READ_CHAR(tmp, inumeric_chars) == '-') {
				868	sign_char = '-';
				869	++prefix;
				870	++leading_chars_to_skip;
				871	}
				872
				873	/* Skip over the leading chars (0x, 0b, etc.) */
				874	n_digits -= leading_chars_to_skip;
				875	inumeric_chars += leading_chars_to_skip;
				876	}
				877
				878	/* Determine the grouping, separator, and decimal point, if any. */
				879	get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
				880	(format->thousands_separators ?
				881	LT_DEFAULT_LOCALE :
				882	LT_NO_LOCALE),
				883	&locale);
				884
				885	/* Calculate how much memory we'll need. */
				886	n_total = calc_number_widths(&spec, n_prefix, sign_char, tmp, inumeric_chars,
				887	inumeric_chars + n_digits, n_remainder, 0, &locale, format);
				888
				889	/* Allocate the memory. */
				890	result = PyUnicode_New(n_total, maxchar);
				891	if (!result)
				892	goto done;
				893
				894	/* Populate the memory. */
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	895	err = fill_number(result, 0, &spec,
				896	tmp, inumeric_chars, inumeric_chars + n_digits,
				897	tmp, prefix,
				898	format->fill_char == '\0' ? ' ' : format->fill_char,
				899	&locale, format->type == 'X');
				900	if (err)
				901	Py_CLEAR(result);
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	902
				903	done:
				904	Py_XDECREF(tmp);
				905	return result;
				906	}
				907
				908	/************************************************************************/
				909	/********* float formatting *****************************************/
				910	/************************************************************************/
				911
				912	static PyObject*
				913	strtounicode(char *charbuffer, Py_ssize_t len)
				914	{
				915	return PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, charbuffer, len);
				916	}
				917
				918	/* much of this is taken from unicodeobject.c */
				919	static PyObject *
				920	format_float_internal(PyObject *value,
				921	const InternalFormatSpec *format)
				922	{
				923	char buf = NULL; / buffer returned from PyOS_double_to_string */
				924	Py_ssize_t n_digits;
				925	Py_ssize_t n_remainder;
				926	Py_ssize_t n_total;
				927	int has_decimal;
				928	double val;
				929	Py_ssize_t precision = format->precision;
				930	Py_ssize_t default_precision = 6;
				931	Py_UCS4 type = format->type;
				932	int add_pct = 0;
				933	Py_ssize_t index;
				934	NumberFieldWidths spec;
				935	int flags = 0;
				936	PyObject *result = NULL;
				937	int maxchar = 127;
				938	Py_UCS4 sign_char = '\0';
				939	int float_type; /* Used to see if we have a nan, inf, or regular float. */
				940	PyObject *unicode_tmp = NULL;
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	941	int err;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	942
				943	/* Locale settings, either from the actual locale or
				944	from a hard-code pseudo-locale */
				945	LocaleInfo locale;
				946
				947	if (format->alternate)
				948	flags \|= Py_DTSF_ALT;
				949
				950	if (type == '\0') {
				951	/* Omitted type specifier. Behaves in the same way as repr(x)
				952	and str(x) if no precision is given, else like 'g', but with
				953	at least one digit after the decimal point. */
				954	flags \|= Py_DTSF_ADD_DOT_0;
				955	type = 'r';
				956	default_precision = 0;
				957	}
				958
				959	if (type == 'n')
				960	/* 'n' is the same as 'g', except for the locale used to
				961	format the result. We take care of that later. */
				962	type = 'g';
				963
				964	val = PyFloat_AsDouble(value);
				965	if (val == -1.0 && PyErr_Occurred())
				966	goto done;
				967
				968	if (type == '%') {
				969	type = 'f';
				970	val *= 100;
				971	add_pct = 1;
				972	}
				973
				974	if (precision < 0)
				975	precision = default_precision;
				976	else if (type == 'r')
				977	type = 'g';
				978
				979	/* Cast "type", because if we're in unicode we need to pass a
				980	8-bit char. This is safe, because we've restricted what "type"
				981	can be. */
				982	buf = PyOS_double_to_string(val, (char)type, precision, flags,
				983	&float_type);
				984	if (buf == NULL)
				985	goto done;
				986	n_digits = strlen(buf);
				987
				988	if (add_pct) {
				989	/* We know that buf has a trailing zero (since we just called
				990	strlen() on it), and we don't use that fact any more. So we
				991	can just write over the trailing zero. */
				992	buf[n_digits] = '%';
				993	n_digits += 1;
				994	}
				995
				996	/* Since there is no unicode version of PyOS_double_to_string,
				997	just use the 8 bit version and then convert to unicode. */
				998	unicode_tmp = strtounicode(buf, n_digits);
				999	if (unicode_tmp == NULL)
				1000	goto done;
				1001	index = 0;
				1002
				1003	/* Is a sign character present in the output? If so, remember it
				1004	and skip it */
				1005	if (PyUnicode_READ_CHAR(unicode_tmp, index) == '-') {
				1006	sign_char = '-';
				1007	++index;
				1008	--n_digits;
				1009	}
				1010
				1011	/* Determine if we have any "remainder" (after the digits, might include
				1012	decimal or exponent or both (or neither)) */
				1013	parse_number(unicode_tmp, index, index + n_digits, &n_remainder, &has_decimal);
				1014
				1015	/* Determine the grouping, separator, and decimal point, if any. */
				1016	get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
				1017	(format->thousands_separators ?
				1018	LT_DEFAULT_LOCALE :
				1019	LT_NO_LOCALE),
				1020	&locale);
				1021
				1022	/* Calculate how much memory we'll need. */
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	1023	n_total = calc_number_widths(&spec, 0, sign_char, unicode_tmp, index,
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1024	index + n_digits, n_remainder, has_decimal,
				1025	&locale, format);
				1026
				1027	/* Allocate the memory. */
				1028	result = PyUnicode_New(n_total, maxchar);
				1029	if (result == NULL)
				1030	goto done;
				1031
				1032	/* Populate the memory. */
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	1033	err = fill_number(result, 0, &spec,
				1034	unicode_tmp, index, index + n_digits,
				1035	NULL, 0,
				1036	format->fill_char == '\0' ? ' ' : format->fill_char,
				1037	&locale, 0);
				1038	if (err)
				1039	Py_CLEAR(result);
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1040
				1041	done:
				1042	PyMem_Free(buf);
				1043	Py_DECREF(unicode_tmp);
				1044	return result;
				1045	}
				1046
				1047	/************************************************************************/
				1048	/********* complex formatting ***************************************/
				1049	/************************************************************************/
				1050
				1051	static PyObject *
				1052	format_complex_internal(PyObject *value,
				1053	const InternalFormatSpec *format)
				1054	{
				1055	double re;
				1056	double im;
				1057	char re_buf = NULL; / buffer returned from PyOS_double_to_string */
				1058	char im_buf = NULL; / buffer returned from PyOS_double_to_string */
				1059
				1060	InternalFormatSpec tmp_format = *format;
				1061	Py_ssize_t n_re_digits;
				1062	Py_ssize_t n_im_digits;
				1063	Py_ssize_t n_re_remainder;
				1064	Py_ssize_t n_im_remainder;
				1065	Py_ssize_t n_re_total;
				1066	Py_ssize_t n_im_total;
				1067	int re_has_decimal;
				1068	int im_has_decimal;
				1069	Py_ssize_t precision = format->precision;
				1070	Py_ssize_t default_precision = 6;
				1071	Py_UCS4 type = format->type;
				1072	Py_ssize_t i_re;
				1073	Py_ssize_t i_im;
				1074	NumberFieldWidths re_spec;
				1075	NumberFieldWidths im_spec;
				1076	int flags = 0;
				1077	PyObject *result = NULL;
				1078	int maxchar = 127;
				1079	int rkind;
				1080	void *rdata;
				1081	Py_ssize_t index;
				1082	Py_UCS4 re_sign_char = '\0';
				1083	Py_UCS4 im_sign_char = '\0';
				1084	int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
				1085	int im_float_type;
				1086	int add_parens = 0;
				1087	int skip_re = 0;
				1088	Py_ssize_t lpad;
				1089	Py_ssize_t rpad;
				1090	Py_ssize_t total;
				1091	PyObject *re_unicode_tmp = NULL;
				1092	PyObject *im_unicode_tmp = NULL;
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	1093	int err;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1094
				1095	/* Locale settings, either from the actual locale or
				1096	from a hard-code pseudo-locale */
				1097	LocaleInfo locale;
				1098
				1099	/* Zero padding is not allowed. */
				1100	if (format->fill_char == '0') {
				1101	PyErr_SetString(PyExc_ValueError,
				1102	"Zero padding is not allowed in complex format "
				1103	"specifier");
				1104	goto done;
				1105	}
				1106
				1107	/* Neither is '=' alignment . */
				1108	if (format->align == '=') {
				1109	PyErr_SetString(PyExc_ValueError,
				1110	"'=' alignment flag is not allowed in complex format "
				1111	"specifier");
				1112	goto done;
				1113	}
				1114
				1115	re = PyComplex_RealAsDouble(value);
				1116	if (re == -1.0 && PyErr_Occurred())
				1117	goto done;
				1118	im = PyComplex_ImagAsDouble(value);
				1119	if (im == -1.0 && PyErr_Occurred())
				1120	goto done;
				1121
				1122	if (format->alternate)
				1123	flags \|= Py_DTSF_ALT;
				1124
				1125	if (type == '\0') {
				1126	/* Omitted type specifier. Should be like str(self). */
				1127	type = 'r';
				1128	default_precision = 0;
				1129	if (re == 0.0 && copysign(1.0, re) == 1.0)
				1130	skip_re = 1;
				1131	else
				1132	add_parens = 1;
				1133	}
				1134
				1135	if (type == 'n')
				1136	/* 'n' is the same as 'g', except for the locale used to
				1137	format the result. We take care of that later. */
				1138	type = 'g';
				1139
				1140	if (precision < 0)
				1141	precision = default_precision;
				1142	else if (type == 'r')
				1143	type = 'g';
				1144
				1145	/* Cast "type", because if we're in unicode we need to pass a
				1146	8-bit char. This is safe, because we've restricted what "type"
				1147	can be. */
				1148	re_buf = PyOS_double_to_string(re, (char)type, precision, flags,
				1149	&re_float_type);
				1150	if (re_buf == NULL)
				1151	goto done;
				1152	im_buf = PyOS_double_to_string(im, (char)type, precision, flags,
				1153	&im_float_type);
				1154	if (im_buf == NULL)
				1155	goto done;
				1156
				1157	n_re_digits = strlen(re_buf);
				1158	n_im_digits = strlen(im_buf);
				1159
				1160	/* Since there is no unicode version of PyOS_double_to_string,
				1161	just use the 8 bit version and then convert to unicode. */
				1162	re_unicode_tmp = strtounicode(re_buf, n_re_digits);
				1163	if (re_unicode_tmp == NULL)
				1164	goto done;
				1165	i_re = 0;
				1166
				1167	im_unicode_tmp = strtounicode(im_buf, n_im_digits);
				1168	if (im_unicode_tmp == NULL)
				1169	goto done;
				1170	i_im = 0;
				1171
				1172	/* Is a sign character present in the output? If so, remember it
				1173	and skip it */
				1174	if (PyUnicode_READ_CHAR(re_unicode_tmp, i_re) == '-') {
				1175	re_sign_char = '-';
				1176	++i_re;
				1177	--n_re_digits;
				1178	}
				1179	if (PyUnicode_READ_CHAR(im_unicode_tmp, i_im) == '-') {
				1180	im_sign_char = '-';
				1181	++i_im;
				1182	--n_im_digits;
				1183	}
				1184
				1185	/* Determine if we have any "remainder" (after the digits, might include
				1186	decimal or exponent or both (or neither)) */
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	1187	parse_number(re_unicode_tmp, i_re, i_re + n_re_digits,
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1188	&n_re_remainder, &re_has_decimal);
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	1189	parse_number(im_unicode_tmp, i_im, i_im + n_im_digits,
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1190	&n_im_remainder, &im_has_decimal);
				1191
				1192	/* Determine the grouping, separator, and decimal point, if any. */
				1193	get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
				1194	(format->thousands_separators ?
				1195	LT_DEFAULT_LOCALE :
				1196	LT_NO_LOCALE),
				1197	&locale);
				1198
				1199	/* Turn off any padding. We'll do it later after we've composed
				1200	the numbers without padding. */
				1201	tmp_format.fill_char = '\0';
				1202	tmp_format.align = '<';
				1203	tmp_format.width = -1;
				1204
				1205	/* Calculate how much memory we'll need. */
				1206	n_re_total = calc_number_widths(&re_spec, 0, re_sign_char, re_unicode_tmp,
				1207	i_re, i_re + n_re_digits, n_re_remainder,
				1208	re_has_decimal, &locale, &tmp_format);
				1209
				1210	/* Same formatting, but always include a sign, unless the real part is
				1211	* going to be omitted, in which case we use whatever sign convention was
				1212	* requested by the original format. */
				1213	if (!skip_re)
				1214	tmp_format.sign = '+';
				1215	n_im_total = calc_number_widths(&im_spec, 0, im_sign_char, im_unicode_tmp,
				1216	i_im, i_im + n_im_digits, n_im_remainder,
				1217	im_has_decimal, &locale, &tmp_format);
				1218
				1219	if (skip_re)
				1220	n_re_total = 0;
				1221
				1222	/* Add 1 for the 'j', and optionally 2 for parens. */
				1223	calc_padding(n_re_total + n_im_total + 1 + add_parens * 2,
				1224	format->width, format->align, &lpad, &rpad, &total);
				1225
				1226	result = PyUnicode_New(total, maxchar);
				1227	if (result == NULL)
				1228	goto done;
				1229	rkind = PyUnicode_KIND(result);
				1230	rdata = PyUnicode_DATA(result);
				1231
				1232	/* Populate the memory. First, the padding. */
				1233	index = fill_padding(result, 0,
				1234	n_re_total + n_im_total + 1 + add_parens * 2,
				1235	format->fill_char=='\0' ? ' ' : format->fill_char,
				1236	lpad, rpad);
				1237
				1238	if (add_parens)
				1239	PyUnicode_WRITE(rkind, rdata, index++, '(');
				1240
				1241	if (!skip_re) {
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	1242	err = fill_number(result, index, &re_spec,
				1243	re_unicode_tmp, i_re, i_re + n_re_digits,
				1244	NULL, 0,
				1245	0,
				1246	&locale, 0);
				1247	if (err) {
				1248	Py_CLEAR(result);
				1249	goto done;
				1250	}
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1251	index += n_re_total;
				1252	}
Victor Stinner	afbaa20	2011-09-28 21:50:16 +0200	[diff] [blame]	1253	err = fill_number(result, index, &im_spec,
				1254	im_unicode_tmp, i_im, i_im + n_im_digits,
				1255	NULL, 0,
				1256	0,
				1257	&locale, 0);
				1258	if (err) {
				1259	Py_CLEAR(result);
				1260	goto done;
				1261	}
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1262	index += n_im_total;
				1263	PyUnicode_WRITE(rkind, rdata, index++, 'j');
				1264
				1265	if (add_parens)
				1266	PyUnicode_WRITE(rkind, rdata, index++, ')');
				1267
				1268	done:
				1269	PyMem_Free(re_buf);
				1270	PyMem_Free(im_buf);
				1271	Py_XDECREF(re_unicode_tmp);
				1272	Py_XDECREF(im_unicode_tmp);
				1273	return result;
				1274	}
				1275
				1276	/************************************************************************/
				1277	/********* built in formatters **************************************/
				1278	/************************************************************************/
				1279	PyObject *
				1280	_PyUnicode_FormatAdvanced(PyObject *obj,
				1281	PyObject *format_spec,
				1282	Py_ssize_t start, Py_ssize_t end)
				1283	{
				1284	InternalFormatSpec format;
Victor Stinner	fb9ea8c	2011-10-06 01:45:57 +0200	[diff] [blame]	1285	PyObject *result;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1286
				1287	/* check for the special case of zero length format spec, make
				1288	it equivalent to str(obj) */
Victor Stinner	fb9ea8c	2011-10-06 01:45:57 +0200	[diff] [blame]	1289	if (start == end)
				1290	return PyObject_Str(obj);
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1291
				1292	/* parse the format_spec */
				1293	if (!parse_internal_render_format_spec(format_spec, start, end,
				1294	&format, 's', '<'))
Victor Stinner	fb9ea8c	2011-10-06 01:45:57 +0200	[diff] [blame]	1295	return NULL;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1296
				1297	/* type conversion? */
				1298	switch (format.type) {
				1299	case 's':
				1300	/* no type conversion needed, already a string. do the formatting */
				1301	result = format_string_internal(obj, &format);
Victor Stinner	fb9ea8c	2011-10-06 01:45:57 +0200	[diff] [blame]	1302	if (result != NULL)
				1303	assert(_PyUnicode_CheckConsistency(result, 1));
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1304	break;
				1305	default:
				1306	/* unknown */
				1307	unknown_presentation_type(format.type, obj->ob_type->tp_name);
Victor Stinner	fb9ea8c	2011-10-06 01:45:57 +0200	[diff] [blame]	1308	result = NULL;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1309	}
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1310	return result;
				1311	}
				1312
				1313	static PyObject*
				1314	format_int_or_long(PyObject* obj, PyObject* format_spec,
				1315	Py_ssize_t start, Py_ssize_t end,
				1316	IntOrLongToString tostring)
				1317	{
				1318	PyObject *result = NULL;
				1319	PyObject *tmp = NULL;
				1320	InternalFormatSpec format;
				1321
				1322	/* check for the special case of zero length format spec, make
				1323	it equivalent to str(obj) */
				1324	if (start == end) {
				1325	result = PyObject_Str(obj);
				1326	goto done;
				1327	}
				1328
				1329	/* parse the format_spec */
				1330	if (!parse_internal_render_format_spec(format_spec, start, end,
				1331	&format, 'd', '>'))
				1332	goto done;
				1333
				1334	/* type conversion? */
				1335	switch (format.type) {
				1336	case 'b':
				1337	case 'c':
				1338	case 'd':
				1339	case 'o':
				1340	case 'x':
				1341	case 'X':
				1342	case 'n':
				1343	/* no type conversion needed, already an int (or long). do
				1344	the formatting */
				1345	result = format_int_or_long_internal(obj, &format, tostring);
				1346	break;
				1347
				1348	case 'e':
				1349	case 'E':
				1350	case 'f':
				1351	case 'F':
				1352	case 'g':
				1353	case 'G':
				1354	case '%':
				1355	/* convert to float */
				1356	tmp = PyNumber_Float(obj);
				1357	if (tmp == NULL)
				1358	goto done;
				1359	result = format_float_internal(tmp, &format);
				1360	break;
				1361
				1362	default:
				1363	/* unknown */
				1364	unknown_presentation_type(format.type, obj->ob_type->tp_name);
				1365	goto done;
				1366	}
				1367
				1368	done:
				1369	Py_XDECREF(tmp);
				1370	return result;
				1371	}
				1372
				1373	/* Need to define long_format as a function that will convert a long
				1374	to a string. In 3.0, _PyLong_Format has the correct signature. */
				1375	#define long_format _PyLong_Format
				1376
				1377	PyObject *
				1378	_PyLong_FormatAdvanced(PyObject *obj,
				1379	PyObject *format_spec,
				1380	Py_ssize_t start, Py_ssize_t end)
				1381	{
				1382	return format_int_or_long(obj, format_spec, start, end,
				1383	long_format);
				1384	}
				1385
				1386	PyObject *
				1387	_PyFloat_FormatAdvanced(PyObject *obj,
				1388	PyObject *format_spec,
				1389	Py_ssize_t start, Py_ssize_t end)
				1390	{
				1391	PyObject *result = NULL;
				1392	InternalFormatSpec format;
				1393
				1394	/* check for the special case of zero length format spec, make
				1395	it equivalent to str(obj) */
				1396	if (start == end) {
				1397	result = PyObject_Str(obj);
				1398	goto done;
				1399	}
				1400
				1401	/* parse the format_spec */
				1402	if (!parse_internal_render_format_spec(format_spec, start, end,
				1403	&format, '\0', '>'))
				1404	goto done;
				1405
				1406	/* type conversion? */
				1407	switch (format.type) {
				1408	case '\0': /* No format code: like 'g', but with at least one decimal. */
				1409	case 'e':
				1410	case 'E':
				1411	case 'f':
				1412	case 'F':
				1413	case 'g':
				1414	case 'G':
				1415	case 'n':
				1416	case '%':
				1417	/* no conversion, already a float. do the formatting */
				1418	result = format_float_internal(obj, &format);
				1419	break;
				1420
				1421	default:
				1422	/* unknown */
				1423	unknown_presentation_type(format.type, obj->ob_type->tp_name);
				1424	goto done;
				1425	}
				1426
				1427	done:
				1428	return result;
				1429	}
				1430
				1431	PyObject *
				1432	_PyComplex_FormatAdvanced(PyObject *obj,
				1433	PyObject *format_spec,
				1434	Py_ssize_t start, Py_ssize_t end)
				1435	{
				1436	PyObject *result = NULL;
				1437	InternalFormatSpec format;
				1438
				1439	/* check for the special case of zero length format spec, make
				1440	it equivalent to str(obj) */
				1441	if (start == end) {
				1442	result = PyObject_Str(obj);
				1443	goto done;
				1444	}
				1445
				1446	/* parse the format_spec */
				1447	if (!parse_internal_render_format_spec(format_spec, start, end,
				1448	&format, '\0', '>'))
				1449	goto done;
				1450
				1451	/* type conversion? */
				1452	switch (format.type) {
				1453	case '\0': /* No format code: like 'g', but with at least one decimal. */
				1454	case 'e':
				1455	case 'E':
				1456	case 'f':
				1457	case 'F':
				1458	case 'g':
				1459	case 'G':
				1460	case 'n':
				1461	/* no conversion, already a complex. do the formatting */
				1462	result = format_complex_internal(obj, &format);
				1463	break;
				1464
				1465	default:
				1466	/* unknown */
				1467	unknown_presentation_type(format.type, obj->ob_type->tp_name);
				1468	goto done;
				1469	}
				1470
				1471	done:
				1472	return result;
				1473	}