Backport of some of the work in r71665 to trunk. This reworks much of
int, long, and float __format__(), and it keeps their implementation
in sync with py3k.

Also added PyOS_double_to_string. This is the "fallback" version
that's also available in trunk, and should be kept in sync with that
code. I'll add an issue to document PyOS_double_to_string in the C
API.

There are many internal cleanups. Externally visible changes include:

- Implement PEP 378, Format Specifier for Thousands Separator, for
  floats, ints, and longs.

- Issue #5515: Fix 'n' formatting for ints, longs, and floats, which
  handled leading zero formatting poorly.

- Issue #5772: For float.__format__, don't add a trailing ".0" if
  we're using no type code and we have an exponent.
diff --git a/Objects/stringlib/localeutil.h b/Objects/stringlib/localeutil.h
index 1105609..f548133 100644
--- a/Objects/stringlib/localeutil.h
+++ b/Objects/stringlib/localeutil.h
@@ -5,126 +5,208 @@
 
 #include <locale.h>
 
+#define MAX(x, y) ((x) < (y) ? (y) : (x))
+#define MIN(x, y) ((x) < (y) ? (x) : (y))
+
+typedef struct {
+    const char *grouping;
+    char previous;
+    Py_ssize_t i; /* Where we're currently pointing in grouping. */
+} GroupGenerator;
+
+static void
+_GroupGenerator_init(GroupGenerator *self, const char *grouping)
+{
+    self->grouping = grouping;
+    self->i = 0;
+    self->previous = 0;
+}
+
+/* Returns the next grouping, or 0 to signify end. */
+static Py_ssize_t
+_GroupGenerator_next(GroupGenerator *self)
+{
+    /* Note that we don't really do much error checking here. If a
+       grouping string contains just CHAR_MAX, for example, then just
+       terminate the generator. That shouldn't happen, but at least we
+       fail gracefully. */
+    switch (self->grouping[self->i]) {
+    case 0:
+        return self->previous;
+    case CHAR_MAX:
+        /* Stop the generator. */
+        return 0;
+    default: {
+        char ch = self->grouping[self->i];
+        self->previous = ch;
+        self->i++;
+        return (Py_ssize_t)ch;
+    }
+    }
+}
+
+/* Fill in some digits, leading zeros, and thousands separator. All
+   are optional, depending on when we're called. */
+static void
+fill(STRINGLIB_CHAR **digits_end, STRINGLIB_CHAR **buffer_end,
+     Py_ssize_t n_chars, Py_ssize_t n_zeros, const char* thousands_sep,
+     Py_ssize_t thousands_sep_len)
+{
+#if STRINGLIB_IS_UNICODE
+    Py_ssize_t i;
+#endif
+
+    if (thousands_sep) {
+        *buffer_end -= thousands_sep_len;
+
+        /* Copy the thousands_sep chars into the buffer. */
+#if STRINGLIB_IS_UNICODE
+        /* Convert from the char's of the thousands_sep from
+           the locale into unicode. */
+        for (i = 0; i < thousands_sep_len; ++i)
+            (*buffer_end)[i] = thousands_sep[i];
+#else
+        /* No conversion, just memcpy the thousands_sep. */
+        memcpy(*buffer_end, thousands_sep, thousands_sep_len);
+#endif
+    }
+
+    *buffer_end -= n_chars;
+    *digits_end -= n_chars;
+    memcpy(*buffer_end, *digits_end, n_chars * sizeof(STRINGLIB_CHAR));
+
+    *buffer_end -= n_zeros;
+    STRINGLIB_FILL(*buffer_end, '0', n_zeros);
+}
+
 /**
  * _Py_InsertThousandsGrouping:
  * @buffer: A pointer to the start of a string.
- * @n_buffer: The length of the string.
+ * @n_buffer: Number of characters in @buffer.
+ * @digits: A pointer to the digits we're reading from. If @buffer
+ *          is NULL (counting mode), this is unused.
  * @n_digits: The number of digits in the string, in which we want
  *            to put the grouping chars.
- * @buf_size: The maximum size of the buffer pointed to by buffer.
- * @count: If non-NULL, points to a variable that will receive the
- *         number of characters we need to insert (and no formatting
- *         will actually occur).
- * @append_zero_char: If non-zero, put a trailing zero at the end of
- *         of the resulting string, if and only if we modified the
- *         string.
+ * @min_width: The minimum width of the digits in the output string.
+ *             Output will be zero-padded on the left to fill.
+ * @grouping: see definition in localeconv().
+ * @thousands_sep: see definition in localeconv().
  *
- * Inserts thousand grouping characters (as defined in the current
- *  locale) into the string between buffer and buffer+n_digits.  If
- *  count is non-NULL, don't do any formatting, just count the number
- *  of characters to insert.  This is used by the caller to
- *  appropriately resize the buffer, if needed.  If count is non-NULL,
- *  buffer can be NULL (it is not dereferenced at all in that case).
+ * There are 2 modes: counting and filling. If @buffer is NULL,
+ *  we are in counting mode, else filling mode.
+ * If counting, the required buffer size is returned.
+ * If filling, we know the buffer will be large enough, so we don't
+ *  need to pass in the buffer size.
+ * Inserts thousand grouping characters (as defined by grouping and
+ *  thousands_sep) between the digits copied from @digits into @buffer.
  *
  * Return value: 0 on error, else 1.  Note that no error can occur if
  *  count is non-NULL.
  *
  * This name won't be used, the includer of this file should define
  *  it to be the actual function name, based on unicode or string.
+ *
+ * As closely as possible, this code mimics the logic in decimal.py's
+ *  _insert_thousands_sep().
  **/
-int
+Py_ssize_t
 _Py_InsertThousandsGrouping(STRINGLIB_CHAR *buffer,
-			    Py_ssize_t n_buffer,
-			    Py_ssize_t n_digits,
-			    Py_ssize_t buf_size,
-			    Py_ssize_t *count,
-			    int append_zero_char)
+                            Py_ssize_t n_buffer,
+                            STRINGLIB_CHAR *digits,
+                            Py_ssize_t n_digits,
+                            Py_ssize_t min_width,
+                            const char *grouping,
+                            const char *thousands_sep)
 {
-	struct lconv *locale_data = localeconv();
-	const char *grouping = locale_data->grouping;
-	const char *thousands_sep = locale_data->thousands_sep;
-	Py_ssize_t thousands_sep_len = strlen(thousands_sep);
-	STRINGLIB_CHAR *pend = NULL; /* current end of buffer */
-	STRINGLIB_CHAR *pmax = NULL; /* max of buffer */
-	char current_grouping;
-	Py_ssize_t remaining = n_digits; /* Number of chars remaining to
-					    be looked at */
+    Py_ssize_t count = 0;
+    Py_ssize_t n_zeros;
+    int loop_broken = 0;
+    int use_separator = 0; /* First time through, don't append the
+                              separator. They only go between
+                              groups. */
+    STRINGLIB_CHAR *buffer_end = NULL;
+    STRINGLIB_CHAR *digits_end = NULL;
+    Py_ssize_t l;
+    Py_ssize_t n_chars;
+    Py_ssize_t thousands_sep_len = strlen(thousands_sep);
+    Py_ssize_t remaining = n_digits; /* Number of chars remaining to
+                                        be looked at */
+    /* A generator that returns all of the grouping widths, until it
+       returns 0. */
+    GroupGenerator groupgen;
+    _GroupGenerator_init(&groupgen, grouping);
 
-	/* Initialize the character count, if we're just counting. */
-	if (count)
-		*count = 0;
-	else {
-		/* We're not just counting, we're modifying buffer */
-		pend = buffer + n_buffer;
-		pmax = buffer + buf_size;
-	}
+    if (buffer) {
+        buffer_end = buffer + n_buffer;
+        digits_end = digits + n_digits;
+    }
 
-	/* Starting at the end and working right-to-left, keep track of
-	   what grouping needs to be added and insert that. */
-	current_grouping = *grouping++;
+    while ((l = _GroupGenerator_next(&groupgen)) > 0) {
+        l = MIN(l, MAX(MAX(remaining, min_width), 1));
+        n_zeros = MAX(0, l - remaining);
+        n_chars = MAX(0, MIN(remaining, l));
 
-	/* If the first character is 0, perform no grouping at all. */
-	if (current_grouping == 0)
-		return 1;
+        /* Use n_zeros zeros and n_chars chars. */
 
-	while (remaining > current_grouping) {
-		/* Always leave buffer and pend valid at the end of this
-		   loop, since we might leave with a return statement. */
+        /* Count only, don't do anything. */
+        count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars;
 
-		remaining -= current_grouping;
-		if (count) {
-			/* We're only counting, not touching the memory. */
-			*count += thousands_sep_len;
-		}
-		else {
-			/* Do the formatting. */
+        if (buffer) {
+            /* Copy into the output buffer. */
+            fill(&digits_end, &buffer_end, n_chars, n_zeros,
+                 use_separator ? thousands_sep : NULL, thousands_sep_len);
+        }
 
-			STRINGLIB_CHAR *plast = buffer + remaining;
+        /* Use a separator next time. */
+        use_separator = 1;
 
-			/* Is there room to insert thousands_sep_len chars? */
-			if (pmax - pend < thousands_sep_len)
-				/* No room. */
-				return 0;
+        remaining -= n_chars;
+        min_width -= l;
 
-			/* Move the rest of the string down. */
-			memmove(plast + thousands_sep_len,
-				plast,
-				(pend - plast) * sizeof(STRINGLIB_CHAR));
-			/* Copy the thousands_sep chars into the buffer. */
-#if STRINGLIB_IS_UNICODE
-			/* Convert from the char's of the thousands_sep from
-			   the locale into unicode. */
-			{
-				Py_ssize_t i;
-				for (i = 0; i < thousands_sep_len; ++i)
-					plast[i] = thousands_sep[i];
-			}
-#else
-			/* No conversion, just memcpy the thousands_sep. */
-			memcpy(plast, thousands_sep, thousands_sep_len);
-#endif
-		}
+        if (remaining <= 0 && min_width <= 0) {
+            loop_broken = 1;
+            break;
+        }
+        min_width -= thousands_sep_len;
+    }
+    if (!loop_broken) {
+        /* We left the loop without using a break statement. */
 
-		/* Adjust end pointer. */
-		pend += thousands_sep_len;
+        l = MAX(MAX(remaining, min_width), 1);
+        n_zeros = MAX(0, l - remaining);
+        n_chars = MAX(0, MIN(remaining, l));
 
-		/* Move to the next grouping character, unless we're
-		   repeating (which is designated by a grouping of 0). */
-		if (*grouping != 0) {
-			current_grouping = *grouping++;
-			if (current_grouping == CHAR_MAX)
-				/* We're done. */
-				break;
-		}
-	}
-	if (append_zero_char) {
-		/* Append a zero character to mark the end of the string,
-		   if there's room. */
-		if (pend - (buffer + remaining) < 1)
-			/* No room, error. */
-			return 0;
-		*pend = 0;
-	}
-	return 1;
+        /* Use n_zeros zeros and n_chars chars. */
+        count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars;
+        if (buffer) {
+            /* Copy into the output buffer. */
+            fill(&digits_end, &buffer_end, n_chars, n_zeros,
+                 use_separator ? thousands_sep : NULL, thousands_sep_len);
+        }
+    }
+    return count;
+}
+
+/**
+ * _Py_InsertThousandsGroupingLocale:
+ * @buffer: A pointer to the start of a string.
+ * @n_digits: The number of digits in the string, in which we want
+ *            to put the grouping chars.
+ *
+ * Reads the current locale and calls _Py_InsertThousandsGrouping().
+ **/
+Py_ssize_t
+_Py_InsertThousandsGroupingLocale(STRINGLIB_CHAR *buffer,
+                                  Py_ssize_t n_buffer,
+                                  STRINGLIB_CHAR *digits,
+                                  Py_ssize_t n_digits,
+                                  Py_ssize_t min_width)
+{
+        struct lconv *locale_data = localeconv();
+        const char *grouping = locale_data->grouping;
+        const char *thousands_sep = locale_data->thousands_sep;
+
+        return _Py_InsertThousandsGrouping(buffer, n_buffer, digits, n_digits,
+                                           min_width, grouping, thousands_sep);
 }
 #endif /* STRINGLIB_LOCALEUTIL_H */