divrem1 & long_format:  found a clean way to factor divrem1 so that
long_format can reuse a scratch area for its repeated divisions (instead
of malloc/free for every digit produced); speeds str(long)/repr(long).
diff --git a/Objects/longobject.c b/Objects/longobject.c
index b9cc924..ad13b07 100644
--- a/Objects/longobject.c
+++ b/Objects/longobject.c
@@ -15,7 +15,7 @@
 static PyLongObject *long_normalize(PyLongObject *);
 static PyLongObject *mul1(PyLongObject *, wdigit);
 static PyLongObject *muladd1(PyLongObject *, wdigit, wdigit);
-static PyLongObject *divrem1(PyLongObject *, wdigit, digit *);
+static PyLongObject *divrem1(PyLongObject *, digit, digit *);
 static PyObject *long_format(PyObject *aa, int base, int addL);
 
 static int ticker;	/* XXX Could be shared with ceval? */
@@ -707,28 +707,44 @@
 	return long_normalize(z);
 }
 
+/* Divide long pin, w/ size digits, by non-zero digit n, storing quotient
+   in pout, and returning the remainder.  pin and pout point at the LSD.
+   It's OK for pin == pout on entry, which saves oodles of mallocs/frees in
+   long_format, but that should be done with great care since longs are
+   immutable. */
+
+static digit
+inplace_divrem1(digit *pout, digit *pin, int size, digit n)
+{
+	twodigits rem = 0;
+
+	assert(n > 0 && n <= MASK);
+	pin += size;
+	pout += size;
+	while (--size >= 0) {
+		digit hi;
+		rem = (rem << SHIFT) + *--pin;
+		*--pout = hi = (digit)(rem / n);
+		rem -= hi * n;
+	}
+	return (digit)rem;
+}
+
 /* Divide a long integer by a digit, returning both the quotient
    (as function result) and the remainder (through *prem).
    The sign of a is ignored; n should not be zero. */
 
 static PyLongObject *
-divrem1(PyLongObject *a, wdigit n, digit *prem)
+divrem1(PyLongObject *a, digit n, digit *prem)
 {
-	int size = ABS(a->ob_size);
+	const int size = ABS(a->ob_size);
 	PyLongObject *z;
-	int i;
-	twodigits rem = 0;
 	
 	assert(n > 0 && n <= MASK);
 	z = _PyLong_New(size);
 	if (z == NULL)
 		return NULL;
-	for (i = size; --i >= 0; ) {
-		rem = (rem << SHIFT) + a->ob_digit[i];
-		z->ob_digit[i] = (digit) (rem/n);
-		rem %= n;
-	}
-	*prem = (digit) rem;
+	*prem = inplace_divrem1(z->ob_digit, a->ob_digit, size, n);
 	return long_normalize(z);
 }
 
@@ -742,7 +758,7 @@
 	register PyLongObject *a = (PyLongObject *)aa;
 	PyStringObject *str;
 	int i;
-	int size_a = ABS(a->ob_size);
+	const int size_a = ABS(a->ob_size);
 	char *p;
 	int bits;
 	char sign = '\0';
@@ -779,7 +795,7 @@
 		twodigits temp = a->ob_digit[0];
 		int bitsleft = SHIFT;
 		int rem;
-		int last = abs(a->ob_size);
+		int last = size_a;
 		int basebits = 1;
 		i = base;
 		while ((i >>= 1) > 1)
@@ -814,6 +830,10 @@
 		/* Not 0, and base not a power of 2.  Divide repeatedly by
 		   base, but for speed use the highest power of base that
 		   fits in a digit. */
+		int size = size_a;
+		digit *pin = a->ob_digit;
+		PyLongObject *scratch;
+		/* powbasw <- largest power of base that fits in a digit. */
 		digit powbase = base;  /* powbase == base ** power */
 		int power = 1;
 		for (;;) {
@@ -823,23 +843,29 @@
 			powbase = (digit)newpow;
 			++power;
 		}
-		
-		Py_INCREF(a);
+
+		/* Get a scratch area for repeated division. */
+		scratch = _PyLong_New(size);
+		if (scratch == NULL) {
+			Py_DECREF(str);
+			return NULL;
+		}
+
+		/* Repeatedly divide by powbase. */
 		do {
 			int ntostore = power;
-			digit rem;
-			PyLongObject *temp = divrem1(a, powbase, &rem);
-			Py_DECREF(a);
-			if (temp == NULL) {
-				Py_DECREF(str);
-				return NULL;
-			}
-			a = temp;
+			digit rem = inplace_divrem1(scratch->ob_digit,
+						     pin, size, powbase);
+			pin = scratch->ob_digit; /* no need to use a again */
+			if (pin[size - 1] == 0)
+				--size;
 			SIGCHECK({
-				Py_DECREF(a);
+				Py_DECREF(scratch);
 				Py_DECREF(str);
 				return NULL;
 			})
+
+			/* Break rem into digits. */
 			assert(ntostore > 0);
 			do {
 				digit nextrem = (digit)(rem / base);
@@ -851,10 +877,10 @@
 				--ntostore;
 				/* Termination is a bit delicate:  must not
 				   store leading zeroes, so must get out if
-				   a and rem are both 0 now. */
-			} while (ntostore && (a->ob_size || rem));
-		} while (a->ob_size != 0);
-		Py_DECREF(a);
+				   remaining quotient and rem are both 0. */
+			} while (ntostore && (size || rem));
+		} while (size != 0);
+		Py_DECREF(scratch);
 	}
 
 	if (base == 8) {