Bug #1334662 / patch #1335972:  int(string, base) wrong answers.

In rare cases of strings specifying true values near sys.maxint,
and oddball bases (not decimal or a power of 2), int(string, base)
could deliver insane answers.  This repairs all such problems, and
also speeds string->int significantly.  On my box, here are %
speedups for decimal strings of various lengths:

length speedup
------ -------
 1       12.4%
 2       15.7%
 3       20.6%
 4       28.1%
 5       33.2%
 6       37.5%
 7       41.9%
 8       46.3%
 9       51.2%
10       19.5%
11       19.9%
12       23.9%
13       23.7%
14       23.3%
15       24.9%
16       25.3%
17       28.3%
18       27.9%
19       35.7%

Note that the difference between 9 and 10 is the difference between
short and long Python ints on a 32-bit box.  The patch doesn't
actually do anything to speed conversion to long:  the speedup is
due to detecting "unsigned long" overflow more quickly.

This is a bugfix candidate, but it's a non-trivial patch and it
would be painful to separate the "bug fix" from the "speed up" parts.
diff --git a/Python/mystrtoul.c b/Python/mystrtoul.c
index 8e60c0c..272f827 100644
--- a/Python/mystrtoul.c
+++ b/Python/mystrtoul.c
@@ -15,6 +15,94 @@
 
 /* strtol and strtoul, renamed to avoid conflicts */
 
+
+#include <ctype.h>
+#ifndef DONT_HAVE_ERRNO_H
+#include <errno.h>
+#endif
+
+/* Static overflow check values for bases 2 through 36.
+ * smallmax[base] is the largest unsigned long i such that
+ * i * base doesn't overflow unsigned long.
+ */
+static unsigned long smallmax[] = {
+	0, /* bases 0 and 1 are invalid */
+	0,
+	ULONG_MAX / 2,
+	ULONG_MAX / 3,
+	ULONG_MAX / 4,
+	ULONG_MAX / 5,
+	ULONG_MAX / 6,
+	ULONG_MAX / 7,
+	ULONG_MAX / 8,
+	ULONG_MAX / 9,
+	ULONG_MAX / 10,
+	ULONG_MAX / 11,
+	ULONG_MAX / 12,
+	ULONG_MAX / 13,
+	ULONG_MAX / 14,
+	ULONG_MAX / 15,
+	ULONG_MAX / 16,
+	ULONG_MAX / 17,
+	ULONG_MAX / 18,
+	ULONG_MAX / 19,
+	ULONG_MAX / 20,
+	ULONG_MAX / 21,
+	ULONG_MAX / 22,
+	ULONG_MAX / 23,
+	ULONG_MAX / 24,
+	ULONG_MAX / 25,
+	ULONG_MAX / 26,
+	ULONG_MAX / 27,
+	ULONG_MAX / 28,
+	ULONG_MAX / 29,
+	ULONG_MAX / 30,
+	ULONG_MAX / 31,
+	ULONG_MAX / 32,
+	ULONG_MAX / 33,
+	ULONG_MAX / 34,
+	ULONG_MAX / 35,
+	ULONG_MAX / 36,
+};
+
+/* maximum digits that can't ever overflow for bases 2 through 36,
+ * calculated by [int(math.floor(math.log(2**32, i))) for i in range(2, 37)].
+ * Note that this is pessimistic if sizeof(long) > 4.
+ */
+static int digitlimit[] = {
+	0,  0, 32, 20, 16, 13, 12, 11, 10, 10,  /*  0 -  9 */
+	9,  9,  8,  8,  8,  8,  8,  7,  7,  7,  /* 10 - 19 */
+	7,  7,  7,  7,  6,  6,  6,  6,  6,  6,  /* 20 - 29 */
+	6,  6,  6,  6,  6,  6,  6};             /* 30 - 36 */
+
+/* char-to-digit conversion for bases 2-36; all non-digits are 37 */
+static int digitlookup[] = {
+	37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
+	37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
+	37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
+	0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  37, 37, 37, 37, 37, 37,
+	37, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+	25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 37, 37, 37, 37,
+	37, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+	25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 37, 37, 37, 37,
+	37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
+	37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
+	37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
+	37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
+	37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
+	37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
+	37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
+	37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
+	37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
+	37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
+	37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
+	37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
+	37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
+	37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
+	37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
+	37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37
+};
+
 /*
 **	strtoul
 **		This is a general purpose routine for converting
@@ -28,98 +116,100 @@
 **		Errors due to bad pointers will probably result in
 **		exceptions - we don't check for them.
 */
-
-#include <ctype.h>
-#ifndef DONT_HAVE_ERRNO_H
-#include <errno.h>
-#endif
-
 unsigned long
 PyOS_strtoul(register char *str, char **ptr, int base)
 {
-    register unsigned long	result;	/* return value of the function */
-    register int		c;	/* current input character */
-    register unsigned long	temp;	/* used in overflow testing */
-    int				ovf;	/* true if overflow occurred */
+	register unsigned long result = 0; /* return value of the function */
+	register int c;	 	/* current input character */
+	register int ovlimit; 	/* required digits to overflow */
 
-    result = 0;
-    ovf = 0;
+	/* skip leading white space */
+	while (*str && isspace(Py_CHARMASK(*str)))
+		++str;
 
-/* catch silly bases */
-    if (base != 0 && (base < 2 || base > 36))
-    {
+	/* check for leading 0 or 0x for auto-base or base 16 */
+	switch (base) {
+		case 0:		/* look for leading 0, 0x or 0X */
+			if (*str == '0') {
+				++str;
+				if (*str == 'x' || *str == 'X') {
+					++str;
+					base = 16;
+				}
+				else
+					base = 8;
+			}
+			else
+				base = 10;
+			break;
+
+		case 16:	/* skip leading 0x or 0X */
+			if (*str == '0') {
+				++str;
+				if (*str == 'x' || *str == 'X')
+					++str;
+			}
+			break;
+	}
+
+	/* catch silly bases */
+	if (base < 2 || base > 36) {
+		if (ptr)
+			*ptr = str;
+		return 0;
+	}
+
+	/* skip leading zeroes */
+	while (*str == '0')
+		++str;
+
+	/* base is guaranteed to be in [2, 36] at this point */
+	ovlimit = digitlimit[base];
+
+	/* do the conversion until non-digit character encountered */
+	while ((c = digitlookup[Py_CHARMASK(*str)]) < base) {
+		if (ovlimit > 0) /* no overflow check required */
+			result = result * base + c;
+		else { /* requires overflow check */
+			register unsigned long temp_result;
+
+			if (ovlimit < 0) /* guaranteed overflow */
+				goto overflowed;
+
+			/* there could be an overflow */
+			/* check overflow just from shifting */
+			if (result > smallmax[base])
+				goto overflowed;
+
+			result *= base;
+
+			/* check overflow from the digit's value */
+			temp_result = result + c;
+			if (temp_result < result)
+				goto overflowed;
+
+			result = temp_result;
+		}
+
+		++str;
+		--ovlimit;
+	}
+
+	/* set pointer to point to the last character scanned */
 	if (ptr)
-	    *ptr = str;
-	return 0;
-    }
+		*ptr = str;
 
-/* skip leading white space */
-    while (*str && isspace(Py_CHARMASK(*str)))
-	str++;
+	return result;
 
-/* check for leading 0 or 0x for auto-base or base 16 */
-    switch (base)
-    {
-    case 0:		/* look for leading 0, 0x or 0X */
-	if (*str == '0')
-	{
-	    str++;
-	    if (*str == 'x' || *str == 'X')
-	    {
-		str++;
-		base = 16;
-	    }
-	    else
-		base = 8;
+overflowed:
+	if (ptr) {
+		/* spool through remaining digit characters */
+		while (digitlookup[Py_CHARMASK(*str)] < base)
+			++str;
+		*ptr = str;
 	}
-	else
-	    base = 10;
-	break;
-
-    case 16:	/* skip leading 0x or 0X */
-	if (*str == '0' && (*(str+1) == 'x' || *(str+1) == 'X'))
-	    str += 2;
-	break;
-    }
-
-/* do the conversion */
-    while ((c = Py_CHARMASK(*str)) != '\0')
-    {
-	if (isdigit(c) && c - '0' < base)
-	    c -= '0';
-	else
-	{
-	    if (isupper(c))
-		c = tolower(c);
-	    if (c >= 'a' && c <= 'z')
-		c -= 'a' - 10;
-	    else	/* non-"digit" character */
-		break;
-	    if (c >= base)	/* non-"digit" character */
-		break;
-	}
-	temp = result;
-	result = result * base + c;
-	if(base == 10) {
-		if(((long)(result - c) / base != (long)temp))	/* overflow */
-			ovf = 1;
-	}
-	else {
-		if ((result - c) / base != temp)	/* overflow */
-			ovf = 1;
-	}
-	str++;
-    }
-
-/* set pointer to point to the last character scanned */
-    if (ptr)
-	*ptr = str;
-    if (ovf)
-    {
-	result = (unsigned long) ~0L;
 	errno = ERANGE;
-    }
-    return result;
+	return (unsigned long)-1;
 }
 
 long
@@ -127,25 +217,25 @@
 {
 	long result;
 	char sign;
-	
+
 	while (*str && isspace(Py_CHARMASK(*str)))
 		str++;
-	
+
 	sign = *str;
 	if (sign == '+' || sign == '-')
 		str++;
-	
+
 	result = (long) PyOS_strtoul(str, ptr, base);
-	
+
 	/* Signal overflow if the result appears negative,
 	   except for the largest negative integer */
 	if (result < 0 && !(sign == '-' && result == -result)) {
 		errno = ERANGE;
 		result = 0x7fffffff;
 	}
-	
+
 	if (sign == '-')
 		result = -result;
-	
+
 	return result;
 }