Added q/Q standard (x-platform 8-byte ints) mode in struct module.
This completes the q/Q project.

longobject.c _PyLong_AsByteArray:  The original code had a gross bug:
the most-significant Python digit doesn't necessarily have SHIFT
significant bits, and you really need to count how many copies of the sign
bit it has else spurious overflow errors result.

test_struct.py:  This now does exhaustive std q/Q testing at, and on both
sides of, all relevant power-of-2 boundaries, both positive and negative.

NEWS:  Added brief dict news while I was at it.
diff --git a/Doc/lib/libstruct.tex b/Doc/lib/libstruct.tex
index 9a1942d..f8056a2 100644
--- a/Doc/lib/libstruct.tex
+++ b/Doc/lib/libstruct.tex
@@ -72,7 +72,8 @@
 \item[(1)]
   The \character{q} and \character{Q} conversion codes are available in
   native mode only if the platform C compiler supports C \ctype{long long},
-  or, on Windows, \ctype{__int64}.
+  or, on Windows, \ctype{__int64}.  They're always available in standard
+  modes.
 \end{description}
 
 
@@ -100,8 +101,8 @@
 is truncated.  If the string is too short, padding is used to ensure
 that exactly enough bytes are used to satisfy the count.
 
-For the \character{I} and \character{L} format characters, the return
-value is a Python long integer.
+For the \character{I}, \character{L}, \character{q} and \character{Q}
+format characters, the return value is a Python long integer.
 
 For the \character{P} format character, the return value is a Python
 integer or long integer, depending on the size needed to hold a
@@ -139,10 +140,12 @@
 order.
 
 Standard size and alignment are as follows: no alignment is required
-for any type (so you have to use pad bytes); \ctype{short} is 2 bytes;
-\ctype{int} and \ctype{long} are 4 bytes.  \ctype{float} and
-\ctype{double} are 32-bit and 64-bit IEEE floating point numbers,
-respectively.
+for any type (so you have to use pad bytes);
+\ctype{short} is 2 bytes;
+\ctype{int} and \ctype{long} are 4 bytes;
+\ctype{long long} (\ctype{__int64} on Windows) is 8 bytes;
+\ctype{float} and \ctype{double} are 32-bit and 64-bit
+IEEE floating point numbers, respectively.
 
 Note the difference between \character{@} and \character{=}: both use
 native byte order, but the size and alignment of the latter is
diff --git a/Lib/test/test_struct.py b/Lib/test/test_struct.py
index c977913..e6c8bb2 100644
--- a/Lib/test/test_struct.py
+++ b/Lib/test/test_struct.py
@@ -12,6 +12,16 @@
             func.__name__, args)
 ##      pdb.set_trace()
 
+# Check that func(*args) raises struct.error, OverflowError or TypeError.
+def any_err(func, *args):
+    try:
+        apply(func, args)
+    except (struct.error, OverflowError, TypeError):
+        pass
+    else:
+        raise TestFailed, "%s%s did not raise error" % (
+            func.__name__, args)
+
 simple_err(struct.calcsize, 'Z')
 
 sz = struct.calcsize('i')
@@ -113,7 +123,8 @@
             raise TestFailed, "unpack(%s, %s) -> (%s,) # expected (%s,)" % (
                 `fmt`, `res`, `rev`, `arg`)
 
-# Some q/Q sanity checks.
+###########################################################################
+# q/Q tests.
 
 has_native_qQ = 1
 try:
@@ -124,18 +135,22 @@
 if verbose:
     print "Platform has native q/Q?", has_native_qQ and "Yes." or "No."
 
-simple_err(struct.pack, "Q", -1)   # can't pack -1 as unsigned regardless
+any_err(struct.pack, "Q", -1)   # can't pack -1 as unsigned regardless
 simple_err(struct.pack, "q", "a")  # can't pack string as 'q' regardless
 simple_err(struct.pack, "Q", "a")  # ditto, but 'Q'
 
+def string_reverse(s):  # return s with its characters in reverse order
+    chars = list(s)
+    chars.reverse()  # list.reverse() works in place
+    return "".join(chars)
+
 def bigendian_to_native(value):
     if isbigendian:
         return value
-    chars = list(value)
-    chars.reverse()
-    return "".join(chars)
+    else:
+        return string_reverse(value)
 
-if has_native_qQ:
+def test_native_qQ():
     bytes = struct.calcsize('q')
     # The expected values here are in big-endian format, primarily because
     # I'm on a little-endian machine and so this is the clearest way (for
@@ -156,3 +171,147 @@
         verify(retrieved == input,
                "%r-unpack of %r gave %r, not %r" %
                     (format, got, retrieved, input))
+
+if has_native_qQ:
+    test_native_qQ()
+
+# Standard q/Q (8 bytes; should work on all platforms).
+
+MIN_Q, MAX_Q = 0, 2L**64 - 1
+MIN_q, MAX_q = -(2L**63), 2L**63 - 1
+
+import binascii
+def test_one_qQ(x, pack=struct.pack,
+                   unpack=struct.unpack,
+                   unhexlify=binascii.unhexlify):
+    if verbose:
+        print "trying std q/Q on", x, "==", hex(x)
+
+    # Signed 'q': x must pack/unpack iff MIN_q <= x <= MAX_q.
+    if MIN_q <= x <= MAX_q:
+        # Big-endian '>q': build the expected 8-byte string by hand.
+        expected = long(x)
+        if x < 0:
+            expected += 1L << 64  # 64-bit two's-complement image of x
+            assert expected > 0
+        expected = hex(expected)[2:-1] # chop "0x" and trailing 'L'
+        if len(expected) & 1:  # pad to an even number of hex digits
+            expected = "0" + expected
+        expected = unhexlify(expected)
+        expected = "\x00" * (8 - len(expected)) + expected  # left-pad to 8
+
+        # >q pack work?
+        got = pack(">q", x)
+        verify(got == expected,
+               "'>q'-pack of %r gave %r, not %r" %
+                (x, got, expected))
+
+        # >q unpack work?
+        retrieved = unpack(">q", got)[0]
+        verify(x == retrieved,
+               "'>q'-unpack of %r gave %r, not %r" %
+                (got, retrieved, x))
+
+        # A string one byte longer than got must make unpack fail.
+        any_err(unpack, ">q", '\x01' + got)
+
+        # Little-endian '<q': the same bytes in reverse order.
+        expected = string_reverse(expected)
+
+        # <q pack work?
+        got = pack("<q", x)
+        verify(got == expected,
+               "'<q'-pack of %r gave %r, not %r" %
+                (x, got, expected))
+
+        # <q unpack work?
+        retrieved = unpack("<q", got)[0]
+        verify(x == retrieved,
+               "'<q'-unpack of %r gave %r, not %r" %
+                (got, retrieved, x))
+
+        # A string one byte longer than got must make unpack fail.
+        any_err(unpack, "<q", '\x01' + got)
+
+    else:
+        # x is out of q's range -- verify pack realizes that.
+        any_err(pack, '>q', x)
+        any_err(pack, '<q', x)
+
+    # Unsigned 'Q': x must pack/unpack iff MIN_Q <= x <= MAX_Q.
+    if MIN_Q <= x <= MAX_Q:
+        # Big-endian '>Q': build the expected 8-byte string by hand.
+        expected = long(x)
+        expected = hex(expected)[2:-1] # chop "0x" and trailing 'L'
+        if len(expected) & 1:  # pad to an even number of hex digits
+            expected = "0" + expected
+        expected = unhexlify(expected)
+        expected = "\x00" * (8 - len(expected)) + expected  # left-pad to 8
+
+        # >Q pack work?
+        got = pack(">Q", x)
+        verify(got == expected,
+               "'>Q'-pack of %r gave %r, not %r" %
+                (x, got, expected))
+
+        # >Q unpack work?
+        retrieved = unpack(">Q", got)[0]
+        verify(x == retrieved,
+               "'>Q'-unpack of %r gave %r, not %r" %
+                (got, retrieved, x))
+
+        # A string one byte longer than got must make unpack fail.
+        any_err(unpack, ">Q", '\x01' + got)
+
+        # Little-endian '<Q': the same bytes in reverse order.
+        expected = string_reverse(expected)
+
+        # <Q pack work?
+        got = pack("<Q", x)
+        verify(got == expected,
+               "'<Q'-pack of %r gave %r, not %r" %
+                (x, got, expected))
+
+        # <Q unpack work?
+        retrieved = unpack("<Q", got)[0]
+        verify(x == retrieved,
+               "'<Q'-unpack of %r gave %r, not %r" %
+                (got, retrieved, x))
+
+        # A string one byte longer than got must make unpack fail.
+        any_err(unpack, "<Q", '\x01' + got)
+
+    else:
+        # x is out of Q's range -- verify pack realizes that.
+        any_err(pack, '>Q', x)
+        any_err(pack, '<Q', x)
+
+def test_std_qQ():
+    from random import randrange
+
+    # Create all interesting powers of 2.
+    values = []
+    for exp in range(70):  # 2**0 through 2**69, so some exceed 64 bits
+        values.append(1L << exp)
+
+    # Add some random 64-bit values.
+    for i in range(50):
+        val = 0L
+        for j in range(8):  # assemble the value one random byte at a time
+            val = (val << 8) | randrange(256)
+        values.append(val)
+
+    # Try all those, and their negations, and +-1 from them.  Note
+    # that this tests all power-of-2 boundaries in range, and a few out
+    # of range, plus +-(2**n +- 1).
+    for base in values:
+        for val in -base, base:
+            for incr in -1, 0, 1:
+                x = val + incr
+                try:
+                    x = int(x)  # pass a plain int when the value fits in one
+                except OverflowError:
+                    pass  # too big for an int; test it as a long
+                test_one_qQ(x)
+
+test_std_qQ()
diff --git a/Misc/NEWS b/Misc/NEWS
index 18a87e9..16850aa 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -84,6 +84,9 @@
   sortdict(dict) function for a simple way to display a dict in sorted
   order.
 
+- Many other small changes to dicts were made, resulting in faster
+  operation along the most common code paths.
+
 - Dictionary objects now support the "in" operator: "x in dict" means
   the same as dict.has_key(x).
 
@@ -119,7 +122,7 @@
 
 - Collisions in dicts are resolved via a new approach, which can help
   dramatically in bad cases.  For example, looking up every key in a dict
-  d with d.keys() = [i << 16 for i in range(20000)] is approximately 500x
+  d with d.keys() == [i << 16 for i in range(20000)] is approximately 500x
   faster now.  Thanks to Christian Tismer for pointing out the cause and
   the nature of an effective cure (last December! better late than never).
 
@@ -145,8 +148,8 @@
   native mode, these can be used only when the platform C compiler supports
   these types (when HAVE_LONG_LONG is #define'd by the Python config
   process), and then they inherit the sizes and alignments of the C types.
-  XXX TODO In standard mode, 'q' and 'Q' are supported on all platforms, and
-  XXX TODO are 8-byte integral types.
+  In standard mode, 'q' and 'Q' are supported on all platforms, and are
+  8-byte integral types.
 
 Tests
 
diff --git a/Modules/structmodule.c b/Modules/structmodule.c
index 9b79978..4a8886f 100644
--- a/Modules/structmodule.c
+++ b/Modules/structmodule.c
@@ -80,6 +80,34 @@
 #pragma options align=reset
 #endif
 
+/* Return v as a new PyLong reference (caller decrefs), or NULL on failure. */
+
+static PyObject *
+get_pylong(PyObject *v)
+{
+	PyNumberMethods *m;
+
+	assert(v != NULL);
+	if (PyInt_Check(v))	/* plain int: build an equal-valued long */
+		return PyLong_FromLong(PyInt_AS_LONG(v));
+	if (PyLong_Check(v)) {	/* already a long: return a new reference */
+		Py_INCREF(v);
+		return v;
+	}
+	m = v->ob_type->tp_as_number;	/* else try the type's nb_long slot */
+	if (m != NULL && m->nb_long != NULL) {
+		v = m->nb_long(v);
+		if (v == NULL)
+			return NULL;
+		if (PyLong_Check(v))
+			return v;
+		Py_DECREF(v);	/* nb_long result wasn't a long; discard it */
+	}
+	PyErr_SetString(StructError,
+			"cannot convert argument to long");
+	return NULL;
+}
+
 /* Helper routine to get a Python integer and raise the appropriate error
    if it isn't one */
 
@@ -123,33 +151,13 @@
 get_longlong(PyObject *v, LONG_LONG *p)
 {
 	LONG_LONG x;
-	int v_needs_decref = 0;
 
-	if (PyInt_Check(v)) {
-		x = (LONG_LONG)PyInt_AS_LONG(v);
-		*p = x;
-		return 0;
-	}
-	if (!PyLong_Check(v)) {
-		PyNumberMethods *m = v->ob_type->tp_as_number;
-		if (m != NULL && m->nb_long != NULL) {
-			v = m->nb_long(v);
-			if (v == NULL)
-				return -1;
-			v_needs_decref = 1;
-		}
-		if (!PyLong_Check(v)) {
-			PyErr_SetString(StructError,
-					"cannot convert argument to long");
-			if (v_needs_decref)
-				Py_DECREF(v);
-			return -1;
-		}
-	}
+	v = get_pylong(v);
+	if (v == NULL)
+		return -1;
 	assert(PyLong_Check(v));
 	x = PyLong_AsLongLong(v);
-	if (v_needs_decref)
-		Py_DECREF(v);
+	Py_DECREF(v);
 	if (x == (LONG_LONG)-1 && PyErr_Occurred())
 		return -1;
 	*p = x;
@@ -162,39 +170,13 @@
 get_ulonglong(PyObject *v, unsigned LONG_LONG *p)
 {
 	unsigned LONG_LONG x;
-	int v_needs_decref = 0;
 
-	if (PyInt_Check(v)) {
-		long i = PyInt_AS_LONG(v);
-		if (i < 0) {
-			PyErr_SetString(StructError, "can't convert negative "
-					"int to unsigned");
-			return -1;
-		}
-		x = (unsigned LONG_LONG)i;
-		*p = x;
-		return 0;
-	}
-	if (!PyLong_Check(v)) {
-		PyNumberMethods *m = v->ob_type->tp_as_number;
-		if (m != NULL && m->nb_long != NULL) {
-			v = m->nb_long(v);
-			if (v == NULL)
-				return -1;
-			v_needs_decref = 1;
-		}
-		if (!PyLong_Check(v)) {
-			PyErr_SetString(StructError,
-					"cannot convert argument to long");
-			if (v_needs_decref)
-				Py_DECREF(v);
-			return -1;
-		}
-	}
+	v = get_pylong(v);
+	if (v == NULL)
+		return -1;
 	assert(PyLong_Check(v));
 	x = PyLong_AsUnsignedLongLong(v);
-	if (v_needs_decref)
-		Py_DECREF(v);
+	Py_DECREF(v);
 	if (x == (unsigned LONG_LONG)-1 && PyErr_Occurred())
 		return -1;
 	*p = x;
@@ -500,7 +482,7 @@
    TYPE is one of char, byte, ubyte, etc.
 */
 
-/* Native mode routines. */
+/* Native mode routines. ****************************************************/
 
 static PyObject *
 nu_char(const char *p, const formatdef *f)
@@ -797,6 +779,8 @@
 	{0}
 };
 
+/* Big-endian routines. *****************************************************/
+
 static PyObject *
 bu_int(const char *p, const formatdef *f)
 {
@@ -826,6 +810,24 @@
 }
 
 static PyObject *
+bu_longlong(const char *p, const formatdef *f)
+{
+	return _PyLong_FromByteArray((const unsigned char *)p,
+				      8, /* number of bytes */
+				      0, /* little-endian? no: big-endian */
+				      1  /* signed */);
+}
+
+static PyObject *
+bu_ulonglong(const char *p, const formatdef *f)
+{
+	return _PyLong_FromByteArray((const unsigned char *)p,
+				      8, /* number of bytes */
+				      0, /* little-endian? no: big-endian */
+				      0  /* signed? no: unsigned */);
+}
+
+static PyObject *
 bu_float(const char *p, const formatdef *f)
 {
 	return unpack_float(p, 1);
@@ -868,6 +870,34 @@
 }
 
 static int
+bp_longlong(char *p, PyObject *v, const formatdef *f)
+{
+	int res;
+	/* Store v at p as 8 big-endian signed bytes. */
+	v = get_pylong(v);
+	if (v == NULL)	/* conversion failed: nothing to pack or decref */
+		return -1;
+	res = _PyLong_AsByteArray((PyLongObject *)v, (unsigned char *)p, 8,
+				  0 /* little_endian */, 1 /* signed */);
+	Py_DECREF(v);
+	return res;
+}
+
+static int
+bp_ulonglong(char *p, PyObject *v, const formatdef *f)
+{
+	int res;
+	/* Store v at p as 8 big-endian unsigned bytes. */
+	v = get_pylong(v);
+	if (v == NULL)	/* conversion failed: nothing to pack or decref */
+		return -1;
+	res = _PyLong_AsByteArray((PyLongObject *)v, (unsigned char *)p, 8,
+				  0 /* little_endian */, 0 /* signed */);
+	Py_DECREF(v);
+	return res;
+}
+
+static int
 bp_float(char *p, PyObject *v, const formatdef *f)
 {
 	double x = PyFloat_AsDouble(v);
@@ -904,11 +934,15 @@
 	{'I',	4,		0,		bu_uint,	bp_uint},
 	{'l',	4,		0,		bu_int,		bp_int},
 	{'L',	4,		0,		bu_uint,	bp_uint},
+	{'q',	8,		0,		bu_longlong,	bp_longlong},
+	{'Q',	8,		0,		bu_ulonglong,	bp_ulonglong},
 	{'f',	4,		0,		bu_float,	bp_float},
 	{'d',	8,		0,		bu_double,	bp_double},
 	{0}
 };
 
+/* Little-endian routines. *****************************************************/
+
 static PyObject *
 lu_int(const char *p, const formatdef *f)
 {
@@ -938,6 +972,24 @@
 }
 
 static PyObject *
+lu_longlong(const char *p, const formatdef *f)
+{
+	return _PyLong_FromByteArray((const unsigned char *)p,
+				      8, /* number of bytes */
+				      1, /* little-endian */
+				      1  /* signed */);
+}
+
+static PyObject *
+lu_ulonglong(const char *p, const formatdef *f)
+{
+	return _PyLong_FromByteArray((const unsigned char *)p,
+				      8, /* number of bytes */
+				      1, /* little-endian */
+				      0  /* signed? no: unsigned */);
+}
+
+static PyObject *
 lu_float(const char *p, const formatdef *f)
 {
 	return unpack_float(p+3, -1);
@@ -980,6 +1032,34 @@
 }
 
 static int
+lp_longlong(char *p, PyObject *v, const formatdef *f)
+{
+	int res;
+	/* Store v at p as 8 little-endian signed bytes. */
+	v = get_pylong(v);
+	if (v == NULL)	/* conversion failed: nothing to pack or decref */
+		return -1;
+	res = _PyLong_AsByteArray((PyLongObject *)v, (unsigned char *)p, 8,
+				  1 /* little_endian */, 1 /* signed */);
+	Py_DECREF(v);
+	return res;
+}
+
+static int
+lp_ulonglong(char *p, PyObject *v, const formatdef *f)
+{
+	int res;
+	/* Store v at p as 8 little-endian unsigned bytes. */
+	v = get_pylong(v);
+	if (v == NULL)	/* conversion failed: nothing to pack or decref */
+		return -1;
+	res = _PyLong_AsByteArray((PyLongObject *)v, (unsigned char *)p, 8,
+				  1 /* little_endian */, 0 /* signed */);
+	Py_DECREF(v);
+	return res;
+}
+
+static int
 lp_float(char *p, PyObject *v, const formatdef *f)
 {
 	double x = PyFloat_AsDouble(v);
@@ -1016,6 +1096,8 @@
 	{'I',	4,		0,		lu_uint,	lp_uint},
 	{'l',	4,		0,		lu_int,		lp_int},
 	{'L',	4,		0,		lu_uint,	lp_uint},
+	{'q',	8,		0,		lu_longlong,	lp_longlong},
+	{'Q',	8,		0,		lu_ulonglong,	lp_ulonglong},
 	{'f',	4,		0,		lu_float,	lp_float},
 	{'d',	8,		0,		lu_double,	lp_double},
 	{0}
diff --git a/Objects/longobject.c b/Objects/longobject.c
index 92f8b04..fac8bb6 100644
--- a/Objects/longobject.c
+++ b/Objects/longobject.c
@@ -364,20 +364,33 @@
 	accumbits = 0;
 	carry = do_twos_comp ? 1 : 0;
 	for (i = 0; i < ndigits; ++i) {
+		unsigned int oldaccumbits = accumbits;
 		twodigits thisdigit = v->ob_digit[i];
 		if (do_twos_comp) {
 			thisdigit = (thisdigit ^ MASK) + carry;
 			carry = thisdigit >> SHIFT;
 			thisdigit &= MASK;
 		}
+		if (i < ndigits - 1)
+			accumbits += SHIFT;
+		else {
+			/* The most-significant digit may be partly empty. */
+			twodigits bitmask = 1 << (SHIFT - 1);
+			twodigits signbit = do_twos_comp << (SHIFT - 1);
+			unsigned int nsignbits = 0;
+			while ((thisdigit & bitmask) == signbit && bitmask) {
+				++nsignbits;
+				bitmask >>= 1;
+				signbit >>= 1;
+			}
+			accumbits += SHIFT - nsignbits;
+		}
 		/* Because we're going LSB to MSB, thisdigit is more
 		   significant than what's already in accum, so needs to be
 		   prepended to accum. */
-		accum |= thisdigit << accumbits;
-		accumbits += SHIFT;
+		accum |= thisdigit << oldaccumbits;
 		/* Store as many bytes as possible. */
-		assert(accumbits >= 8);
-		do {
+		while (accumbits >= 8) {
 			if (j >= n)
 				goto Overflow;
 			++j;
@@ -385,13 +398,13 @@
 			p += pincr;
 			accumbits -= 8;
 			accum >>= 8;
-		} while (accumbits >= 8);
+		}
 	}
 
 	/* Store the straggler (if any). */
 	assert(accumbits < 8);
 	assert(carry == 0);  /* else do_twos_comp and *every* digit was 0 */
-	if (accum) {
+	if (accumbits > 0) {
 		if (j >= n)
 			goto Overflow;
 		++j;