Bring comments up to date (e.g., they still said the table had to be
a prime size, which is in fact never true anymore ...).
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index 924928f..32bf647 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -15,7 +15,7 @@
 
 /*
 Table of irreducible polynomials to efficiently cycle through
-GF(2^n)-{0}, 2<=n<=30.
+GF(2^n)-{0}, 2<=n<=30.  A table size is always a power of 2.
 */
 static long polys[] = {
 	4 + 3,
@@ -54,13 +54,26 @@
 static PyObject *dummy; /* Initialized by first call to newdictobject() */
 
 /*
-Invariant for entries: when in use, me_value is not NULL and me_key is
-not NULL and not dummy; when not in use, me_value is NULL and me_key
-is either NULL or dummy.  A dummy key value cannot be replaced by
-NULL, since otherwise other keys may be lost.
+There are three kinds of slots in the table:
+
+1. Unused.  me_key == me_value == NULL
+   Does not hold an active (key, value) pair now and never did.  Unused can
+   transition to Active upon key insertion.  This is the only case in which
+   me_key is NULL, and is each slot's initial state.
+
+2. Active.  me_key != NULL and me_key != dummy and me_value != NULL
+   Holds an active (key, value) pair.  Active can transition to Dummy upon
+   key deletion.  This is the only case in which me_value != NULL.
+
+3. Dummy.  me_key == dummy && me_value == NULL
+   Previously held an active (key, value) pair, but that was deleted and an
+   active pair has not yet overwritten the slot.  Dummy can transition to
+   Active upon key insertion.  Dummy slots cannot be made Unused again
+   (cannot have me_key set to NULL), else the probe sequence in case of
+   collision would have no way to know they were once active.
 */
 typedef struct {
-	long me_hash;
+	long me_hash;      /* cached hash code of me_key */
 	PyObject *me_key;
 	PyObject *me_value;
 #ifdef USE_CACHE_ALIGNED
@@ -69,20 +82,21 @@
 } dictentry;
 
 /*
-To ensure the lookup algorithm terminates, the table size must be a
-prime number and there must be at least one NULL key in the table.
-The value ma_fill is the number of non-NULL keys; ma_used is the number
-of non-NULL, non-dummy keys.
-To avoid slowing down lookups on a near-full table, we resize the table
-when it is more than half filled.
+To ensure the lookup algorithm terminates, there must be at least one Unsused
+slot (NULL key) in the table.
+The value ma_fill is the number of non-NULL keys (sum of Active and Dummy);
+ma_used is the number of non-NULL, non-dummy keys (== the number of non-NULL
+values == the number of Active items).
+To avoid slowing down lookups on a near-full table, we resize the table when
+it is more than half filled.
 */
 typedef struct dictobject dictobject;
 struct dictobject {
 	PyObject_HEAD
-	int ma_fill;
-	int ma_used;
-	int ma_size;
-	int ma_poly;
+	int ma_fill;  /* # Active + # Dummy */
+	int ma_used;  /* # Active */
+	int ma_size;  /* total # slots in ma_table */
+	int ma_poly;  /* appopriate entry from polys vector */
 	dictentry *ma_table;
 	dictentry *(*ma_lookup)(dictobject *mp, PyObject *key, long hash);
 };
@@ -138,12 +152,12 @@
 Open addressing is preferred over chaining since the link overhead for
 chaining would be substantial (100% with typical malloc overhead).
 However, instead of going through the table at constant steps, we cycle
-through the values of GF(2^n)-{0}. This avoids modulo computations, being
+through the values of GF(2^n).  This avoids modulo computations, being
 much cheaper on RISC machines, without leading to clustering.
 
 The initial probe index is computed as hash mod the table size.
-Subsequent probe indices use the values of x^i in GF(2^n) as an offset,
-where x is a root. The initial value is derived from hash, too.
+Subsequent probe indices use the values of x^i in GF(2^n)-{0} as an offset,
+where x is a root. The initial offset is derived from hash, too.
 
 All arithmetic on hash should ignore overflow.
 
@@ -168,11 +182,14 @@
 	register int cmp;
 	PyObject *err_type, *err_value, *err_tb;
 	/* We must come up with (i, incr) such that 0 <= i < ma_size
-	   and 0 < incr < ma_size and both are a function of hash */
+	   and 0 < incr < ma_size and both are a function of hash.
+	   i is the initial table index and incr the initial probe offset. */
 	i = (~hash) & mask;
 	/* We use ~hash instead of hash, as degenerate hash functions, such
 	   as for ints <sigh>, can have lots of leading zeros. It's not
-	   really a performance risk, but better safe than sorry. */
+	   really a performance risk, but better safe than sorry.
+	   12-Dec-00 tim:  so ~hash produces lots of leading ones instead --
+	   what's the gain? */
 	ep = &ep0[i];
 	if (ep->me_key == NULL || ep->me_key == key)
 		return ep;
@@ -514,8 +531,8 @@
 	old_value = ep->me_value;
 	ep->me_value = NULL;
 	mp->ma_used--;
-	Py_DECREF(old_value); 
-	Py_DECREF(old_key); 
+	Py_DECREF(old_value);
+	Py_DECREF(old_key);
 	return 0;
 }