Optimize unicode_hash() by not calling _PyUnicode_AsDefaultEncodedString() at all; this saves two object allocations (three block allocations!) and lots of redundant work. Because it uses the same hash algorithm as string_hash(), we maintain the invariant that an ASCII string hashes to the same value whether it is represented as a PyString or a PyUnicode.

commit: c2504931ee6bb19b4d38d0d654b02a6fbc797ebd [log] [tgz]
author: Guido van Rossum <guido@python.org> Tue Sep 18 19:42:40 2007 +0000
committer: Guido van Rossum <guido@python.org> Tue Sep 18 19:42:40 2007 +0000
tree: ae0fac70cd3c00538e97315db099f26d3f634d35
parent: e4a9e788d367b99162a0b584d23f4fd111bde1cf [diff] [blame]
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 140ffaf..2a6a087 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c

@@ -6587,21 +6587,27 @@
     return (PyObject*) PyUnicode_FromUnicode(&self->str[index], 1);
 }
 
+/* Believe it or not, this produces the same value for ASCII strings
+   as string_hash(). */
 static long
 unicode_hash(PyUnicodeObject *self)
 {
-    if (self->hash != -1) {
-	return self->hash;
-    }
-    else {
-        /* Since Unicode objects compare equal to their UTF-8 string
-           counterparts, we hash the UTF-8 string. */
-        PyObject *v = _PyUnicode_AsDefaultEncodedString((PyObject*)self, NULL);
-        if (v == NULL)
-            return -1;
-        assert(PyString_CheckExact(v));
-        return self->hash = v->ob_type->tp_hash(v);
-    }
+    Py_ssize_t len;
+    Py_UNICODE *p;
+    long x;
+
+    if (self->hash != -1)
+        return self->hash;
+    len = Py_Size(self);
+    p = self->str;
+    x = *p << 7;
+    while (--len >= 0)
+        x = (1000003*x) ^ *p++;
+    x ^= Py_Size(self);
+    if (x == -1)
+        x = -2;
+    self->hash = x;
+    return x;
 }
 
 PyDoc_STRVAR(index__doc__,
commit	c2504931ee6bb19b4d38d0d654b02a6fbc797ebd	[log] [tgz]
author	Guido van Rossum <guido@python.org>	Tue Sep 18 19:42:40 2007 +0000
committer	Guido van Rossum <guido@python.org>	Tue Sep 18 19:42:40 2007 +0000
tree	ae0fac70cd3c00538e97315db099f26d3f634d35
parent	e4a9e788d367b99162a0b584d23f4fd111bde1cf [diff] [blame]