bpo-43475: Fix worst case collision behavior for NaN instances (GH-25493)

commit: a07da09ad5bd7d234ccd084a3a0933c290d1b592 [log] [tgz]
author: Raymond Hettinger <rhettinger@users.noreply.github.com> Thu Apr 22 08:34:57 2021 -0700
committer: GitHub <noreply@github.com> Thu Apr 22 08:34:57 2021 -0700
tree: 8c1ab67575527bd5c0c9452a74458ad5a29a1d08
parent: accea7dc2bd30a6e8e1b0334acfca9585cbd7f8a [diff] [blame]
diff --git a/Python/pyhash.c b/Python/pyhash.c
index 3b6c34e..f0c8235 100644
--- a/Python/pyhash.c
+++ b/Python/pyhash.c

@@ -56,8 +56,12 @@ static Py_ssize_t hashstats[Py_HASH_STATS_MAX + 1] = {0};
    If the result of the reduction is infinity (this is impossible for
    integers, floats and Decimals) then use the predefined hash value
    _PyHASH_INF for x >= 0, or -_PyHASH_INF for x < 0, instead.
-   _PyHASH_INF, -_PyHASH_INF and _PyHASH_NAN are also used for the
-   hashes of float and Decimal infinities and nans.
+   _PyHASH_INF and -_PyHASH_INF are also used for the
+   hashes of float and Decimal infinities.
+
+   NaNs hash with a pointer hash.  Giving each NaN instance its own hash
+   value prevents catastrophic pileups: distinct NaN instances used to
+   share one hash value even though they compare unequal.
 
    A selling point for the above strategy is that it makes it possible
    to compute hashes of decimal and binary floating-point numbers
@@ -82,8 +86,10 @@ static Py_ssize_t hashstats[Py_HASH_STATS_MAX + 1] = {0};
 
    */
 
+Py_hash_t _Py_HashPointer(const void *);
+
 Py_hash_t
-_Py_HashDouble(double v)
+_Py_HashDouble(PyObject *inst, double v)
 {
     int e, sign;
     double m;
@@ -93,7 +99,7 @@ _Py_HashDouble(double v)
         if (Py_IS_INFINITY(v))
             return v > 0 ? _PyHASH_INF : -_PyHASH_INF;
         else
-            return _PyHASH_NAN;
+            return _Py_HashPointer(inst);
     }
 
     m = frexp(v, &e);
commit	a07da09ad5bd7d234ccd084a3a0933c290d1b592	[log] [tgz]
author	Raymond Hettinger <rhettinger@users.noreply.github.com>	Thu Apr 22 08:34:57 2021 -0700
committer	GitHub <noreply@github.com>	Thu Apr 22 08:34:57 2021 -0700
tree	8c1ab67575527bd5c0c9452a74458ad5a29a1d08
parent	accea7dc2bd30a6e8e1b0334acfca9585cbd7f8a [diff] [blame]