bpo-8425: Fast path for set inplace difference when the second set is large (GH-15590)
diff --git a/Misc/NEWS.d/next/Core and Builtins/2019-08-29-01-55-38.bpo-8425.FTq4A8.rst b/Misc/NEWS.d/next/Core and Builtins/2019-08-29-01-55-38.bpo-8425.FTq4A8.rst
new file mode 100644
index 0000000..8e5ec0b
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2019-08-29-01-55-38.bpo-8425.FTq4A8.rst
@@ -0,0 +1,3 @@
+Optimize set difference_update for the case when the other set is much
+larger than the base set. (Suggested by Evgeny Kapun with code contributed
+by Michele Orrù).
diff --git a/Objects/setobject.c b/Objects/setobject.c
index 56858db..fafc2fa 100644
--- a/Objects/setobject.c
+++ b/Objects/setobject.c
@@ -1463,9 +1463,25 @@
setentry *entry;
Py_ssize_t pos = 0;
- while (set_next((PySetObject *)other, &pos, &entry))
- if (set_discard_entry(so, entry->key, entry->hash) < 0)
+ /* Optimization: When the other set is more than 8 times
+ larger than the base set, replace the other set with
+ interesection of the two sets.
+ */
+ if ((PySet_GET_SIZE(other) >> 3) > PySet_GET_SIZE(so)) {
+ other = set_intersection(so, other);
+ if (other == NULL)
return -1;
+ } else {
+ Py_INCREF(other);
+ }
+
+ while (set_next((PySetObject *)other, &pos, &entry))
+ if (set_discard_entry(so, entry->key, entry->hash) < 0) {
+ Py_DECREF(other);
+ return -1;
+ }
+
+ Py_DECREF(other);
} else {
PyObject *key, *it;
it = PyObject_GetIter(other);