lib2to3.pgen2.driver.load_grammar() now creates a stable cache file:
given the same Grammar.txt input, the pickled output is identical
between runs, regardless of the hash randomization setting.
Backport of changeset 186bb8dc5540 from 3.5, applied to 2.7 under the lib2to3 exemption.
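
Why sorting into an OrderedDict stabilizes the cache: with hash
randomization enabled (-R / PYTHONHASHSEED), CPython 2.7 dict iteration
order can differ from run to run, so pickling self.__dict__ directly can
produce different bytes for the same grammar. Inserting the keys into an
OrderedDict in sorted order fixes the traversal order before pickling.
A minimal standalone sketch of the idea (not part of the patch; the table
contents below are made up for illustration):

    import collections
    import pickle

    def _make_deterministic(top):
        # Replace every dict with an OrderedDict whose keys are inserted
        # in sorted order, so the pickled bytes no longer depend on the
        # per-run hash seed that drives dict iteration order.
        if isinstance(top, dict):
            return collections.OrderedDict(
                sorted((k, _make_deterministic(v)) for k, v in top.iteritems()))
        if isinstance(top, list):
            return [_make_deterministic(e) for e in top]
        if isinstance(top, tuple):
            return tuple(_make_deterministic(e) for e in top)
        return top

    tables = {"keywords": {"if": 1, "lambda": 2}, "symbol2number": {"expr": 300}}
    # The same input now pickles to the same byte string on every run,
    # even when hash randomization is enabled.
    blob = pickle.dumps(_make_deterministic(tables), 2)
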
diff --git a/Lib/lib2to3/pgen2/grammar.py b/Lib/lib2to3/pgen2/grammar.py
index 8220b0a..75255e9 100644
--- a/Lib/lib2to3/pgen2/grammar.py
+++ b/Lib/lib2to3/pgen2/grammar.py
@@ -13,6 +13,7 @@
"""
# Python imports
+import collections
import pickle
# Local imports
@@ -85,10 +86,21 @@
self.start = 256
def dump(self, filename):
- """Dump the grammar tables to a pickle file."""
- f = open(filename, "wb")
- pickle.dump(self.__dict__, f, 2)
- f.close()
+ """Dump the grammar tables to a pickle file.
+
+ dump() recursively changes all dict to OrderedDict, so the pickled file
+ is not exactly the same as what was passed in to dump(). load() uses the
+ pickled file to create the tables, but only changes OrderedDict to dict
+ at the top level; it does not recursively change OrderedDict to dict.
+ So, the loaded tables are different from the original tables that were
+ passed to dump() in that some of the OrderedDict (from the pickled file)
+ are not changed back to dict. For parsing, this has no effect on
+ performance because OrderedDict uses dict's __getitem__ with nothing in
+ between.
+ """
+ with open(filename, "wb") as f:
+ d = _make_deterministic(self.__dict__)
+ pickle.dump(d, f, 2)
def load(self, filename):
"""Load the grammar tables from a pickle file."""
@@ -126,6 +138,17 @@
print "start", self.start
+def _make_deterministic(top):
+ if isinstance(top, dict):
+ return collections.OrderedDict(
+ sorted(((k, _make_deterministic(v)) for k, v in top.iteritems())))
+ if isinstance(top, list):
+ return [_make_deterministic(e) for e in top]
+ if isinstance(top, tuple):
+ return tuple(_make_deterministic(e) for e in top)
+ return top
+
+
# Map from operator to number (since tokenize doesn't do this)
opmap_raw = """