lib2to3.pgen2.driver.load_grammar() now creates a stable cache file:
given the same Grammar.txt input, the pickled output is identical
between runs, regardless of the hash randomization setting.
Backport of changeset 186bb8dc5540 from 3.5, applied to 2.7 under the lib2to3 exemption.
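
Why sorting into an OrderedDict stabilizes the cache: with hash
randomization enabled (-R / PYTHONHASHSEED), CPython 2.7 dict iteration
order can differ from run to run, so pickling self.__dict__ directly can
produce different bytes for the same grammar. Inserting the keys into an
OrderedDict in sorted order fixes the traversal order before pickling.
A minimal standalone sketch of the idea (not part of the patch; the table
contents below are made up for illustration):

    import collections
    import pickle

    def _make_deterministic(top):
        # Replace every dict with an OrderedDict whose keys are inserted
        # in sorted order, so the pickled bytes no longer depend on the
        # per-run hash seed that drives dict iteration order.
        if isinstance(top, dict):
            return collections.OrderedDict(
                sorted((k, _make_deterministic(v)) for k, v in top.iteritems()))
        if isinstance(top, list):
            return [_make_deterministic(e) for e in top]
        if isinstance(top, tuple):
            return tuple(_make_deterministic(e) for e in top)
        return top

    tables = {"keywords": {"if": 1, "lambda": 2}, "symbol2number": {"expr": 300}}
    # The same input now pickles to the same byte string on every run,
    # even when hash randomization is enabled.
    blob = pickle.dumps(_make_deterministic(tables), 2)
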
diff --git a/Lib/lib2to3/pgen2/grammar.py b/Lib/lib2to3/pgen2/grammar.py
index 8220b0a..75255e9 100644
--- a/Lib/lib2to3/pgen2/grammar.py
+++ b/Lib/lib2to3/pgen2/grammar.py
@@ -13,6 +13,7 @@
"""
# Python imports
+import collections
import pickle
# Local imports
@@ -85,10 +86,21 @@
self.start = 256
def dump(self, filename):
- """Dump the grammar tables to a pickle file."""
- f = open(filename, "wb")
- pickle.dump(self.__dict__, f, 2)
- f.close()
+ """Dump the grammar tables to a pickle file.
+
+ dump() recursively changes all dict to OrderedDict, so the pickled file
+ is not exactly the same as what was passed in to dump(). load() uses the
+ pickled file to create the tables, but only changes OrderedDict to dict
+ at the top level; it does not recursively change OrderedDict to dict.
+ So, the loaded tables are different from the original tables that were
+ passed to dump() in that some of the OrderedDict (from the pickled file)
+ are not changed back to dict. For parsing, this has no effect on
+ performance because OrderedDict uses dict's __getitem__ with nothing in
+ between.
+ """
+ with open(filename, "wb") as f:
+ d = _make_deterministic(self.__dict__)
+ pickle.dump(d, f, 2)
def load(self, filename):
"""Load the grammar tables from a pickle file."""
@@ -126,6 +138,17 @@
print "start", self.start
+def _make_deterministic(top):
+ if isinstance(top, dict):
+ return collections.OrderedDict(
+ sorted(((k, _make_deterministic(v)) for k, v in top.iteritems())))
+ if isinstance(top, list):
+ return [_make_deterministic(e) for e in top]
+ if isinstance(top, tuple):
+ return tuple(_make_deterministic(e) for e in top)
+ return top
+
+
# Map from operator to number (since tokenize doesn't do this)
opmap_raw = """