lib2to3.pgen2.driver.load_grammar() now creates a stable cache file between runs given the same Grammar.txt input regardless of the hash randomization setting. Backport of 186bb8dc5540 from 3.5. Done in 2.7 per the lib2to3 exemption.

commit: 280bc223b4b5df841da20faafd1b63cde2694acd [log] [tgz]
author: Gregory P. Smith [Google Inc.] <greg@krypto.org> Thu Sep 08 01:04:37 2016 +0000
committer: Gregory P. Smith [Google Inc.] <greg@krypto.org> Thu Sep 08 01:04:37 2016 +0000
tree: 95b271c2eed151c5c70d0eb7112e068e3f7db9fc
parent: 039f1846543c18810a65bfd0e52c0cdcd46cad08 [diff]
diff --git a/Lib/lib2to3/pgen2/driver.py b/Lib/lib2to3/pgen2/driver.py
index 39dafb9..ce601bb 100644
--- a/Lib/lib2to3/pgen2/driver.py
+++ b/Lib/lib2to3/pgen2/driver.py

@@ -106,16 +106,19 @@
         return self.parse_tokens(tokens, debug)
 
 
+def _generate_pickle_name(gt):
+    head, tail = os.path.splitext(gt)
+    if tail == ".txt":
+        tail = ""
+    return head + tail + ".".join(map(str, sys.version_info)) + ".pickle"
+
+
 def load_grammar(gt="Grammar.txt", gp=None,
                  save=True, force=False, logger=None):
     """Load the grammar (maybe from a pickle)."""
     if logger is None:
         logger = logging.getLogger()
-    if gp is None:
-        head, tail = os.path.splitext(gt)
-        if tail == ".txt":
-            tail = ""
-        gp = head + tail + ".".join(map(str, sys.version_info)) + ".pickle"
+    gp = _generate_pickle_name(gt) if gp is None else gp
     if force or not _newer(gp, gt):
         logger.info("Generating grammar tables from %s", gt)
         g = pgen.generate_grammar(gt)
@@ -123,8 +126,8 @@
             logger.info("Writing grammar tables to %s", gp)
             try:
                 g.dump(gp)
-            except IOError, e:
-                logger.info("Writing failed:"+str(e))
+            except IOError as e:
+                logger.info("Writing failed: %s", e)
     else:
         g = grammar.Grammar()
         g.load(gp)

diff --git a/Lib/lib2to3/pgen2/grammar.py b/Lib/lib2to3/pgen2/grammar.py
index 8220b0a..75255e9 100644
--- a/Lib/lib2to3/pgen2/grammar.py
+++ b/Lib/lib2to3/pgen2/grammar.py

@@ -13,6 +13,7 @@
 """
 
 # Python imports
+import collections
 import pickle
 
 # Local imports
@@ -85,10 +86,21 @@
         self.start = 256
 
     def dump(self, filename):
-        """Dump the grammar tables to a pickle file."""
-        f = open(filename, "wb")
-        pickle.dump(self.__dict__, f, 2)
-        f.close()
+        """Dump the grammar tables to a pickle file.
+
+        dump() recursively changes all dict to OrderedDict, so the pickled file
+        is not exactly the same as what was passed in to dump(). load() uses the
+        pickled file to create the tables, but only changes OrderedDict to dict
+        at the top level; it does not recursively change OrderedDict to dict.
+        So, the loaded tables are different from the original tables that were
+        passed to dump() in that some of the OrderedDict (from the pickled file)
+        are not changed back to dict. For parsing, this has no effect on
+        performance because OrderedDict uses dict's __getitem__ with nothing in
+        between.
+        """
+        with open(filename, "wb") as f:
+            d = _make_deterministic(self.__dict__)
+            pickle.dump(d, f, 2)
 
     def load(self, filename):
         """Load the grammar tables from a pickle file."""
@@ -126,6 +138,17 @@
         print "start", self.start
 
 
+def _make_deterministic(top):
+    if isinstance(top, dict):
+        return collections.OrderedDict(
+            sorted(((k, _make_deterministic(v)) for k, v in top.iteritems())))
+    if isinstance(top, list):
+        return [_make_deterministic(e) for e in top]
+    if isinstance(top, tuple):
+        return tuple(_make_deterministic(e) for e in top)
+    return top
+
+
 # Map from operator to number (since tokenize doesn't do this)
 
 opmap_raw = """

diff --git a/Lib/lib2to3/pgen2/pgen.py b/Lib/lib2to3/pgen2/pgen.py
index 63084a4..ed16992 100644
--- a/Lib/lib2to3/pgen2/pgen.py
+++ b/Lib/lib2to3/pgen2/pgen.py

@@ -39,7 +39,7 @@
             states = []
             for state in dfa:
                 arcs = []
-                for label, next in state.arcs.iteritems():
+                for label, next in sorted(state.arcs.iteritems()):
                     arcs.append((self.make_label(c, label), dfa.index(next)))
                 if state.isfinal:
                     arcs.append((0, dfa.index(state)))
@@ -52,7 +52,7 @@
     def make_first(self, c, name):
         rawfirst = self.first[name]
         first = {}
-        for label in rawfirst:
+        for label in sorted(rawfirst):
             ilabel = self.make_label(c, label)
             ##assert ilabel not in first # XXX failed on <> ... !=
             first[ilabel] = 1
@@ -192,7 +192,7 @@
                 for label, next in nfastate.arcs:
                     if label is not None:
                         addclosure(next, arcs.setdefault(label, {}))
-            for label, nfaset in arcs.iteritems():
+            for label, nfaset in sorted(arcs.iteritems()):
                 for st in states:
                     if st.nfaset == nfaset:
                         break
@@ -222,7 +222,7 @@
         print "Dump of DFA for", name
         for i, state in enumerate(dfa):
             print "  State", i, state.isfinal and "(final)" or ""
-            for label, next in state.arcs.iteritems():
+            for label, next in sorted(state.arcs.iteritems()):
                 print "    %s -> %d" % (label, dfa.index(next))
 
     def simplify_dfa(self, dfa):
commit	280bc223b4b5df841da20faafd1b63cde2694acd	[log] [tgz]
author	Gregory P. Smith [Google Inc.] <greg@krypto.org>	Thu Sep 08 01:04:37 2016 +0000
committer	Gregory P. Smith [Google Inc.] <greg@krypto.org>	Thu Sep 08 01:04:37 2016 +0000
tree	95b271c2eed151c5c70d0eb7112e068e3f7db9fc
parent	039f1846543c18810a65bfd0e52c0cdcd46cad08 [diff]