lib2to3.pgen3.driver.load_grammar() now creates a stable cache file
between runs given the same Grammar.txt input regardless of the hash
randomization setting.
Backport of 186bb8dc5540 from 3.5. Done in 2.7 per the lib2to3 exemption.
diff --git a/Lib/lib2to3/pgen2/driver.py b/Lib/lib2to3/pgen2/driver.py
index 39dafb9..ce601bb 100644
--- a/Lib/lib2to3/pgen2/driver.py
+++ b/Lib/lib2to3/pgen2/driver.py
@@ -106,16 +106,19 @@
return self.parse_tokens(tokens, debug)
+def _generate_pickle_name(gt):
+ head, tail = os.path.splitext(gt)
+ if tail == ".txt":
+ tail = ""
+ return head + tail + ".".join(map(str, sys.version_info)) + ".pickle"
+
+
def load_grammar(gt="Grammar.txt", gp=None,
save=True, force=False, logger=None):
"""Load the grammar (maybe from a pickle)."""
if logger is None:
logger = logging.getLogger()
- if gp is None:
- head, tail = os.path.splitext(gt)
- if tail == ".txt":
- tail = ""
- gp = head + tail + ".".join(map(str, sys.version_info)) + ".pickle"
+ gp = _generate_pickle_name(gt) if gp is None else gp
if force or not _newer(gp, gt):
logger.info("Generating grammar tables from %s", gt)
g = pgen.generate_grammar(gt)
@@ -123,8 +126,8 @@
logger.info("Writing grammar tables to %s", gp)
try:
g.dump(gp)
- except IOError, e:
- logger.info("Writing failed:"+str(e))
+ except IOError as e:
+ logger.info("Writing failed: %s", e)
else:
g = grammar.Grammar()
g.load(gp)
diff --git a/Lib/lib2to3/pgen2/grammar.py b/Lib/lib2to3/pgen2/grammar.py
index 8220b0a..75255e9 100644
--- a/Lib/lib2to3/pgen2/grammar.py
+++ b/Lib/lib2to3/pgen2/grammar.py
@@ -13,6 +13,7 @@
"""
# Python imports
+import collections
import pickle
# Local imports
@@ -85,10 +86,21 @@
self.start = 256
def dump(self, filename):
- """Dump the grammar tables to a pickle file."""
- f = open(filename, "wb")
- pickle.dump(self.__dict__, f, 2)
- f.close()
+ """Dump the grammar tables to a pickle file.
+
+ dump() recursively changes all dict to OrderedDict, so the pickled file
+ is not exactly the same as what was passed in to dump(). load() uses the
+ pickled file to create the tables, but only changes OrderedDict to dict
+ at the top level; it does not recursively change OrderedDict to dict.
+ So, the loaded tables are different from the original tables that were
+ passed to load() in that some of the OrderedDict (from the pickled file)
+ are not changed back to dict. For parsing, this has no effect on
+ performance because OrderedDict uses dict's __getitem__ with nothing in
+ between.
+ """
+ with open(filename, "wb") as f:
+ d = _make_deterministic(self.__dict__)
+ pickle.dump(d, f, 2)
def load(self, filename):
"""Load the grammar tables from a pickle file."""
@@ -126,6 +138,17 @@
print "start", self.start
+def _make_deterministic(top):
+ if isinstance(top, dict):
+ return collections.OrderedDict(
+ sorted(((k, _make_deterministic(v)) for k, v in top.iteritems())))
+ if isinstance(top, list):
+ return [_make_deterministic(e) for e in top]
+ if isinstance(top, tuple):
+ return tuple(_make_deterministic(e) for e in top)
+ return top
+
+
# Map from operator to number (since tokenize doesn't do this)
opmap_raw = """
diff --git a/Lib/lib2to3/pgen2/pgen.py b/Lib/lib2to3/pgen2/pgen.py
index 63084a4..ed16992 100644
--- a/Lib/lib2to3/pgen2/pgen.py
+++ b/Lib/lib2to3/pgen2/pgen.py
@@ -39,7 +39,7 @@
states = []
for state in dfa:
arcs = []
- for label, next in state.arcs.iteritems():
+ for label, next in sorted(state.arcs.iteritems()):
arcs.append((self.make_label(c, label), dfa.index(next)))
if state.isfinal:
arcs.append((0, dfa.index(state)))
@@ -52,7 +52,7 @@
def make_first(self, c, name):
rawfirst = self.first[name]
first = {}
- for label in rawfirst:
+ for label in sorted(rawfirst):
ilabel = self.make_label(c, label)
##assert ilabel not in first # XXX failed on <> ... !=
first[ilabel] = 1
@@ -192,7 +192,7 @@
for label, next in nfastate.arcs:
if label is not None:
addclosure(next, arcs.setdefault(label, {}))
- for label, nfaset in arcs.iteritems():
+ for label, nfaset in sorted(arcs.iteritems()):
for st in states:
if st.nfaset == nfaset:
break
@@ -222,7 +222,7 @@
print "Dump of DFA for", name
for i, state in enumerate(dfa):
print " State", i, state.isfinal and "(final)" or ""
- for label, next in state.arcs.iteritems():
+ for label, next in sorted(state.arcs.iteritems()):
print " %s -> %d" % (label, dfa.index(next))
def simplify_dfa(self, dfa):