blob: 75255e9c013827dd3b740eae4c883db01f12abdb [file] [log] [blame]
Martin v. Löwis5e37bae2008-03-19 04:43:46 +00001# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
2# Licensed to PSF under a Contributor Agreement.
3
4"""This module defines the data structures used to represent a grammar.
5
6These are a bit arcane because they are derived from the data
7structures used by Python's 'pgen' parser generator.
8
9There's also a table here mapping operators to their names in the
10token module; the Python tokenize module reports all operators as the
11fallback token code OP, but the parser needs the actual token code.
12
13"""
14
15# Python imports
Gregory P. Smith ext:(%20%5BGoogle%20Inc.%5D)280bc222016-09-08 01:04:37 +000016import collections
Martin v. Löwis5e37bae2008-03-19 04:43:46 +000017import pickle
18
19# Local imports
20from . import token, tokenize
21
22
class Grammar(object):
    """Pgen parsing tables conversion class.

    Once initialized, this class supplies the grammar tables for the
    parsing engine implemented by parse.py.  The parsing engine
    accesses the instance variables directly.  The class here does not
    provide initialization of the tables; several subclasses exist to
    do this (see the conv and pgen modules).

    The load() method reads the tables from a pickle file, which is
    much faster than the other ways offered by subclasses.  The pickle
    file is written by calling dump() (after loading the grammar
    tables using a subclass).  The report() method prints a readable
    representation of the tables to stdout, for debugging.

    The instance variables are as follows:

    symbol2number -- a dict mapping symbol names to numbers.  Symbol
                     numbers are always 256 or higher, to distinguish
                     them from token numbers, which are between 0 and
                     255 (inclusive).

    number2symbol -- a dict mapping numbers to symbol names;
                     these two are each other's inverse.

    states        -- a list of DFAs, where each DFA is a list of
                     states, each state is a list of arcs, and each
                     arc is a (i, j) pair where i is a label and j is
                     a state number.  The DFA number is the index into
                     this list.  (This name is slightly confusing.)
                     Final states are represented by a special arc of
                     the form (0, j) where j is its own state number.

    dfas          -- a dict mapping symbol numbers to (DFA, first)
                     pairs, where DFA is an item from the states list
                     above, and first is a set of tokens that can
                     begin this grammar rule (represented by a dict
                     whose values are always 1).

    labels        -- a list of (x, y) pairs where x is either a token
                     number or a symbol number, and y is either None
                     or a string; the strings are keywords.  The label
                     number is the index in this list; label numbers
                     are used to mark state transitions (arcs) in the
                     DFAs.

    start         -- the number of the grammar's start symbol.

    keywords      -- a dict mapping keyword strings to arc labels.

    tokens        -- a dict mapping token numbers to arc labels.

    """

    def __init__(self):
        self.symbol2number = {}
        self.number2symbol = {}
        self.states = []
        self.dfas = {}
        # Label 0 is reserved for the special EMPTY label.
        self.labels = [(0, "EMPTY")]
        self.keywords = {}
        self.tokens = {}
        self.symbol2label = {}
        # 256 is the lowest possible symbol number; subclasses set the
        # real start symbol when they build the tables.
        self.start = 256

    def dump(self, filename):
        """Dump the grammar tables to a pickle file.

        dump() recursively changes all dict to OrderedDict, so the pickled file
        is not exactly the same as what was passed in to dump(). load() uses the
        pickled file to create the tables, but only changes OrderedDict to dict
        at the top level; it does not recursively change OrderedDict to dict.
        So, the loaded tables are different from the original tables that were
        passed to load() in that some of the OrderedDict (from the pickled file)
        are not changed back to dict. For parsing, this has no effect on
        performance because OrderedDict uses dict's __getitem__ with nothing in
        between.
        """
        with open(filename, "wb") as f:
            d = _make_deterministic(self.__dict__)
            pickle.dump(d, f, 2)  # protocol 2 for compact, portable output

    def load(self, filename):
        """Load the grammar tables from a pickle file."""
        # Use a with-statement so the file object is closed even if
        # pickle.load() raises; the previous open()/close() sequence
        # leaked the file handle on unpickling errors.
        with open(filename, "rb") as f:
            d = pickle.load(f)
        self.__dict__.update(d)

    def copy(self):
        """Return a new Grammar with one-level-deep copies of the tables.

        The dict attributes and the labels/states lists are copied, so
        the copy's tables can be extended without mutating the
        original's; the items they contain are still shared.
        """
        new = self.__class__()
        for dict_attr in ("symbol2number", "number2symbol", "dfas", "keywords",
                          "tokens", "symbol2label"):
            setattr(new, dict_attr, getattr(self, dict_attr).copy())
        new.labels = self.labels[:]
        new.states = self.states[:]
        new.start = self.start
        return new

    def report(self):
        """Dump the grammar tables to standard output, for debugging."""
        from pprint import pprint
        # Single-argument print(...) calls emit the same output as the
        # old Python 2 print statements while also parsing on Python 3.
        print("s2n")
        pprint(self.symbol2number)
        print("n2s")
        pprint(self.number2symbol)
        print("states")
        pprint(self.states)
        print("dfas")
        pprint(self.dfas)
        print("labels")
        pprint(self.labels)
        print("start %d" % (self.start,))
139
140
Gregory P. Smith ext:(%20%5BGoogle%20Inc.%5D)280bc222016-09-08 01:04:37 +0000141def _make_deterministic(top):
142 if isinstance(top, dict):
143 return collections.OrderedDict(
144 sorted(((k, _make_deterministic(v)) for k, v in top.iteritems())))
145 if isinstance(top, list):
146 return [_make_deterministic(e) for e in top]
147 if isinstance(top, tuple):
148 return tuple(_make_deterministic(e) for e in top)
149 return top
150
151
# Map from operator to number (since tokenize doesn't do this)
# Each non-blank line of opmap_raw is "<operator text> <name of the
# corresponding constant in the token module>"; the table is parsed
# into the opmap dict at the bottom of this module.

opmap_raw = """
( LPAR
) RPAR
[ LSQB
] RSQB
: COLON
, COMMA
; SEMI
+ PLUS
- MINUS
* STAR
/ SLASH
| VBAR
& AMPER
< LESS
> GREATER
= EQUAL
. DOT
% PERCENT
` BACKQUOTE
{ LBRACE
} RBRACE
@ AT
@= ATEQUAL
== EQEQUAL
!= NOTEQUAL
<> NOTEQUAL
<= LESSEQUAL
>= GREATEREQUAL
~ TILDE
^ CIRCUMFLEX
<< LEFTSHIFT
>> RIGHTSHIFT
** DOUBLESTAR
+= PLUSEQUAL
-= MINEQUAL
*= STAREQUAL
/= SLASHEQUAL
%= PERCENTEQUAL
&= AMPEREQUAL
|= VBAREQUAL
^= CIRCUMFLEXEQUAL
<<= LEFTSHIFTEQUAL
>>= RIGHTSHIFTEQUAL
**= DOUBLESTAREQUAL
// DOUBLESLASH
//= DOUBLESLASHEQUAL
-> RARROW
"""
203
# Parse opmap_raw into a dict mapping operator text to its token number,
# looking each symbolic name up in the token module.
opmap = {}
for entry in filter(None, opmap_raw.splitlines()):
    operator, token_name = entry.split()
    opmap[operator] = getattr(token, token_name)