blob: 088c58bfa99c1b1b0ea5f269604e919e74685c3c [file] [log] [blame]
Martin v. Löwisef04c442008-03-19 05:04:44 +00001# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
2# Licensed to PSF under a Contributor Agreement.
3
4"""This module defines the data structures used to represent a grammar.
5
6These are a bit arcane because they are derived from the data
7structures used by Python's 'pgen' parser generator.
8
9There's also a table here mapping operators to their names in the
10token module; the Python tokenize module reports all operators as the
11fallback token code OP, but the parser needs the actual token code.
12
13"""
14
15# Python imports
Gregory P. Smith ext:(%20%5BGoogle%20Inc.%5D)dd1c6382016-09-08 00:40:07 +000016import collections
Martin v. Löwisef04c442008-03-19 05:04:44 +000017import pickle
18
19# Local imports
Victor Stinnerd6debb22017-03-27 16:05:26 +020020from . import token
Martin v. Löwisef04c442008-03-19 05:04:44 +000021
22
23class Grammar(object):
Terry Jan Reedyc30b7b12013-03-11 17:57:08 -040024 """Pgen parsing tables conversion class.
Martin v. Löwisef04c442008-03-19 05:04:44 +000025
26 Once initialized, this class supplies the grammar tables for the
27 parsing engine implemented by parse.py. The parsing engine
28 accesses the instance variables directly. The class here does not
29 provide initialization of the tables; several subclasses exist to
30 do this (see the conv and pgen modules).
31
32 The load() method reads the tables from a pickle file, which is
33 much faster than the other ways offered by subclasses. The pickle
34 file is written by calling dump() (after loading the grammar
35 tables using a subclass). The report() method prints a readable
36 representation of the tables to stdout, for debugging.
37
38 The instance variables are as follows:
39
40 symbol2number -- a dict mapping symbol names to numbers. Symbol
41 numbers are always 256 or higher, to distinguish
42 them from token numbers, which are between 0 and
43 255 (inclusive).
44
45 number2symbol -- a dict mapping numbers to symbol names;
46 these two are each other's inverse.
47
48 states -- a list of DFAs, where each DFA is a list of
Terry Jan Reedyc30b7b12013-03-11 17:57:08 -040049 states, each state is a list of arcs, and each
Martin v. Löwisef04c442008-03-19 05:04:44 +000050 arc is a (i, j) pair where i is a label and j is
51 a state number. The DFA number is the index into
52 this list. (This name is slightly confusing.)
53 Final states are represented by a special arc of
54 the form (0, j) where j is its own state number.
55
56 dfas -- a dict mapping symbol numbers to (DFA, first)
57 pairs, where DFA is an item from the states list
58 above, and first is a set of tokens that can
59 begin this grammar rule (represented by a dict
60 whose values are always 1).
61
62 labels -- a list of (x, y) pairs where x is either a token
63 number or a symbol number, and y is either None
64 or a string; the strings are keywords. The label
65 number is the index in this list; label numbers
66 are used to mark state transitions (arcs) in the
67 DFAs.
68
69 start -- the number of the grammar's start symbol.
70
71 keywords -- a dict mapping keyword strings to arc labels.
72
73 tokens -- a dict mapping token numbers to arc labels.
74
75 """
76
77 def __init__(self):
78 self.symbol2number = {}
79 self.number2symbol = {}
80 self.states = []
81 self.dfas = {}
82 self.labels = [(0, "EMPTY")]
83 self.keywords = {}
84 self.tokens = {}
85 self.symbol2label = {}
86 self.start = 256
87
88 def dump(self, filename):
Gregory P. Smith ext:(%20%5BGoogle%20Inc.%5D)dd1c6382016-09-08 00:40:07 +000089 """Dump the grammar tables to a pickle file.
90
91 dump() recursively changes all dict to OrderedDict, so the pickled file
92 is not exactly the same as what was passed in to dump(). load() uses the
93 pickled file to create the tables, but only changes OrderedDict to dict
94 at the top level; it does not recursively change OrderedDict to dict.
95 So, the loaded tables are different from the original tables that were
96 passed to load() in that some of the OrderedDict (from the pickled file)
97 are not changed back to dict. For parsing, this has no effect on
98 performance because OrderedDict uses dict's __getitem__ with nothing in
99 between.
100 """
Giampaolo Rodola'2f50aaf2013-02-12 02:04:27 +0100101 with open(filename, "wb") as f:
Gregory P. Smith ext:(%20%5BGoogle%20Inc.%5D)dd1c6382016-09-08 00:40:07 +0000102 d = _make_deterministic(self.__dict__)
103 pickle.dump(d, f, 2)
Martin v. Löwisef04c442008-03-19 05:04:44 +0000104
105 def load(self, filename):
106 """Load the grammar tables from a pickle file."""
Giampaolo Rodola'2f50aaf2013-02-12 02:04:27 +0100107 with open(filename, "rb") as f:
108 d = pickle.load(f)
Martin v. Löwisef04c442008-03-19 05:04:44 +0000109 self.__dict__.update(d)
110
Benjamin Peterson8a587712017-12-22 12:18:33 -0800111 def loads(self, pkl):
112 """Load the grammar tables from a pickle bytes object."""
113 self.__dict__.update(pickle.loads(pkl))
114
Benjamin Peterson3059b002009-07-20 16:42:03 +0000115 def copy(self):
116 """
117 Copy the grammar.
118 """
119 new = self.__class__()
120 for dict_attr in ("symbol2number", "number2symbol", "dfas", "keywords",
121 "tokens", "symbol2label"):
122 setattr(new, dict_attr, getattr(self, dict_attr).copy())
123 new.labels = self.labels[:]
124 new.states = self.states[:]
125 new.start = self.start
126 return new
127
Martin v. Löwisef04c442008-03-19 05:04:44 +0000128 def report(self):
129 """Dump the grammar tables to standard output, for debugging."""
130 from pprint import pprint
Martin v. Löwis8a5f8ca2008-03-19 05:33:36 +0000131 print("s2n")
Martin v. Löwisef04c442008-03-19 05:04:44 +0000132 pprint(self.symbol2number)
Martin v. Löwis8a5f8ca2008-03-19 05:33:36 +0000133 print("n2s")
Martin v. Löwisef04c442008-03-19 05:04:44 +0000134 pprint(self.number2symbol)
Martin v. Löwis8a5f8ca2008-03-19 05:33:36 +0000135 print("states")
Martin v. Löwisef04c442008-03-19 05:04:44 +0000136 pprint(self.states)
Martin v. Löwis8a5f8ca2008-03-19 05:33:36 +0000137 print("dfas")
Martin v. Löwisef04c442008-03-19 05:04:44 +0000138 pprint(self.dfas)
Martin v. Löwis8a5f8ca2008-03-19 05:33:36 +0000139 print("labels")
Martin v. Löwisef04c442008-03-19 05:04:44 +0000140 pprint(self.labels)
Martin v. Löwis8a5f8ca2008-03-19 05:33:36 +0000141 print("start", self.start)
Martin v. Löwisef04c442008-03-19 05:04:44 +0000142
143
Gregory P. Smith ext:(%20%5BGoogle%20Inc.%5D)dd1c6382016-09-08 00:40:07 +0000144def _make_deterministic(top):
145 if isinstance(top, dict):
Gregory P. Smith ext:(%20%5BGoogle%20Inc.%5D)97191612016-09-08 00:48:07 +0000146 return collections.OrderedDict(
147 sorted(((k, _make_deterministic(v)) for k, v in top.items())))
Gregory P. Smith ext:(%20%5BGoogle%20Inc.%5D)dd1c6382016-09-08 00:40:07 +0000148 if isinstance(top, list):
Gregory P. Smith ext:(%20%5BGoogle%20Inc.%5D)97191612016-09-08 00:48:07 +0000149 return [_make_deterministic(e) for e in top]
Gregory P. Smith ext:(%20%5BGoogle%20Inc.%5D)dd1c6382016-09-08 00:40:07 +0000150 if isinstance(top, tuple):
Gregory P. Smith ext:(%20%5BGoogle%20Inc.%5D)97191612016-09-08 00:48:07 +0000151 return tuple(_make_deterministic(e) for e in top)
Gregory P. Smith ext:(%20%5BGoogle%20Inc.%5D)dd1c6382016-09-08 00:40:07 +0000152 return top
153
154
# Map from operator to number (since tokenize doesn't do this)

opmap_raw = """
( LPAR
) RPAR
[ LSQB
] RSQB
: COLON
, COMMA
; SEMI
+ PLUS
- MINUS
* STAR
/ SLASH
| VBAR
& AMPER
< LESS
> GREATER
= EQUAL
. DOT
% PERCENT
` BACKQUOTE
{ LBRACE
} RBRACE
@ AT
@= ATEQUAL
== EQEQUAL
!= NOTEQUAL
<> NOTEQUAL
<= LESSEQUAL
>= GREATEREQUAL
~ TILDE
^ CIRCUMFLEX
<< LEFTSHIFT
>> RIGHTSHIFT
** DOUBLESTAR
+= PLUSEQUAL
-= MINEQUAL
*= STAREQUAL
/= SLASHEQUAL
%= PERCENTEQUAL
&= AMPEREQUAL
|= VBAREQUAL
^= CIRCUMFLEXEQUAL
<<= LEFTSHIFTEQUAL
>>= RIGHTSHIFTEQUAL
**= DOUBLESTAREQUAL
// DOUBLESLASH
//= DOUBLESLASHEQUAL
-> RARROW
"""

# Each non-blank line of opmap_raw is "<operator> <token-name>"; resolve
# the name against the local token module to get the numeric code.
opmap = {
    op: getattr(token, name)
    for op, name in (entry.split() for entry in opmap_raw.splitlines() if entry)
}