# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

"""This module defines the data structures used to represent a grammar.

These are a bit arcane because they are derived from the data
structures used by Python's 'pgen' parser generator.

There's also a table here mapping operators to their names in the
token module; the Python tokenize module reports all operators as the
fallback token code OP, but the parser needs the actual token code.

"""

# Python imports
import pickle

# Local imports
from . import token


class Grammar(object):
    """Pgen parsing tables conversion class.

    Once initialized, this class supplies the grammar tables for the
    parsing engine implemented by parse.py. The parsing engine
    accesses the instance variables directly. The class here does not
    provide initialization of the tables; several subclasses exist to
    do this (see the conv and pgen modules).

    The load() method reads the tables from a pickle file, which is
    much faster than the other ways offered by subclasses. The pickle
    file is written by calling dump() (after loading the grammar
    tables using a subclass). The report() method prints a readable
    representation of the tables to stdout, for debugging.

    The instance variables are as follows:

    symbol2number -- a dict mapping symbol names to numbers. Symbol
                     numbers are always 256 or higher, to distinguish
                     them from token numbers, which are between 0 and
                     255 (inclusive).

    number2symbol -- a dict mapping numbers to symbol names;
                     these two are each other's inverse.

    states        -- a list of DFAs, where each DFA is a list of
                     states, each state is a list of arcs, and each
                     arc is an (i, j) pair where i is a label and j is
                     a state number. The DFA number is the index into
                     this list. (This name is slightly confusing.)
                     Final states are represented by a special arc of
                     the form (0, j) where j is its own state number.

    dfas          -- a dict mapping symbol numbers to (DFA, first)
                     pairs, where DFA is an item from the states list
                     above, and first is a set of tokens that can
                     begin this grammar rule (represented by a dict
                     whose values are always 1).

    labels        -- a list of (x, y) pairs where x is either a token
                     number or a symbol number, and y is either None
                     or a string; the strings are keywords. The label
                     number is the index in this list; label numbers
                     are used to mark state transitions (arcs) in the
                     DFAs.

    start         -- the number of the grammar's start symbol.

    keywords      -- a dict mapping keyword strings to arc labels.

    tokens        -- a dict mapping token numbers to arc labels.

    """
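
    # A hedged, purely illustrative sketch (added by the editor, not taken
    # from any real grammar file): for a hypothetical one-rule grammar
    # `pass_stmt: 'pass'`, the tables might look roughly as follows. The rule
    # gets symbol number 256, the keyword 'pass' gets label index 1, and the
    # single DFA has one arc on that label followed by a final state:
    #
    #   symbol2number = {'pass_stmt': 256}
    #   number2symbol = {256: 'pass_stmt'}
    #   labels        = [(0, 'EMPTY'), (1, 'pass')]   # (token.NAME, keyword)
    #   keywords      = {'pass': 1}
    #   states        = [[[(1, 1)], [(0, 1)]]]        # arc on label 1, then final
    #   dfas          = {256: (states[0], {1: 1})}
    #   start         = 256
    #
    # Tables generated by pgen for the full grammar are much larger but
    # follow the same shape.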

    def __init__(self):
        self.symbol2number = {}
        self.number2symbol = {}
        self.states = []
        self.dfas = {}
        self.labels = [(0, "EMPTY")]
        self.keywords = {}
        self.tokens = {}
        self.symbol2label = {}
        self.start = 256

    def dump(self, filename):
        """Dump the grammar tables to a pickle file."""
        with open(filename, "wb") as f:
            pickle.dump(self.__dict__, f, pickle.HIGHEST_PROTOCOL)

    def load(self, filename):
        """Load the grammar tables from a pickle file."""
        with open(filename, "rb") as f:
            d = pickle.load(f)
        self.__dict__.update(d)

    def loads(self, pkl):
        """Load the grammar tables from a pickle bytes object."""
        self.__dict__.update(pickle.loads(pkl))

    def copy(self):
        """
        Copy the grammar.
        """
        new = self.__class__()
        for dict_attr in ("symbol2number", "number2symbol", "dfas", "keywords",
                          "tokens", "symbol2label"):
            setattr(new, dict_attr, getattr(self, dict_attr).copy())
        new.labels = self.labels[:]
        new.states = self.states[:]
        new.start = self.start
        return new

    def report(self):
        """Dump the grammar tables to standard output, for debugging."""
        from pprint import pprint
        print("s2n")
        pprint(self.symbol2number)
        print("n2s")
        pprint(self.number2symbol)
        print("states")
        pprint(self.states)
        print("dfas")
        pprint(self.dfas)
        print("labels")
        pprint(self.labels)
        print("start", self.start)


# Map from operator to number (since tokenize doesn't do this)

opmap_raw = """
( LPAR
) RPAR
[ LSQB
] RSQB
: COLON
, COMMA
; SEMI
+ PLUS
- MINUS
* STAR
/ SLASH
| VBAR
& AMPER
< LESS
> GREATER
= EQUAL
. DOT
% PERCENT
` BACKQUOTE
{ LBRACE
} RBRACE
@ AT
@= ATEQUAL
== EQEQUAL
!= NOTEQUAL
<> NOTEQUAL
<= LESSEQUAL
>= GREATEREQUAL
~ TILDE
^ CIRCUMFLEX
<< LEFTSHIFT
>> RIGHTSHIFT
** DOUBLESTAR
+= PLUSEQUAL
-= MINEQUAL
*= STAREQUAL
/= SLASHEQUAL
%= PERCENTEQUAL
&= AMPEREQUAL
|= VBAREQUAL
^= CIRCUMFLEXEQUAL
<<= LEFTSHIFTEQUAL
>>= RIGHTSHIFTEQUAL
**= DOUBLESTAREQUAL
// DOUBLESLASH
//= DOUBLESLASHEQUAL
-> RARROW
"""

opmap = {}
for line in opmap_raw.splitlines():
    if line:
        op, name = line.split()
        opmap[op] = getattr(token, name)
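

# A minimal, hedged usage sketch added for illustration; it is not part of
# the original module. It assumes the module lives in its usual lib2to3.pgen2
# package (so the relative `token` import above resolves) and is run as
# `python -m lib2to3.pgen2.grammar`.
if __name__ == "__main__":
    import os
    import tempfile

    # opmap resolves operator strings to the concrete token codes the parser
    # needs, instead of the generic OP code reported by tokenize.
    assert opmap["("] == token.LPAR
    assert opmap["->"] == token.RARROW

    # Round-trip an (empty) Grammar through dump() and load().
    g = Grammar()
    fd, path = tempfile.mkstemp(suffix=".pickle")
    os.close(fd)
    try:
        g.dump(path)
        g2 = Grammar()
        g2.load(path)
        assert g2.start == g.start == 256
        assert g2.labels == [(0, "EMPTY")]
    finally:
        os.remove(path)
    print("grammar round-trip OK")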