Blame - Lib/lib2to3/pgen2/grammar.py - platform/external/python/cpython3

blob: 14c5f70f1de8b1893d8840b4f97b39fac80f106e [file] [log] [blame]

Martin v. Löwis	ef04c44	2008-03-19 05:04:44 +0000	[diff] [blame]	1	# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
				2	# Licensed to PSF under a Contributor Agreement.
				3
				4	"""This module defines the data structures used to represent a grammar.
				5
				6	These are a bit arcane because they are derived from the data
				7	structures used by Python's 'pgen' parser generator.
				8
				9	There's also a table here mapping operators to their names in the
				10	token module; the Python tokenize module reports all operators as the
				11	fallback token code OP, but the parser needs the actual token code.
				12
				13	"""
				14
				15	# Python imports
				16	import pickle
				17
				18	# Local imports
				19	from . import token, tokenize
				20
				21
				22	class Grammar(object):
Terry Jan Reedy	c30b7b1	2013-03-11 17:57:08 -0400	[diff] [blame]	23	"""Pgen parsing tables conversion class.
Martin v. Löwis	ef04c44	2008-03-19 05:04:44 +0000	[diff] [blame]	24
				25	Once initialized, this class supplies the grammar tables for the
				26	parsing engine implemented by parse.py. The parsing engine
				27	accesses the instance variables directly. The class here does not
				28	provide initialization of the tables; several subclasses exist to
				29	do this (see the conv and pgen modules).
				30
				31	The load() method reads the tables from a pickle file, which is
				32	much faster than the other ways offered by subclasses. The pickle
				33	file is written by calling dump() (after loading the grammar
				34	tables using a subclass). The report() method prints a readable
				35	representation of the tables to stdout, for debugging.
				36
				37	The instance variables are as follows:
				38
				39	symbol2number -- a dict mapping symbol names to numbers. Symbol
				40	numbers are always 256 or higher, to distinguish
				41	them from token numbers, which are between 0 and
				42	255 (inclusive).
				43
				44	number2symbol -- a dict mapping numbers to symbol names;
				45	these two are each other's inverse.
				46
				47	states -- a list of DFAs, where each DFA is a list of
Terry Jan Reedy	c30b7b1	2013-03-11 17:57:08 -0400	[diff] [blame]	48	states, each state is a list of arcs, and each
Martin v. Löwis	ef04c44	2008-03-19 05:04:44 +0000	[diff] [blame]	49	arc is a (i, j) pair where i is a label and j is
				50	a state number. The DFA number is the index into
				51	this list. (This name is slightly confusing.)
				52	Final states are represented by a special arc of
				53	the form (0, j) where j is its own state number.
				54
				55	dfas -- a dict mapping symbol numbers to (DFA, first)
				56	pairs, where DFA is an item from the states list
				57	above, and first is a set of tokens that can
				58	begin this grammar rule (represented by a dict
				59	whose values are always 1).
				60
				61	labels -- a list of (x, y) pairs where x is either a token
				62	number or a symbol number, and y is either None
				63	or a string; the strings are keywords. The label
				64	number is the index in this list; label numbers
				65	are used to mark state transitions (arcs) in the
				66	DFAs.
				67
				68	start -- the number of the grammar's start symbol.
				69
				70	keywords -- a dict mapping keyword strings to arc labels.
				71
				72	tokens -- a dict mapping token numbers to arc labels.
				73
				74	"""
				75
				76	def __init__(self):
				77	self.symbol2number = {}
				78	self.number2symbol = {}
				79	self.states = []
				80	self.dfas = {}
				81	self.labels = [(0, "EMPTY")]
				82	self.keywords = {}
				83	self.tokens = {}
				84	self.symbol2label = {}
				85	self.start = 256
				86
				87	def dump(self, filename):
				88	"""Dump the grammar tables to a pickle file."""
				89	f = open(filename, "wb")
				90	pickle.dump(self.__dict__, f, 2)
				91	f.close()
				92
				93	def load(self, filename):
				94	"""Load the grammar tables from a pickle file."""
				95	f = open(filename, "rb")
				96	d = pickle.load(f)
				97	f.close()
				98	self.__dict__.update(d)
				99
Benjamin Peterson	3059b00	2009-07-20 16:42:03 +0000	[diff] [blame]	100	def copy(self):
				101	"""
				102	Copy the grammar.
				103	"""
				104	new = self.__class__()
				105	for dict_attr in ("symbol2number", "number2symbol", "dfas", "keywords",
				106	"tokens", "symbol2label"):
				107	setattr(new, dict_attr, getattr(self, dict_attr).copy())
				108	new.labels = self.labels[:]
				109	new.states = self.states[:]
				110	new.start = self.start
				111	return new
				112
Martin v. Löwis	ef04c44	2008-03-19 05:04:44 +0000	[diff] [blame]	113	def report(self):
				114	"""Dump the grammar tables to standard output, for debugging."""
				115	from pprint import pprint
Martin v. Löwis	8a5f8ca	2008-03-19 05:33:36 +0000	[diff] [blame]	116	print("s2n")
Martin v. Löwis	ef04c44	2008-03-19 05:04:44 +0000	[diff] [blame]	117	pprint(self.symbol2number)
Martin v. Löwis	8a5f8ca	2008-03-19 05:33:36 +0000	[diff] [blame]	118	print("n2s")
Martin v. Löwis	ef04c44	2008-03-19 05:04:44 +0000	[diff] [blame]	119	pprint(self.number2symbol)
Martin v. Löwis	8a5f8ca	2008-03-19 05:33:36 +0000	[diff] [blame]	120	print("states")
Martin v. Löwis	ef04c44	2008-03-19 05:04:44 +0000	[diff] [blame]	121	pprint(self.states)
Martin v. Löwis	8a5f8ca	2008-03-19 05:33:36 +0000	[diff] [blame]	122	print("dfas")
Martin v. Löwis	ef04c44	2008-03-19 05:04:44 +0000	[diff] [blame]	123	pprint(self.dfas)
Martin v. Löwis	8a5f8ca	2008-03-19 05:33:36 +0000	[diff] [blame]	124	print("labels")
Martin v. Löwis	ef04c44	2008-03-19 05:04:44 +0000	[diff] [blame]	125	pprint(self.labels)
Martin v. Löwis	8a5f8ca	2008-03-19 05:33:36 +0000	[diff] [blame]	126	print("start", self.start)
Martin v. Löwis	ef04c44	2008-03-19 05:04:44 +0000	[diff] [blame]	127
				128
				129	# Map from operator to number (since tokenize doesn't do this)
				130
				131	opmap_raw = """
				132	( LPAR
				133	) RPAR
				134	[ LSQB
				135	] RSQB
				136	: COLON
				137	, COMMA
				138	; SEMI
				139	+ PLUS
				140	- MINUS
				141	* STAR
				142	/ SLASH
				143	\| VBAR
				144	& AMPER
				145	< LESS
				146	> GREATER
				147	= EQUAL
				148	. DOT
				149	% PERCENT
				150	` BACKQUOTE
				151	{ LBRACE
				152	} RBRACE
				153	@ AT
				154	== EQEQUAL
				155	!= NOTEQUAL
				156	<> NOTEQUAL
				157	<= LESSEQUAL
				158	>= GREATEREQUAL
				159	~ TILDE
				160	^ CIRCUMFLEX
				161	<< LEFTSHIFT
				162	>> RIGHTSHIFT
				163	** DOUBLESTAR
				164	+= PLUSEQUAL
				165	-= MINEQUAL
				166	*= STAREQUAL
				167	/= SLASHEQUAL
				168	%= PERCENTEQUAL
				169	&= AMPEREQUAL
				170	\|= VBAREQUAL
				171	^= CIRCUMFLEXEQUAL
				172	<<= LEFTSHIFTEQUAL
				173	>>= RIGHTSHIFTEQUAL
				174	**= DOUBLESTAREQUAL
				175	// DOUBLESLASH
				176	//= DOUBLESLASHEQUAL
				177	-> RARROW
				178	"""
				179
				180	opmap = {}
				181	for line in opmap_raw.splitlines():
				182	if line:
				183	op, name = line.split()
				184	opmap[op] = getattr(token, name)