Martin v. Löwis | ef04c44 | 2008-03-19 05:04:44 +0000 | [diff] [blame] | 1 | # Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. |
| 2 | # Licensed to PSF under a Contributor Agreement. |
| 3 | |
| 4 | """This module defines the data structures used to represent a grammar. |
| 5 | |
| 6 | These are a bit arcane because they are derived from the data |
| 7 | structures used by Python's 'pgen' parser generator. |
| 8 | |
| 9 | There's also a table here mapping operators to their names in the |
| 10 | token module; the Python tokenize module reports all operators as the |
| 11 | fallback token code OP, but the parser needs the actual token code. |
| 12 | |
| 13 | """ |
| 14 | |
| 15 | # Python imports |
| 16 | import pickle |
| 17 | |
| 18 | # Local imports |
| 19 | from . import token, tokenize |
| 20 | |
| 21 | |
class Grammar(object):
    """Pgen parsing tables conversion class.

    Once initialized, this class supplies the grammar tables for the
    parsing engine implemented by parse.py.  The parsing engine
    accesses the instance variables directly.  The class here does not
    provide initialization of the tables; several subclasses exist to
    do this (see the conv and pgen modules).

    The load() method reads the tables from a pickle file, which is
    much faster than the other ways offered by subclasses.  The pickle
    file is written by calling dump() (after loading the grammar
    tables using a subclass).  The report() method prints a readable
    representation of the tables to stdout, for debugging.

    The instance variables are as follows:

    symbol2number -- a dict mapping symbol names to numbers.  Symbol
                     numbers are always 256 or higher, to distinguish
                     them from token numbers, which are between 0 and
                     255 (inclusive).

    number2symbol -- a dict mapping numbers to symbol names;
                     these two are each other's inverse.

    states        -- a list of DFAs, where each DFA is a list of
                     states, each state is a list of arcs, and each
                     arc is a (i, j) pair where i is a label and j is
                     a state number.  The DFA number is the index into
                     this list.  (This name is slightly confusing.)
                     Final states are represented by a special arc of
                     the form (0, j) where j is its own state number.

    dfas          -- a dict mapping symbol numbers to (DFA, first)
                     pairs, where DFA is an item from the states list
                     above, and first is a set of tokens that can
                     begin this grammar rule (represented by a dict
                     whose values are always 1).

    labels        -- a list of (x, y) pairs where x is either a token
                     number or a symbol number, and y is either None
                     or a string; the strings are keywords.  The label
                     number is the index in this list; label numbers
                     are used to mark state transitions (arcs) in the
                     DFAs.

    start         -- the number of the grammar's start symbol.

    keywords      -- a dict mapping keyword strings to arc labels.

    tokens        -- a dict mapping token numbers to arc labels.

    symbol2label  -- a dict, empty after construction and filled in by
                     the table-building subclasses.
                     NOTE(review): presumably maps symbol names to
                     label numbers, mirroring keywords/tokens -- confirm
                     against the pgen module.

    """

    def __init__(self):
        # Start with empty tables; subclasses (or load()) populate them.
        self.symbol2number = {}
        self.number2symbol = {}
        self.states = []
        self.dfas = {}
        # Label 0 is reserved for the special EMPTY label.
        self.labels = [(0, "EMPTY")]
        self.keywords = {}
        self.tokens = {}
        self.symbol2label = {}
        # 256 is the lowest possible symbol number, hence the default start.
        self.start = 256

    def dump(self, filename):
        """Dump the grammar tables to a pickle file.

        Uses pickle protocol 2.  The file is closed even if pickling
        fails (previously the handle leaked on error).
        """
        with open(filename, "wb") as f:
            pickle.dump(self.__dict__, f, 2)

    def load(self, filename):
        """Load the grammar tables from a pickle file.

        The pickled dict is merged into this instance's __dict__, so
        any attribute not present in the pickle keeps its current value.
        """
        with open(filename, "rb") as f:
            d = pickle.load(f)
        self.__dict__.update(d)

    def report(self):
        """Dump the grammar tables to standard output, for debugging."""
        from pprint import pprint
        print("s2n")
        pprint(self.symbol2number)
        print("n2s")
        pprint(self.number2symbol)
        print("states")
        pprint(self.states)
        print("dfas")
        pprint(self.dfas)
        print("labels")
        pprint(self.labels)
        print("start", self.start)
Martin v. Löwis | ef04c44 | 2008-03-19 05:04:44 +0000 | [diff] [blame] | 114 | |
| 115 | |
# Map from operator string to token name (the tokenize module reports
# every operator as the generic OP token, so the parser must recover the
# specific token code itself).  Each non-blank line below is
# "<operator> <attribute name in the token module>"; the table is parsed
# into the opmap dict right after this string.

opmap_raw = """
( LPAR
) RPAR
[ LSQB
] RSQB
: COLON
, COMMA
; SEMI
+ PLUS
- MINUS
* STAR
/ SLASH
| VBAR
& AMPER
< LESS
> GREATER
= EQUAL
. DOT
% PERCENT
` BACKQUOTE
{ LBRACE
} RBRACE
@ AT
== EQEQUAL
!= NOTEQUAL
<> NOTEQUAL
<= LESSEQUAL
>= GREATEREQUAL
~ TILDE
^ CIRCUMFLEX
<< LEFTSHIFT
>> RIGHTSHIFT
** DOUBLESTAR
+= PLUSEQUAL
-= MINEQUAL
*= STAREQUAL
/= SLASHEQUAL
%= PERCENTEQUAL
&= AMPEREQUAL
|= VBAREQUAL
^= CIRCUMFLEXEQUAL
<<= LEFTSHIFTEQUAL
>>= RIGHTSHIFTEQUAL
**= DOUBLESTAREQUAL
// DOUBLESLASH
//= DOUBLESLASHEQUAL
-> RARROW
"""
| 166 | |
# Resolve each "<operator> <name>" entry of opmap_raw into a mapping
# from operator string to its numeric token code.
opmap = {
    op: getattr(token, name)
    for op, name in (entry.split()
                     for entry in opmap_raw.splitlines()
                     if entry)
}