Blame - Lib/lib2to3/pgen2/grammar.py - platform/external/python/cpython2

blob: 8220b0aa5e14134ee31b1fa1be1bf77c08200af5 [file] [log] [blame]

Martin v. Löwis	5e37bae	2008-03-19 04:43:46 +0000	[diff] [blame]	1	# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
				2	# Licensed to PSF under a Contributor Agreement.
				3
				4	"""This module defines the data structures used to represent a grammar.
				5
				6	These are a bit arcane because they are derived from the data
				7	structures used by Python's 'pgen' parser generator.
				8
				9	There's also a table here mapping operators to their names in the
				10	token module; the Python tokenize module reports all operators as the
				11	fallback token code OP, but the parser needs the actual token code.
				12
				13	"""
				14
				15	# Python imports
				16	import pickle
				17
				18	# Local imports
				19	from . import token, tokenize
				20
				21
				22	class Grammar(object):
Terry Jan Reedy	a70f60a	2013-03-11 17:56:17 -0400	[diff] [blame]	23	"""Pgen parsing tables conversion class.
Martin v. Löwis	5e37bae	2008-03-19 04:43:46 +0000	[diff] [blame]	24
				25	Once initialized, this class supplies the grammar tables for the
				26	parsing engine implemented by parse.py. The parsing engine
				27	accesses the instance variables directly. The class here does not
				28	provide initialization of the tables; several subclasses exist to
				29	do this (see the conv and pgen modules).
				30
				31	The load() method reads the tables from a pickle file, which is
				32	much faster than the other ways offered by subclasses. The pickle
				33	file is written by calling dump() (after loading the grammar
				34	tables using a subclass). The report() method prints a readable
				35	representation of the tables to stdout, for debugging.
				36
				37	The instance variables are as follows:
				38
				39	symbol2number -- a dict mapping symbol names to numbers. Symbol
				40	numbers are always 256 or higher, to distinguish
				41	them from token numbers, which are between 0 and
				42	255 (inclusive).
				43
				44	number2symbol -- a dict mapping numbers to symbol names;
				45	these two are each other's inverse.
				46
				47	states -- a list of DFAs, where each DFA is a list of
Terry Jan Reedy	a70f60a	2013-03-11 17:56:17 -0400	[diff] [blame]	48	states, each state is a list of arcs, and each
Martin v. Löwis	5e37bae	2008-03-19 04:43:46 +0000	[diff] [blame]	49	arc is a (i, j) pair where i is a label and j is
				50	a state number. The DFA number is the index into
				51	this list. (This name is slightly confusing.)
				52	Final states are represented by a special arc of
				53	the form (0, j) where j is its own state number.
				54
				55	dfas -- a dict mapping symbol numbers to (DFA, first)
				56	pairs, where DFA is an item from the states list
				57	above, and first is a set of tokens that can
				58	begin this grammar rule (represented by a dict
				59	whose values are always 1).
				60
				61	labels -- a list of (x, y) pairs where x is either a token
				62	number or a symbol number, and y is either None
				63	or a string; the strings are keywords. The label
				64	number is the index in this list; label numbers
				65	are used to mark state transitions (arcs) in the
				66	DFAs.
				67
				68	start -- the number of the grammar's start symbol.
				69
				70	keywords -- a dict mapping keyword strings to arc labels.
				71
				72	tokens -- a dict mapping token numbers to arc labels.
				73
				74	"""
				75
				76	def __init__(self):
				77	self.symbol2number = {}
				78	self.number2symbol = {}
				79	self.states = []
				80	self.dfas = {}
				81	self.labels = [(0, "EMPTY")]
				82	self.keywords = {}
				83	self.tokens = {}
				84	self.symbol2label = {}
				85	self.start = 256
				86
				87	def dump(self, filename):
				88	"""Dump the grammar tables to a pickle file."""
				89	f = open(filename, "wb")
				90	pickle.dump(self.__dict__, f, 2)
				91	f.close()
				92
				93	def load(self, filename):
				94	"""Load the grammar tables from a pickle file."""
				95	f = open(filename, "rb")
				96	d = pickle.load(f)
				97	f.close()
				98	self.__dict__.update(d)
				99
Benjamin Peterson	840077c	2009-07-20 15:33:09 +0000	[diff] [blame]	100	def copy(self):
				101	"""
				102	Copy the grammar.
				103	"""
				104	new = self.__class__()
				105	for dict_attr in ("symbol2number", "number2symbol", "dfas", "keywords",
				106	"tokens", "symbol2label"):
				107	setattr(new, dict_attr, getattr(self, dict_attr).copy())
				108	new.labels = self.labels[:]
				109	new.states = self.states[:]
				110	new.start = self.start
				111	return new
				112
Martin v. Löwis	5e37bae	2008-03-19 04:43:46 +0000	[diff] [blame]	113	def report(self):
				114	"""Dump the grammar tables to standard output, for debugging."""
				115	from pprint import pprint
				116	print "s2n"
				117	pprint(self.symbol2number)
				118	print "n2s"
				119	pprint(self.number2symbol)
				120	print "states"
				121	pprint(self.states)
				122	print "dfas"
				123	pprint(self.dfas)
				124	print "labels"
				125	pprint(self.labels)
				126	print "start", self.start
				127
				128
				129	# Map from operator to number (since tokenize doesn't do this)
				130
				131	opmap_raw = """
				132	( LPAR
				133	) RPAR
				134	[ LSQB
				135	] RSQB
				136	: COLON
				137	, COMMA
				138	; SEMI
				139	+ PLUS
				140	- MINUS
				141	* STAR
				142	/ SLASH
				143	\| VBAR
				144	& AMPER
				145	< LESS
				146	> GREATER
				147	= EQUAL
				148	. DOT
				149	% PERCENT
				150	` BACKQUOTE
				151	{ LBRACE
				152	} RBRACE
				153	@ AT
Benjamin Peterson	da952f3	2014-04-10 00:12:47 -0400	[diff] [blame]	154	@= ATEQUAL
Martin v. Löwis	5e37bae	2008-03-19 04:43:46 +0000	[diff] [blame]	155	== EQEQUAL
				156	!= NOTEQUAL
				157	<> NOTEQUAL
				158	<= LESSEQUAL
				159	>= GREATEREQUAL
				160	~ TILDE
				161	^ CIRCUMFLEX
				162	<< LEFTSHIFT
				163	>> RIGHTSHIFT
				164	** DOUBLESTAR
				165	+= PLUSEQUAL
				166	-= MINEQUAL
				167	*= STAREQUAL
				168	/= SLASHEQUAL
				169	%= PERCENTEQUAL
				170	&= AMPEREQUAL
				171	\|= VBAREQUAL
				172	^= CIRCUMFLEXEQUAL
				173	<<= LEFTSHIFTEQUAL
				174	>>= RIGHTSHIFTEQUAL
				175	**= DOUBLESTAREQUAL
				176	// DOUBLESLASH
				177	//= DOUBLESLASHEQUAL
				178	-> RARROW
				179	"""
				180
				181	opmap = {}
				182	for line in opmap_raw.splitlines():
				183	if line:
				184	op, name = line.split()
				185	opmap[op] = getattr(token, name)