| # Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. | 
 | # Licensed to PSF under a Contributor Agreement. | 
 |  | 
 | """This module defines the data structures used to represent a grammar. | 
 |  | 
 | These are a bit arcane because they are derived from the data | 
 | structures used by Python's 'pgen' parser generator. | 
 |  | 
 | There's also a table here mapping operators to their names in the | 
 | token module; the Python tokenize module reports all operators as the | 
 | fallback token code OP, but the parser needs the actual token code. | 
 |  | 
 | """ | 
 |  | 
 | # Python imports | 
 | import pickle | 
 |  | 
 | # Local imports | 
 | from . import token, tokenize | 
 |  | 
 |  | 
 | class Grammar(object): | 
 |     """Pgen parsing tables tables conversion class. | 
 |  | 
 |     Once initialized, this class supplies the grammar tables for the | 
 |     parsing engine implemented by parse.py.  The parsing engine | 
 |     accesses the instance variables directly.  The class here does not | 
 |     provide initialization of the tables; several subclasses exist to | 
 |     do this (see the conv and pgen modules). | 
 |  | 
 |     The load() method reads the tables from a pickle file, which is | 
 |     much faster than the other ways offered by subclasses.  The pickle | 
 |     file is written by calling dump() (after loading the grammar | 
 |     tables using a subclass).  The report() method prints a readable | 
 |     representation of the tables to stdout, for debugging. | 
 |  | 
 |     The instance variables are as follows: | 
 |  | 
 |     symbol2number -- a dict mapping symbol names to numbers.  Symbol | 
 |                      numbers are always 256 or higher, to distinguish | 
 |                      them from token numbers, which are between 0 and | 
 |                      255 (inclusive). | 
 |  | 
 |     number2symbol -- a dict mapping numbers to symbol names; | 
 |                      these two are each other's inverse. | 
 |  | 
 |     states        -- a list of DFAs, where each DFA is a list of | 
                     states, each state is a list of arcs, and each
                     arc is an (i, j) pair where i is a label and j is
 |                      a state number.  The DFA number is the index into | 
 |                      this list.  (This name is slightly confusing.) | 
 |                      Final states are represented by a special arc of | 
 |                      the form (0, j) where j is its own state number. | 
 |  | 
 |     dfas          -- a dict mapping symbol numbers to (DFA, first) | 
 |                      pairs, where DFA is an item from the states list | 
 |                      above, and first is a set of tokens that can | 
 |                      begin this grammar rule (represented by a dict | 
 |                      whose values are always 1). | 
 |  | 
 |     labels        -- a list of (x, y) pairs where x is either a token | 
 |                      number or a symbol number, and y is either None | 
 |                      or a string; the strings are keywords.  The label | 
 |                      number is the index in this list; label numbers | 
 |                      are used to mark state transitions (arcs) in the | 
 |                      DFAs. | 
 |  | 
 |     start         -- the number of the grammar's start symbol. | 
 |  | 
 |     keywords      -- a dict mapping keyword strings to arc labels. | 
 |  | 
    tokens        -- a dict mapping token numbers to arc labels.

    symbol2label  -- a dict mapping symbol names to arc labels, for
                     nonterminal names referenced on the right-hand
                     side of grammar rules.
 |  | 
 |     """ | 
 |  | 
 |     def __init__(self): | 
 |         self.symbol2number = {} | 
 |         self.number2symbol = {} | 
 |         self.states = [] | 
 |         self.dfas = {} | 
 |         self.labels = [(0, "EMPTY")] | 
 |         self.keywords = {} | 
 |         self.tokens = {} | 
 |         self.symbol2label = {} | 
 |         self.start = 256 | 
 |  | 
 |     def dump(self, filename): | 
 |         """Dump the grammar tables to a pickle file.""" | 
 |         f = open(filename, "wb") | 
 |         pickle.dump(self.__dict__, f, 2) | 
 |         f.close() | 
 |  | 
 |     def load(self, filename): | 
 |         """Load the grammar tables from a pickle file.""" | 
 |         f = open(filename, "rb") | 
 |         d = pickle.load(f) | 
 |         f.close() | 
 |         self.__dict__.update(d) | 
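
    # A typical round trip, assuming the tables were first generated by one
    # of the subclasses (e.g. the generator in the pgen module) and bound to
    # a hypothetical instance ``g``:
    #
    #     g.dump("Grammar.pickle")    # write the tables out once
    #     g2 = Grammar()
    #     g2.load("Grammar.pickle")   # fast startup on subsequent runs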
 |  | 
 |     def copy(self): | 
 |         """ | 
 |         Copy the grammar. | 
 |         """ | 
 |         new = self.__class__() | 
 |         for dict_attr in ("symbol2number", "number2symbol", "dfas", "keywords", | 
 |                           "tokens", "symbol2label"): | 
 |             setattr(new, dict_attr, getattr(self, dict_attr).copy()) | 
 |         new.labels = self.labels[:] | 
 |         new.states = self.states[:] | 
 |         new.start = self.start | 
 |         return new | 
 |  | 
 |     def report(self): | 
 |         """Dump the grammar tables to standard output, for debugging.""" | 
 |         from pprint import pprint | 
 |         print("s2n") | 
 |         pprint(self.symbol2number) | 
 |         print("n2s") | 
 |         pprint(self.number2symbol) | 
 |         print("states") | 
 |         pprint(self.states) | 
 |         print("dfas") | 
 |         pprint(self.dfas) | 
 |         print("labels") | 
 |         pprint(self.labels) | 
 |         print("start", self.start) | 
 |  | 
 |  | 
# Map from operator string to token number (since tokenize doesn't do this)
 |  | 
 | opmap_raw = """ | 
 | ( LPAR | 
 | ) RPAR | 
 | [ LSQB | 
 | ] RSQB | 
 | : COLON | 
 | , COMMA | 
 | ; SEMI | 
 | + PLUS | 
 | - MINUS | 
 | * STAR | 
 | / SLASH | 
 | | VBAR | 
 | & AMPER | 
 | < LESS | 
 | > GREATER | 
 | = EQUAL | 
 | . DOT | 
 | % PERCENT | 
 | ` BACKQUOTE | 
 | { LBRACE | 
 | } RBRACE | 
 | @ AT | 
 | == EQEQUAL | 
 | != NOTEQUAL | 
 | <> NOTEQUAL | 
 | <= LESSEQUAL | 
 | >= GREATEREQUAL | 
 | ~ TILDE | 
 | ^ CIRCUMFLEX | 
 | << LEFTSHIFT | 
 | >> RIGHTSHIFT | 
 | ** DOUBLESTAR | 
 | += PLUSEQUAL | 
 | -= MINEQUAL | 
 | *= STAREQUAL | 
 | /= SLASHEQUAL | 
 | %= PERCENTEQUAL | 
 | &= AMPEREQUAL | 
 | |= VBAREQUAL | 
 | ^= CIRCUMFLEXEQUAL | 
 | <<= LEFTSHIFTEQUAL | 
 | >>= RIGHTSHIFTEQUAL | 
 | **= DOUBLESTAREQUAL | 
 | // DOUBLESLASH | 
 | //= DOUBLESLASHEQUAL | 
 | -> RARROW | 
 | """ | 
 |  | 
 | opmap = {} | 
 | for line in opmap_raw.splitlines(): | 
 |     if line: | 
 |         op, name = line.split() | 
 |         opmap[op] = getattr(token, name) |
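
# For example (illustrative), once the table is built the parser can map the
# generic OP tokens reported by tokenize to their specific token codes:
#
#     opmap["**="] == token.DOUBLESTAREQUAL   # -> True
#     opmap["->"] == token.RARROW             # -> True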