# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

"""Parser engine for the grammar tables generated by pgen.

The grammar table must be loaded first.

See Parser/parser.c in the Python distribution for additional info on
how this parsing engine works.

"""
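
# The grammar table is normally produced by pgen and loaded through the
# companion driver module before a Parser is created.  A minimal loading
# sketch (assumptions: this module's package is importable as pgen2 and its
# driver module provides load_grammar(); the package and file names here
# are illustrative only):
#
#   from pgen2 import driver
#   grammar = driver.load_grammar("Grammar.txt")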

# Get a usable 'set' constructor
try:
    set
except NameError:
    from sets import Set as set

# Local imports
from . import token

class ParseError(Exception):
    """Exception to signal the parser is stuck."""

    def __init__(self, msg, type, value, context):
        Exception.__init__(self, "%s: type=%r, value=%r, context=%r" %
                           (msg, type, value, context))
        self.msg = msg
        self.type = type
        self.value = value
        self.context = context

class Parser(object):
34 """Parser engine.
35
36 The proper usage sequence is:
37
38 p = Parser(grammar, [converter]) # create instance
39 p.setup([start]) # prepare for parsing
40 <for each input token>:
41 if p.addtoken(...): # parse a token; may raise ParseError
42 break
43 root = p.rootnode # root of abstract syntax tree
44
45 A Parser instance may be reused by calling setup() repeatedly.
46
47 A Parser instance contains state pertaining to the current token
48 sequence, and should not be used concurrently by different threads
49 to parse separate token sequences.
50
51 See driver.py for how to get input tokens by tokenizing a file or
52 string.
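
    A more concrete variant of the same loop, as an illustrative sketch
    only: it assumes token 5-tuples of (type, value, start, end, line) in
    the style of the standard tokenize module, and a grammar object loaded
    elsewhere.  Tokens the grammar has no label for would make classify()
    raise ParseError, so a real driver filters its token stream first.

    p = Parser(grammar)
    p.setup()
    for type, value, start, end, line in tokens:
        if p.addtoken(type, value, start):    # context: a (lineno, column) pair
            break
    root = p.rootnode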

    Parsing is complete when addtoken() returns True; the root of the
    abstract syntax tree can then be retrieved from the rootnode
    instance variable. When a syntax error occurs, addtoken() raises
    the ParseError exception. There is no error recovery; the parser
    cannot be used after a syntax error was reported (but it can be
    reinitialized by calling setup()).

    """

    def __init__(self, grammar, convert=None):
        """Constructor.

        The grammar argument is a grammar.Grammar instance; see the
        grammar module for more information.

        The parser is not ready yet for parsing; you must call the
        setup() method to get it started.

        The optional convert argument is a function mapping concrete
        syntax tree nodes to abstract syntax tree nodes. If not
        given, no conversion is done and the syntax tree produced is
        the concrete syntax tree. If given, it must be a function of
        two arguments, the first being the grammar (a grammar.Grammar
        instance), and the second being the concrete syntax tree node
        to be converted. The syntax tree is converted from the bottom
        up.

        A concrete syntax tree node is a (type, value, context, nodes)
        tuple, where type is the node type (a token or symbol number),
        value is None for symbols and a string for tokens, context is
        None or an opaque value used for error reporting (typically a
        (lineno, offset) pair), and nodes is a list of children for
        symbols, and None for tokens.

        An abstract syntax tree node may be anything; this is entirely
        up to the converter function.
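
        A minimal converter sketch (illustrative only; the bottom-up calls
        are made by shift() and pop() below, and whatever is produced for
        the start symbol becomes rootnode, which must accept attribute
        assignment in pop()):

            class ExampleNode(object):
                def __init__(self, type, children):
                    self.type = type
                    self.children = children

            def convert(grammar, node):
                type, value, context, children = node
                if children is None:
                    return value                    # token: keep its string
                return ExampleNode(type, children)  # symbol: wrap its children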

        """
        self.grammar = grammar
        self.convert = convert or (lambda grammar, node: node)

    def setup(self, start=None):
        """Prepare for parsing.

        This *must* be called before starting to parse.

        The optional argument is an alternative start symbol; it
        defaults to the grammar's start symbol.

        You can use a Parser instance to parse any number of programs;
        each time you call setup() the parser is reset to an initial
        state determined by the (implicit or explicit) start symbol.
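
        An illustrative reuse sketch (the symbol name is hypothetical, and
        the symbol2number mapping is assumed from the companion grammar
        module; a start symbol is identified by its number):

            p.setup()                                    # default start symbol
            # ... feed the first program's tokens via addtoken() ...
            p.setup(grammar.symbol2number["eval_input"]) # reset with another start
            # ... feed the next program's tokens ...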

        """
        if start is None:
            start = self.grammar.start
        # Each stack entry is a tuple: (dfa, state, node).
        # A node is a tuple: (type, value, context, children),
        # where children is a list of nodes or None, and context may be None.
        newnode = (start, None, None, [])
        stackentry = (self.grammar.dfas[start], 0, newnode)
        self.stack = [stackentry]
        self.rootnode = None
        self.used_names = set() # Aliased to self.rootnode.used_names in pop()

    def addtoken(self, type, value, context):
        """Add a token; return True iff this is the end of the program."""
        # Map from token to label
        ilabel = self.classify(type, value, context)
        # Loop until the token is shifted; may raise exceptions
        while True:
            dfa, state, node = self.stack[-1]
            states, first = dfa
            arcs = states[state]
            # Look for a state with this label
            for i, newstate in arcs:
                t, v = self.grammar.labels[i]
                if ilabel == i:
                    # Look it up in the list of labels
                    assert t < 256
                    # Shift a token; we're done with it
                    self.shift(type, value, newstate, context)
                    # Pop while we are in an accept-only state
                    state = newstate
                    while states[state] == [(0, state)]:
                        self.pop()
                        if not self.stack:
                            # Done parsing!
                            return True
                        dfa, state, node = self.stack[-1]
                        states, first = dfa
                    # Done with this token
                    return False
                elif t >= 256:
                    # See if it's a symbol and if we're in its first set
                    itsdfa = self.grammar.dfas[t]
                    itsstates, itsfirst = itsdfa
                    if ilabel in itsfirst:
                        # Push a symbol
                        self.push(t, self.grammar.dfas[t], newstate, context)
                        break # To continue the outer while loop
            else:
                if (0, state) in arcs:
                    # An accepting state, pop it and try something else
                    self.pop()
                    if not self.stack:
                        # Done parsing, but another token is input
                        raise ParseError("too much input",
                                         type, value, context)
                else:
                    # No success finding a transition
                    raise ParseError("bad input", type, value, context)

    def classify(self, type, value, context):
        """Turn a token into a label. (Internal)"""
        if type == token.NAME:
            # Keep a listing of all used names
            self.used_names.add(value)
            # Check for reserved words
            ilabel = self.grammar.keywords.get(value)
            if ilabel is not None:
                return ilabel
        ilabel = self.grammar.tokens.get(type)
        if ilabel is None:
            raise ParseError("bad token", type, value, context)
        return ilabel

    def shift(self, type, value, newstate, context):
        """Shift a token. (Internal)"""
        dfa, state, node = self.stack[-1]
        newnode = (type, value, context, None)
        newnode = self.convert(self.grammar, newnode)
        if newnode is not None:
            node[-1].append(newnode)
        self.stack[-1] = (dfa, newstate, node)

    def push(self, type, newdfa, newstate, context):
        """Push a nonterminal. (Internal)"""
        dfa, state, node = self.stack[-1]
        newnode = (type, None, context, [])
        self.stack[-1] = (dfa, newstate, node)
        self.stack.append((newdfa, 0, newnode))

    def pop(self):
        """Pop a nonterminal. (Internal)"""
        popdfa, popstate, popnode = self.stack.pop()
        newnode = self.convert(self.grammar, popnode)
        if newnode is not None:
            if self.stack:
                dfa, state, node = self.stack[-1]
                node[-1].append(newnode)
            else:
                self.rootnode = newnode
                self.rootnode.used_names = self.used_names