Blame - Lib/lib2to3/pgen2/parse.py - platform/external/python/cpython2

blob: 6bebdbba7e52d6bb10c39104cef4ba21922b436e [file] [log] [blame]

Martin v. Löwis	5e37bae	2008-03-19 04:43:46 +0000	[diff] [blame]	1	# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
				2	# Licensed to PSF under a Contributor Agreement.
				3
				4	"""Parser engine for the grammar tables generated by pgen.
				5
				6	The grammar table must be loaded first.
				7
				8	See Parser/parser.c in the Python distribution for additional info on
				9	how this parsing engine works.
				10
				11	"""
				12
Martin v. Löwis	5e37bae	2008-03-19 04:43:46 +0000	[diff] [blame]	13	# Local imports
				14	from . import token
				15
				16	class ParseError(Exception):
				17	"""Exception to signal the parser is stuck."""
				18
				19	def __init__(self, msg, type, value, context):
				20	Exception.__init__(self, "%s: type=%r, value=%r, context=%r" %
				21	(msg, type, value, context))
				22	self.msg = msg
				23	self.type = type
				24	self.value = value
				25	self.context = context
				26
				27	class Parser(object):
				28	"""Parser engine.
				29
				30	The proper usage sequence is:
				31
				32	p = Parser(grammar, [converter]) # create instance
				33	p.setup([start]) # prepare for parsing
				34	<for each input token>:
				35	if p.addtoken(...): # parse a token; may raise ParseError
				36	break
				37	root = p.rootnode # root of abstract syntax tree
				38
				39	A Parser instance may be reused by calling setup() repeatedly.
				40
				41	A Parser instance contains state pertaining to the current token
				42	sequence, and should not be used concurrently by different threads
				43	to parse separate token sequences.
				44
				45	See driver.py for how to get input tokens by tokenizing a file or
				46	string.
				47
				48	Parsing is complete when addtoken() returns True; the root of the
				49	abstract syntax tree can then be retrieved from the rootnode
				50	instance variable. When a syntax error occurs, addtoken() raises
				51	the ParseError exception. There is no error recovery; the parser
				52	cannot be used after a syntax error was reported (but it can be
				53	reinitialized by calling setup()).
				54
				55	"""
				56
				57	def __init__(self, grammar, convert=None):
				58	"""Constructor.
				59
				60	The grammar argument is a grammar.Grammar instance; see the
				61	grammar module for more information.
				62
				63	The parser is not ready yet for parsing; you must call the
				64	setup() method to get it started.
				65
				66	The optional convert argument is a function mapping concrete
				67	syntax tree nodes to abstract syntax tree nodes. If not
				68	given, no conversion is done and the syntax tree produced is
				69	the concrete syntax tree. If given, it must be a function of
				70	two arguments, the first being the grammar (a grammar.Grammar
				71	instance), and the second being the concrete syntax tree node
				72	to be converted. The syntax tree is converted from the bottom
				73	up.
				74
				75	A concrete syntax tree node is a (type, value, context, nodes)
				76	tuple, where type is the node type (a token or symbol number),
				77	value is None for symbols and a string for tokens, context is
				78	None or an opaque value used for error reporting (typically a
				79	(lineno, offset) pair), and nodes is a list of children for
				80	symbols, and None for tokens.
				81
				82	An abstract syntax tree node may be anything; this is entirely
				83	up to the converter function.
				84
				85	"""
				86	self.grammar = grammar
				87	self.convert = convert or (lambda grammar, node: node)
				88
				89	def setup(self, start=None):
				90	"""Prepare for parsing.
				91
				92	This must be called before starting to parse.
				93
				94	The optional argument is an alternative start symbol; it
				95	defaults to the grammar's start symbol.
				96
				97	You can use a Parser instance to parse any number of programs;
				98	each time you call setup() the parser is reset to an initial
				99	state determined by the (implicit or explicit) start symbol.
				100
				101	"""
				102	if start is None:
				103	start = self.grammar.start
				104	# Each stack entry is a tuple: (dfa, state, node).
				105	# A node is a tuple: (type, value, context, children),
				106	# where children is a list of nodes or None, and context may be None.
				107	newnode = (start, None, None, [])
				108	stackentry = (self.grammar.dfas[start], 0, newnode)
				109	self.stack = [stackentry]
				110	self.rootnode = None
				111	self.used_names = set() # Aliased to self.rootnode.used_names in pop()
				112
				113	def addtoken(self, type, value, context):
				114	"""Add a token; return True iff this is the end of the program."""
				115	# Map from token to label
				116	ilabel = self.classify(type, value, context)
				117	# Loop until the token is shifted; may raise exceptions
				118	while True:
				119	dfa, state, node = self.stack[-1]
				120	states, first = dfa
				121	arcs = states[state]
				122	# Look for a state with this label
				123	for i, newstate in arcs:
				124	t, v = self.grammar.labels[i]
				125	if ilabel == i:
				126	# Look it up in the list of labels
				127	assert t < 256
				128	# Shift a token; we're done with it
				129	self.shift(type, value, newstate, context)
				130	# Pop while we are in an accept-only state
				131	state = newstate
				132	while states[state] == [(0, state)]:
				133	self.pop()
				134	if not self.stack:
				135	# Done parsing!
				136	return True
				137	dfa, state, node = self.stack[-1]
				138	states, first = dfa
				139	# Done with this token
				140	return False
				141	elif t >= 256:
				142	# See if it's a symbol and if we're in its first set
				143	itsdfa = self.grammar.dfas[t]
				144	itsstates, itsfirst = itsdfa
				145	if ilabel in itsfirst:
				146	# Push a symbol
				147	self.push(t, self.grammar.dfas[t], newstate, context)
				148	break # To continue the outer while loop
				149	else:
				150	if (0, state) in arcs:
				151	# An accepting state, pop it and try something else
				152	self.pop()
				153	if not self.stack:
				154	# Done parsing, but another token is input
				155	raise ParseError("too much input",
				156	type, value, context)
				157	else:
				158	# No success finding a transition
				159	raise ParseError("bad input", type, value, context)
				160
				161	def classify(self, type, value, context):
				162	"""Turn a token into a label. (Internal)"""
				163	if type == token.NAME:
				164	# Keep a listing of all used names
				165	self.used_names.add(value)
				166	# Check for reserved words
				167	ilabel = self.grammar.keywords.get(value)
				168	if ilabel is not None:
				169	return ilabel
				170	ilabel = self.grammar.tokens.get(type)
				171	if ilabel is None:
				172	raise ParseError("bad token", type, value, context)
				173	return ilabel
				174
				175	def shift(self, type, value, newstate, context):
				176	"""Shift a token. (Internal)"""
				177	dfa, state, node = self.stack[-1]
				178	newnode = (type, value, context, None)
				179	newnode = self.convert(self.grammar, newnode)
				180	if newnode is not None:
				181	node[-1].append(newnode)
				182	self.stack[-1] = (dfa, newstate, node)
				183
				184	def push(self, type, newdfa, newstate, context):
				185	"""Push a nonterminal. (Internal)"""
				186	dfa, state, node = self.stack[-1]
				187	newnode = (type, None, context, [])
				188	self.stack[-1] = (dfa, newstate, node)
				189	self.stack.append((newdfa, 0, newnode))
				190
				191	def pop(self):
				192	"""Pop a nonterminal. (Internal)"""
				193	popdfa, popstate, popnode = self.stack.pop()
				194	newnode = self.convert(self.grammar, popnode)
				195	if newnode is not None:
				196	if self.stack:
				197	dfa, state, node = self.stack[-1]
				198	node[-1].append(newnode)
				199	else:
				200	self.rootnode = newnode
				201	self.rootnode.used_names = self.used_names