Blame - Parser/pgen/automata.py - platform/external/python/cpython3

blob: 3147d8636ff3eb8c696c79f7c6eff48aa093077a [file] [log] [blame]

Pablo Galindo	71876fa	2019-08-22 02:38:39 +0100	[diff] [blame]	1	"""Classes representing state-machine concepts"""
				2
				3	class NFA:
				4	"""A non deterministic finite automata
				5
				6	A non deterministic automata is a form of a finite state
				7	machine. An NFA's rules are less restrictive than a DFA.
				8	The NFA rules are:
				9
				10	* A transition can be non-deterministic and can result in
				11	nothing, one, or two or more states.
				12
				13	* An epsilon transition consuming empty input is valid.
				14	Transitions consuming labeled symbols are also permitted.
				15
				16	This class assumes that there is only one starting state and one
				17	accepting (ending) state.
				18
				19	Attributes:
				20	name (str): The name of the rule the NFA is representing.
				21	start (NFAState): The starting state.
				22	end (NFAState): The ending state
				23	"""
				24
				25	def __init__(self, start, end):
				26	self.name = start.rule_name
				27	self.start = start
				28	self.end = end
				29
				30	def __repr__(self):
				31	return "NFA(start={}, end={})".format(self.start, self.end)
				32
				33	def dump(self, writer=print):
				34	"""Dump a graphical representation of the NFA"""
				35	todo = [self.start]
				36	for i, state in enumerate(todo):
				37	writer(" State", i, state is self.end and "(final)" or "")
				38	for arc in state.arcs:
				39	label = arc.label
				40	next = arc.target
				41	if next in todo:
				42	j = todo.index(next)
				43	else:
				44	j = len(todo)
				45	todo.append(next)
				46	if label is None:
				47	writer(" -> %d" % j)
				48	else:
				49	writer(" %s -> %d" % (label, j))
				50
				51
				52	class NFAArc:
				53	"""An arc representing a transition between two NFA states.
				54
				55	NFA states can be connected via two ways:
				56
				57	* A label transition: An input equal to the label must
				58	be consumed to perform the transition.
				59	* An epsilon transition: The transition can be taken without
				60	consuming any input symbol.
				61
				62	Attributes:
				63	target (NFAState): The ending state of the transition arc.
				64	label (Optional[str]): The label that must be consumed to make
				65	the transition. An epsilon transition is represented
				66	using `None`.
				67	"""
				68
				69	def __init__(self, target, label):
				70	self.target = target
				71	self.label = label
				72
				73	def __repr__(self):
				74	return "<%s: %s>" % (self.__class__.__name__, self.label)
				75
				76
				77	class NFAState:
				78	"""A state of a NFA, non deterministic finite automata.
				79
				80	Attributes:
				81	target (rule_name): The name of the rule used to represent the NFA's
				82	ending state after a transition.
				83	arcs (Dict[Optional[str], NFAState]): A mapping representing transitions
				84	between the current NFA state and another NFA state via following
				85	a label.
				86	"""
				87
				88	def __init__(self, rule_name):
				89	self.rule_name = rule_name
				90	self.arcs = []
				91
				92	def add_arc(self, target, label=None):
				93	"""Add a new arc to connect the state to a target state within the NFA
				94
				95	The method adds a new arc to the list of arcs available as transitions
				96	from the present state. An optional label indicates a named transition
				97	that consumes an input while the absence of a label represents an epsilon
				98	transition.
				99
				100	Attributes:
				101	target (NFAState): The end of the transition that the arc represents.
				102	label (Optional[str]): The label that must be consumed for making
				103	the transition. If the label is not provided the transition is assumed
				104	to be an epsilon-transition.
				105	"""
				106	assert label is None or isinstance(label, str)
				107	assert isinstance(target, NFAState)
				108	self.arcs.append(NFAArc(target, label))
				109
				110	def __repr__(self):
				111	return "<%s: from %s>" % (self.__class__.__name__, self.rule_name)
				112
				113
				114	class DFA:
				115	"""A deterministic finite automata
				116
				117	A deterministic finite automata is a form of a finite state machine
				118	that obeys the following rules:
				119
				120	* Each of the transitions is uniquely determined by
				121	the source state and input symbol
				122	* Reading an input symbol is required for each state
				123	transition (no epsilon transitions).
				124
				125	The finite-state machine will accept or reject a string of symbols
				126	and only produces a unique computation of the automaton for each input
				127	string. The DFA must have a unique starting state (represented as the first
				128	element in the list of states) but can have multiple final states.
				129
				130	Attributes:
				131	name (str): The name of the rule the DFA is representing.
				132	states (List[DFAState]): A collection of DFA states.
				133	"""
				134
				135	def __init__(self, name, states):
				136	self.name = name
				137	self.states = states
				138
				139	@classmethod
				140	def from_nfa(cls, nfa):
				141	"""Constructs a DFA from a NFA using the Rabin–Scott construction algorithm.
				142
				143	To simulate the operation of a DFA on a given input string, it's
				144	necessary to keep track of a single state at any time, or more precisely,
				145	the state that the automaton will reach after seeing a prefix of the
				146	input. In contrast, to simulate an NFA, it's necessary to keep track of
				147	a set of states: all of the states that the automaton could reach after
				148	seeing the same prefix of the input, according to the nondeterministic
				149	choices made by the automaton. There are two possible sources of
				150	non-determinism:
				151
				152	1) Multiple (one or more) transitions with the same label
				153
				154	'A' +-------+
				155	+----------->+ State +----------->+
				156	\| \| 2 \|
				157	+-------+ +-------+
				158	\| State \|
				159	\| 1 \| +-------+
				160	+-------+ \| State \|
				161	+----------->+ 3 +----------->+
				162	'A' +-------+
				163
				164	2) Epsilon transitions (transitions that can be taken without consuming any input)
				165
				166	+-------+ +-------+
				167	\| State \| ε \| State \|
				168	\| 1 +----------->+ 2 +----------->+
				169	+-------+ +-------+
				170
				171	Looking at the first case above, we can't determine which transition should be
				172	followed when given an input A. We could choose whether or not to follow the
				173	transition while in the second case the problem is that we can choose both to
				174	follow the transition or not doing it. To solve this problem we can imagine that
				175	we follow all possibilities at the same time and we construct new states from the
				176	set of all possible reachable states. For every case in the previous example:
				177
				178
				179	1) For multiple transitions with the same label we colapse all of the
				180	final states under the same one
				181
				182	+-------+ +-------+
				183	\| State \| 'A' \| State \|
				184	\| 1 +----------->+ 2-3 +----------->+
				185	+-------+ +-------+
				186
				187	2) For epsilon transitions we collapse all epsilon-reachable states
				188	into the same one
				189
				190	+-------+
				191	\| State \|
				192	\| 1-2 +----------->
				193	+-------+
				194
				195	Because the DFA states consist of sets of NFA states, an n-state NFA
				196	may be converted to a DFA with at most 2**n states. Notice that the
				197	constructed DFA is not minimal and can be simplified or reduced
				198	afterwards.
				199
				200	Parameters:
				201	name (NFA): The NFA to transform to DFA.
				202	"""
				203	assert isinstance(nfa, NFA)
				204
				205	def add_closure(nfa_state, base_nfa_set):
				206	"""Calculate the epsilon-closure of a given state
				207
				208	Add to the base_nfa_set all the states that are
				209	reachable from nfa_state via epsilon-transitions.
				210	"""
				211	assert isinstance(nfa_state, NFAState)
				212	if nfa_state in base_nfa_set:
				213	return
				214	base_nfa_set.add(nfa_state)
				215	for nfa_arc in nfa_state.arcs:
				216	if nfa_arc.label is None:
				217	add_closure(nfa_arc.target, base_nfa_set)
				218
				219	# Calculte the epsilon-closure of the starting state
				220	base_nfa_set = set()
				221	add_closure(nfa.start, base_nfa_set)
				222
				223	# Start by visiting the NFA starting state (there is only one).
				224	states = [DFAState(nfa.name, base_nfa_set, nfa.end)]
				225
				226	for state in states: # NB states grow while we're iterating
				227
				228	# Find transitions from the current state to other reachable states
				229	# and store them in mapping that correlates the label to all the
				230	# possible reachable states that can be obtained by consuming a
				231	# token equal to the label. Each set of all the states that can
				232	# be reached after following a label will be the a DFA state.
				233	arcs = {}
				234	for nfa_state in state.nfa_set:
				235	for nfa_arc in nfa_state.arcs:
				236	if nfa_arc.label is not None:
				237	nfa_set = arcs.setdefault(nfa_arc.label, set())
				238	# All states that can be reached by epsilon-transitions
				239	# are also included in the set of reachable states.
				240	add_closure(nfa_arc.target, nfa_set)
				241
				242	# Now create new DFAs by visiting all posible transitions between
				243	# the current DFA state and the new power-set states (each nfa_set)
				244	# via the different labels. As the nodes are appended to states this
				245	# is performing a deep-first search traversal over the power-set of
				246	# the states of the original NFA.
				247	for label, nfa_set in sorted(arcs.items()):
				248	for exisisting_state in states:
				249	if exisisting_state.nfa_set == nfa_set:
				250	# The DFA state already exists for this rule.
				251	next_state = exisisting_state
				252	break
				253	else:
				254	next_state = DFAState(nfa.name, nfa_set, nfa.end)
				255	states.append(next_state)
				256
				257	# Add a transition between the current DFA state and the new
				258	# DFA state (the power-set state) via the current label.
				259	state.add_arc(next_state, label)
				260
				261	return cls(nfa.name, states)
				262
				263	def __iter__(self):
				264	return iter(self.states)
				265
				266	def simplify(self):
				267	"""Attempt to reduce the number of states of the DFA
				268
				269	Transform the DFA into an equivalent DFA that has fewer states. Two
				270	classes of states can be removed or merged from the original DFA without
				271	affecting the language it accepts to minimize it:
				272
				273	* Unreachable states can not be reached from the initial
				274	state of the DFA, for any input string.
				275	* Nondistinguishable states are those that cannot be distinguished
				276	from one another for any input string.
				277
				278	This algorithm does not achieve the optimal fully-reduced solution, but it
				279	works well enough for the particularities of the Python grammar. The
				280	algorithm repeatedly looks for two states that have the same set of
				281	arcs (same labels pointing to the same nodes) and unifies them, until
				282	things stop changing.
				283	"""
				284	changes = True
				285	while changes:
				286	changes = False
				287	for i, state_i in enumerate(self.states):
				288	for j in range(i + 1, len(self.states)):
				289	state_j = self.states[j]
				290	if state_i == state_j:
				291	del self.states[j]
				292	for state in self.states:
				293	state.unifystate(state_j, state_i)
				294	changes = True
				295	break
				296
				297	def dump(self, writer=print):
				298	"""Dump a graphical representation of the DFA"""
				299	for i, state in enumerate(self.states):
				300	writer(" State", i, state.is_final and "(final)" or "")
				301	for label, next in sorted(state.arcs.items()):
				302	writer(" %s -> %d" % (label, self.states.index(next)))
				303
				304
				305	class DFAState(object):
				306	"""A state of a DFA
				307
				308	Attributes:
				309	rule_name (rule_name): The name of the DFA rule containing the represented state.
				310	nfa_set (Set[NFAState]): The set of NFA states used to create this state.
				311	final (bool): True if the state represents an accepting state of the DFA
				312	containing this state.
				313	arcs (Dict[label, DFAState]): A mapping representing transitions between
				314	the current DFA state and another DFA state via following a label.
				315	"""
				316
				317	def __init__(self, rule_name, nfa_set, final):
				318	assert isinstance(nfa_set, set)
				319	assert isinstance(next(iter(nfa_set)), NFAState)
				320	assert isinstance(final, NFAState)
				321	self.rule_name = rule_name
				322	self.nfa_set = nfa_set
				323	self.arcs = {} # map from terminals/nonterminals to DFAState
				324	self.is_final = final in nfa_set
				325
				326	def add_arc(self, target, label):
				327	"""Add a new arc to the current state.
				328
				329	Parameters:
				330	target (DFAState): The DFA state at the end of the arc.
				331	label (str): The label respresenting the token that must be consumed
				332	to perform this transition.
				333	"""
				334	assert isinstance(label, str)
				335	assert label not in self.arcs
				336	assert isinstance(target, DFAState)
				337	self.arcs[label] = target
				338
				339	def unifystate(self, old, new):
				340	"""Replace all arcs from the current node to old with new.
				341
				342	Parameters:
				343	old (DFAState): The DFA state to remove from all existing arcs.
				344	new (DFAState): The DFA state to replace in all existing arcs.
				345	"""
				346	for label, next_ in self.arcs.items():
				347	if next_ is old:
				348	self.arcs[label] = new
				349
				350	def __eq__(self, other):
				351	# The nfa_set does not matter for equality
				352	assert isinstance(other, DFAState)
				353	if self.is_final != other.is_final:
				354	return False
				355	# We cannot just return self.arcs == other.arcs because that
				356	# would invoke this method recursively if there are any cycles.
				357	if len(self.arcs) != len(other.arcs):
				358	return False
				359	for label, next_ in self.arcs.items():
				360	if next_ is not other.arcs.get(label):
				361	return False
				362	return True
				363
				364	__hash__ = None # For Py3 compatibility.
				365
				366	def __repr__(self):
				367	return "<%s: %s is_final=%s>" % (
				368	self.__class__.__name__,
				369	self.rule_name,
				370	self.is_final,
				371	)