Blame - Parser/pgen/automata.py - platform/external/python/cpython3

blob: f2ed221e2852c97ee3cd458b5c5e069f8dc00ef7 [file] [log] [blame]

Pablo Galindo	71876fa	2019-08-22 02:38:39 +0100	[diff] [blame]	1	"""Classes representing state-machine concepts"""
				2
				3	class NFA:
				4	"""A non deterministic finite automata
				5
				6	A non deterministic automata is a form of a finite state
				7	machine. An NFA's rules are less restrictive than a DFA.
				8	The NFA rules are:
				9
				10	* A transition can be non-deterministic and can result in
				11	nothing, one, or two or more states.
				12
				13	* An epsilon transition consuming empty input is valid.
				14	Transitions consuming labeled symbols are also permitted.
				15
				16	This class assumes that there is only one starting state and one
				17	accepting (ending) state.
				18
				19	Attributes:
				20	name (str): The name of the rule the NFA is representing.
				21	start (NFAState): The starting state.
				22	end (NFAState): The ending state
				23	"""
				24
				25	def __init__(self, start, end):
				26	self.name = start.rule_name
				27	self.start = start
				28	self.end = end
				29
				30	def __repr__(self):
				31	return "NFA(start={}, end={})".format(self.start, self.end)
				32
				33	def dump(self, writer=print):
				34	"""Dump a graphical representation of the NFA"""
				35	todo = [self.start]
				36	for i, state in enumerate(todo):
				37	writer(" State", i, state is self.end and "(final)" or "")
				38	for arc in state.arcs:
				39	label = arc.label
				40	next = arc.target
				41	if next in todo:
				42	j = todo.index(next)
				43	else:
				44	j = len(todo)
				45	todo.append(next)
				46	if label is None:
				47	writer(" -> %d" % j)
				48	else:
				49	writer(" %s -> %d" % (label, j))
				50
Pablo Galindo	45cf5db	2020-01-14 22:32:55 +0000	[diff] [blame]	51	def dump_graph(self, writer):
				52	"""Dump a DOT representation of the NFA"""
				53	writer('digraph %s_nfa {\n' % self.name)
				54	todo = [self.start]
				55	for i, state in enumerate(todo):
				56	writer(' %d [label="State %d %s"];\n' % (i, i, state is self.end and "(final)" or ""))
				57	for arc in state.arcs:
				58	label = arc.label
				59	next = arc.target
				60	if next in todo:
				61	j = todo.index(next)
				62	else:
				63	j = len(todo)
				64	todo.append(next)
				65	if label is None:
				66	writer(" %d -> %d [style=dotted label=ε];\n" % (i, j))
				67	else:
				68	writer(" %d -> %d [label=%s];\n" % (i, j, label.replace("'", '"')))
				69	writer('}\n')
				70
Pablo Galindo	71876fa	2019-08-22 02:38:39 +0100	[diff] [blame]	71
				72	class NFAArc:
				73	"""An arc representing a transition between two NFA states.
				74
				75	NFA states can be connected via two ways:
				76
				77	* A label transition: An input equal to the label must
				78	be consumed to perform the transition.
				79	* An epsilon transition: The transition can be taken without
				80	consuming any input symbol.
				81
				82	Attributes:
				83	target (NFAState): The ending state of the transition arc.
				84	label (Optional[str]): The label that must be consumed to make
				85	the transition. An epsilon transition is represented
				86	using `None`.
				87	"""
				88
				89	def __init__(self, target, label):
				90	self.target = target
				91	self.label = label
				92
				93	def __repr__(self):
				94	return "<%s: %s>" % (self.__class__.__name__, self.label)
				95
				96
				97	class NFAState:
				98	"""A state of a NFA, non deterministic finite automata.
				99
				100	Attributes:
				101	target (rule_name): The name of the rule used to represent the NFA's
				102	ending state after a transition.
				103	arcs (Dict[Optional[str], NFAState]): A mapping representing transitions
				104	between the current NFA state and another NFA state via following
				105	a label.
				106	"""
				107
				108	def __init__(self, rule_name):
				109	self.rule_name = rule_name
				110	self.arcs = []
				111
				112	def add_arc(self, target, label=None):
				113	"""Add a new arc to connect the state to a target state within the NFA
				114
				115	The method adds a new arc to the list of arcs available as transitions
				116	from the present state. An optional label indicates a named transition
				117	that consumes an input while the absence of a label represents an epsilon
				118	transition.
				119
				120	Attributes:
				121	target (NFAState): The end of the transition that the arc represents.
				122	label (Optional[str]): The label that must be consumed for making
				123	the transition. If the label is not provided the transition is assumed
				124	to be an epsilon-transition.
				125	"""
				126	assert label is None or isinstance(label, str)
				127	assert isinstance(target, NFAState)
				128	self.arcs.append(NFAArc(target, label))
				129
				130	def __repr__(self):
				131	return "<%s: from %s>" % (self.__class__.__name__, self.rule_name)
				132
				133
				134	class DFA:
				135	"""A deterministic finite automata
				136
				137	A deterministic finite automata is a form of a finite state machine
				138	that obeys the following rules:
				139
				140	* Each of the transitions is uniquely determined by
				141	the source state and input symbol
				142	* Reading an input symbol is required for each state
				143	transition (no epsilon transitions).
				144
				145	The finite-state machine will accept or reject a string of symbols
				146	and only produces a unique computation of the automaton for each input
				147	string. The DFA must have a unique starting state (represented as the first
				148	element in the list of states) but can have multiple final states.
				149
				150	Attributes:
				151	name (str): The name of the rule the DFA is representing.
				152	states (List[DFAState]): A collection of DFA states.
				153	"""
				154
				155	def __init__(self, name, states):
				156	self.name = name
				157	self.states = states
				158
				159	@classmethod
				160	def from_nfa(cls, nfa):
				161	"""Constructs a DFA from a NFA using the Rabin–Scott construction algorithm.
				162
				163	To simulate the operation of a DFA on a given input string, it's
				164	necessary to keep track of a single state at any time, or more precisely,
				165	the state that the automaton will reach after seeing a prefix of the
				166	input. In contrast, to simulate an NFA, it's necessary to keep track of
				167	a set of states: all of the states that the automaton could reach after
				168	seeing the same prefix of the input, according to the nondeterministic
				169	choices made by the automaton. There are two possible sources of
				170	non-determinism:
				171
				172	1) Multiple (one or more) transitions with the same label
				173
				174	'A' +-------+
				175	+----------->+ State +----------->+
				176	\| \| 2 \|
				177	+-------+ +-------+
				178	\| State \|
				179	\| 1 \| +-------+
				180	+-------+ \| State \|
				181	+----------->+ 3 +----------->+
				182	'A' +-------+
				183
				184	2) Epsilon transitions (transitions that can be taken without consuming any input)
				185
				186	+-------+ +-------+
				187	\| State \| ε \| State \|
				188	\| 1 +----------->+ 2 +----------->+
				189	+-------+ +-------+
				190
				191	Looking at the first case above, we can't determine which transition should be
				192	followed when given an input A. We could choose whether or not to follow the
				193	transition while in the second case the problem is that we can choose both to
				194	follow the transition or not doing it. To solve this problem we can imagine that
				195	we follow all possibilities at the same time and we construct new states from the
				196	set of all possible reachable states. For every case in the previous example:
				197
				198
				199	1) For multiple transitions with the same label we colapse all of the
				200	final states under the same one
				201
				202	+-------+ +-------+
				203	\| State \| 'A' \| State \|
				204	\| 1 +----------->+ 2-3 +----------->+
				205	+-------+ +-------+
				206
				207	2) For epsilon transitions we collapse all epsilon-reachable states
				208	into the same one
				209
				210	+-------+
				211	\| State \|
				212	\| 1-2 +----------->
				213	+-------+
				214
				215	Because the DFA states consist of sets of NFA states, an n-state NFA
				216	may be converted to a DFA with at most 2**n states. Notice that the
				217	constructed DFA is not minimal and can be simplified or reduced
				218	afterwards.
				219
				220	Parameters:
				221	name (NFA): The NFA to transform to DFA.
				222	"""
				223	assert isinstance(nfa, NFA)
				224
				225	def add_closure(nfa_state, base_nfa_set):
				226	"""Calculate the epsilon-closure of a given state
				227
				228	Add to the base_nfa_set all the states that are
				229	reachable from nfa_state via epsilon-transitions.
				230	"""
				231	assert isinstance(nfa_state, NFAState)
				232	if nfa_state in base_nfa_set:
				233	return
				234	base_nfa_set.add(nfa_state)
				235	for nfa_arc in nfa_state.arcs:
				236	if nfa_arc.label is None:
				237	add_closure(nfa_arc.target, base_nfa_set)
				238
xatier	d7a04a8	2020-03-08 19:58:24 -0700	[diff] [blame]	239	# Calculate the epsilon-closure of the starting state
Pablo Galindo	71876fa	2019-08-22 02:38:39 +0100	[diff] [blame]	240	base_nfa_set = set()
				241	add_closure(nfa.start, base_nfa_set)
				242
				243	# Start by visiting the NFA starting state (there is only one).
				244	states = [DFAState(nfa.name, base_nfa_set, nfa.end)]
				245
				246	for state in states: # NB states grow while we're iterating
				247
				248	# Find transitions from the current state to other reachable states
				249	# and store them in mapping that correlates the label to all the
				250	# possible reachable states that can be obtained by consuming a
				251	# token equal to the label. Each set of all the states that can
				252	# be reached after following a label will be the a DFA state.
				253	arcs = {}
				254	for nfa_state in state.nfa_set:
				255	for nfa_arc in nfa_state.arcs:
				256	if nfa_arc.label is not None:
				257	nfa_set = arcs.setdefault(nfa_arc.label, set())
				258	# All states that can be reached by epsilon-transitions
				259	# are also included in the set of reachable states.
				260	add_closure(nfa_arc.target, nfa_set)
				261
				262	# Now create new DFAs by visiting all posible transitions between
				263	# the current DFA state and the new power-set states (each nfa_set)
				264	# via the different labels. As the nodes are appended to states this
Pablo Galindo	c638521	2019-09-09 15:08:23 +0100	[diff] [blame]	265	# is performing a breadth-first search traversal over the power-set of
Pablo Galindo	71876fa	2019-08-22 02:38:39 +0100	[diff] [blame]	266	# the states of the original NFA.
				267	for label, nfa_set in sorted(arcs.items()):
				268	for exisisting_state in states:
				269	if exisisting_state.nfa_set == nfa_set:
				270	# The DFA state already exists for this rule.
				271	next_state = exisisting_state
				272	break
				273	else:
				274	next_state = DFAState(nfa.name, nfa_set, nfa.end)
				275	states.append(next_state)
				276
				277	# Add a transition between the current DFA state and the new
				278	# DFA state (the power-set state) via the current label.
				279	state.add_arc(next_state, label)
				280
				281	return cls(nfa.name, states)
				282
				283	def __iter__(self):
				284	return iter(self.states)
				285
				286	def simplify(self):
				287	"""Attempt to reduce the number of states of the DFA
				288
				289	Transform the DFA into an equivalent DFA that has fewer states. Two
				290	classes of states can be removed or merged from the original DFA without
				291	affecting the language it accepts to minimize it:
				292
				293	* Unreachable states can not be reached from the initial
				294	state of the DFA, for any input string.
				295	* Nondistinguishable states are those that cannot be distinguished
				296	from one another for any input string.
				297
				298	This algorithm does not achieve the optimal fully-reduced solution, but it
				299	works well enough for the particularities of the Python grammar. The
				300	algorithm repeatedly looks for two states that have the same set of
				301	arcs (same labels pointing to the same nodes) and unifies them, until
				302	things stop changing.
				303	"""
				304	changes = True
				305	while changes:
				306	changes = False
				307	for i, state_i in enumerate(self.states):
				308	for j in range(i + 1, len(self.states)):
				309	state_j = self.states[j]
				310	if state_i == state_j:
				311	del self.states[j]
				312	for state in self.states:
				313	state.unifystate(state_j, state_i)
				314	changes = True
				315	break
				316
				317	def dump(self, writer=print):
				318	"""Dump a graphical representation of the DFA"""
				319	for i, state in enumerate(self.states):
				320	writer(" State", i, state.is_final and "(final)" or "")
				321	for label, next in sorted(state.arcs.items()):
				322	writer(" %s -> %d" % (label, self.states.index(next)))
				323
Pablo Galindo	45cf5db	2020-01-14 22:32:55 +0000	[diff] [blame]	324	def dump_graph(self, writer):
				325	"""Dump a DOT representation of the DFA"""
				326	writer('digraph %s_dfa {\n' % self.name)
				327	for i, state in enumerate(self.states):
				328	writer(' %d [label="State %d %s"];\n' % (i, i, state.is_final and "(final)" or ""))
				329	for label, next in sorted(state.arcs.items()):
				330	writer(" %d -> %d [label=%s];\n" % (i, self.states.index(next), label.replace("'", '"')))
				331	writer('}\n')
				332
Pablo Galindo	71876fa	2019-08-22 02:38:39 +0100	[diff] [blame]	333
				334	class DFAState(object):
				335	"""A state of a DFA
				336
				337	Attributes:
				338	rule_name (rule_name): The name of the DFA rule containing the represented state.
				339	nfa_set (Set[NFAState]): The set of NFA states used to create this state.
				340	final (bool): True if the state represents an accepting state of the DFA
				341	containing this state.
				342	arcs (Dict[label, DFAState]): A mapping representing transitions between
				343	the current DFA state and another DFA state via following a label.
				344	"""
				345
				346	def __init__(self, rule_name, nfa_set, final):
				347	assert isinstance(nfa_set, set)
				348	assert isinstance(next(iter(nfa_set)), NFAState)
				349	assert isinstance(final, NFAState)
				350	self.rule_name = rule_name
				351	self.nfa_set = nfa_set
				352	self.arcs = {} # map from terminals/nonterminals to DFAState
				353	self.is_final = final in nfa_set
				354
				355	def add_arc(self, target, label):
				356	"""Add a new arc to the current state.
				357
				358	Parameters:
				359	target (DFAState): The DFA state at the end of the arc.
				360	label (str): The label respresenting the token that must be consumed
				361	to perform this transition.
				362	"""
				363	assert isinstance(label, str)
				364	assert label not in self.arcs
				365	assert isinstance(target, DFAState)
				366	self.arcs[label] = target
				367
				368	def unifystate(self, old, new):
				369	"""Replace all arcs from the current node to old with new.
				370
				371	Parameters:
				372	old (DFAState): The DFA state to remove from all existing arcs.
				373	new (DFAState): The DFA state to replace in all existing arcs.
				374	"""
				375	for label, next_ in self.arcs.items():
				376	if next_ is old:
				377	self.arcs[label] = new
				378
				379	def __eq__(self, other):
				380	# The nfa_set does not matter for equality
				381	assert isinstance(other, DFAState)
				382	if self.is_final != other.is_final:
				383	return False
				384	# We cannot just return self.arcs == other.arcs because that
				385	# would invoke this method recursively if there are any cycles.
				386	if len(self.arcs) != len(other.arcs):
				387	return False
				388	for label, next_ in self.arcs.items():
				389	if next_ is not other.arcs.get(label):
				390	return False
				391	return True
				392
				393	__hash__ = None # For Py3 compatibility.
				394
				395	def __repr__(self):
				396	return "<%s: %s is_final=%s>" % (
				397	self.__class__.__name__,
				398	self.rule_name,
				399	self.is_final,
				400	)