blob: 3147d8636ff3eb8c696c79f7c6eff48aa093077a [file] [log] [blame]
Pablo Galindo71876fa2019-08-22 02:38:39 +01001"""Classes representing state-machine concepts"""
2
3class NFA:
4 """A non deterministic finite automata
5
6 A non deterministic automata is a form of a finite state
7 machine. An NFA's rules are less restrictive than a DFA.
8 The NFA rules are:
9
10 * A transition can be non-deterministic and can result in
11 nothing, one, or two or more states.
12
13 * An epsilon transition consuming empty input is valid.
14 Transitions consuming labeled symbols are also permitted.
15
16 This class assumes that there is only one starting state and one
17 accepting (ending) state.
18
19 Attributes:
20 name (str): The name of the rule the NFA is representing.
21 start (NFAState): The starting state.
22 end (NFAState): The ending state
23 """
24
25 def __init__(self, start, end):
26 self.name = start.rule_name
27 self.start = start
28 self.end = end
29
30 def __repr__(self):
31 return "NFA(start={}, end={})".format(self.start, self.end)
32
33 def dump(self, writer=print):
34 """Dump a graphical representation of the NFA"""
35 todo = [self.start]
36 for i, state in enumerate(todo):
37 writer(" State", i, state is self.end and "(final)" or "")
38 for arc in state.arcs:
39 label = arc.label
40 next = arc.target
41 if next in todo:
42 j = todo.index(next)
43 else:
44 j = len(todo)
45 todo.append(next)
46 if label is None:
47 writer(" -> %d" % j)
48 else:
49 writer(" %s -> %d" % (label, j))
50
51
52class NFAArc:
53 """An arc representing a transition between two NFA states.
54
55 NFA states can be connected via two ways:
56
57 * A label transition: An input equal to the label must
58 be consumed to perform the transition.
59 * An epsilon transition: The transition can be taken without
60 consuming any input symbol.
61
62 Attributes:
63 target (NFAState): The ending state of the transition arc.
64 label (Optional[str]): The label that must be consumed to make
65 the transition. An epsilon transition is represented
66 using `None`.
67 """
68
69 def __init__(self, target, label):
70 self.target = target
71 self.label = label
72
73 def __repr__(self):
74 return "<%s: %s>" % (self.__class__.__name__, self.label)
75
76
77class NFAState:
78 """A state of a NFA, non deterministic finite automata.
79
80 Attributes:
81 target (rule_name): The name of the rule used to represent the NFA's
82 ending state after a transition.
83 arcs (Dict[Optional[str], NFAState]): A mapping representing transitions
84 between the current NFA state and another NFA state via following
85 a label.
86 """
87
88 def __init__(self, rule_name):
89 self.rule_name = rule_name
90 self.arcs = []
91
92 def add_arc(self, target, label=None):
93 """Add a new arc to connect the state to a target state within the NFA
94
95 The method adds a new arc to the list of arcs available as transitions
96 from the present state. An optional label indicates a named transition
97 that consumes an input while the absence of a label represents an epsilon
98 transition.
99
100 Attributes:
101 target (NFAState): The end of the transition that the arc represents.
102 label (Optional[str]): The label that must be consumed for making
103 the transition. If the label is not provided the transition is assumed
104 to be an epsilon-transition.
105 """
106 assert label is None or isinstance(label, str)
107 assert isinstance(target, NFAState)
108 self.arcs.append(NFAArc(target, label))
109
110 def __repr__(self):
111 return "<%s: from %s>" % (self.__class__.__name__, self.rule_name)
112
113
114class DFA:
115 """A deterministic finite automata
116
117 A deterministic finite automata is a form of a finite state machine
118 that obeys the following rules:
119
120 * Each of the transitions is uniquely determined by
121 the source state and input symbol
122 * Reading an input symbol is required for each state
123 transition (no epsilon transitions).
124
125 The finite-state machine will accept or reject a string of symbols
126 and only produces a unique computation of the automaton for each input
127 string. The DFA must have a unique starting state (represented as the first
128 element in the list of states) but can have multiple final states.
129
130 Attributes:
131 name (str): The name of the rule the DFA is representing.
132 states (List[DFAState]): A collection of DFA states.
133 """
134
135 def __init__(self, name, states):
136 self.name = name
137 self.states = states
138
139 @classmethod
140 def from_nfa(cls, nfa):
141 """Constructs a DFA from a NFA using the Rabin–Scott construction algorithm.
142
143 To simulate the operation of a DFA on a given input string, it's
144 necessary to keep track of a single state at any time, or more precisely,
145 the state that the automaton will reach after seeing a prefix of the
146 input. In contrast, to simulate an NFA, it's necessary to keep track of
147 a set of states: all of the states that the automaton could reach after
148 seeing the same prefix of the input, according to the nondeterministic
149 choices made by the automaton. There are two possible sources of
150 non-determinism:
151
152 1) Multiple (one or more) transitions with the same label
153
154 'A' +-------+
155 +----------->+ State +----------->+
156 | | 2 |
157 +-------+ +-------+
158 | State |
159 | 1 | +-------+
160 +-------+ | State |
161 +----------->+ 3 +----------->+
162 'A' +-------+
163
164 2) Epsilon transitions (transitions that can be taken without consuming any input)
165
166 +-------+ +-------+
167 | State | ε | State |
168 | 1 +----------->+ 2 +----------->+
169 +-------+ +-------+
170
171 Looking at the first case above, we can't determine which transition should be
172 followed when given an input A. We could choose whether or not to follow the
173 transition while in the second case the problem is that we can choose both to
174 follow the transition or not doing it. To solve this problem we can imagine that
175 we follow all possibilities at the same time and we construct new states from the
176 set of all possible reachable states. For every case in the previous example:
177
178
179 1) For multiple transitions with the same label we colapse all of the
180 final states under the same one
181
182 +-------+ +-------+
183 | State | 'A' | State |
184 | 1 +----------->+ 2-3 +----------->+
185 +-------+ +-------+
186
187 2) For epsilon transitions we collapse all epsilon-reachable states
188 into the same one
189
190 +-------+
191 | State |
192 | 1-2 +----------->
193 +-------+
194
195 Because the DFA states consist of sets of NFA states, an n-state NFA
196 may be converted to a DFA with at most 2**n states. Notice that the
197 constructed DFA is not minimal and can be simplified or reduced
198 afterwards.
199
200 Parameters:
201 name (NFA): The NFA to transform to DFA.
202 """
203 assert isinstance(nfa, NFA)
204
205 def add_closure(nfa_state, base_nfa_set):
206 """Calculate the epsilon-closure of a given state
207
208 Add to the *base_nfa_set* all the states that are
209 reachable from *nfa_state* via epsilon-transitions.
210 """
211 assert isinstance(nfa_state, NFAState)
212 if nfa_state in base_nfa_set:
213 return
214 base_nfa_set.add(nfa_state)
215 for nfa_arc in nfa_state.arcs:
216 if nfa_arc.label is None:
217 add_closure(nfa_arc.target, base_nfa_set)
218
219 # Calculte the epsilon-closure of the starting state
220 base_nfa_set = set()
221 add_closure(nfa.start, base_nfa_set)
222
223 # Start by visiting the NFA starting state (there is only one).
224 states = [DFAState(nfa.name, base_nfa_set, nfa.end)]
225
226 for state in states: # NB states grow while we're iterating
227
228 # Find transitions from the current state to other reachable states
229 # and store them in mapping that correlates the label to all the
230 # possible reachable states that can be obtained by consuming a
231 # token equal to the label. Each set of all the states that can
232 # be reached after following a label will be the a DFA state.
233 arcs = {}
234 for nfa_state in state.nfa_set:
235 for nfa_arc in nfa_state.arcs:
236 if nfa_arc.label is not None:
237 nfa_set = arcs.setdefault(nfa_arc.label, set())
238 # All states that can be reached by epsilon-transitions
239 # are also included in the set of reachable states.
240 add_closure(nfa_arc.target, nfa_set)
241
242 # Now create new DFAs by visiting all posible transitions between
243 # the current DFA state and the new power-set states (each nfa_set)
244 # via the different labels. As the nodes are appended to *states* this
245 # is performing a deep-first search traversal over the power-set of
246 # the states of the original NFA.
247 for label, nfa_set in sorted(arcs.items()):
248 for exisisting_state in states:
249 if exisisting_state.nfa_set == nfa_set:
250 # The DFA state already exists for this rule.
251 next_state = exisisting_state
252 break
253 else:
254 next_state = DFAState(nfa.name, nfa_set, nfa.end)
255 states.append(next_state)
256
257 # Add a transition between the current DFA state and the new
258 # DFA state (the power-set state) via the current label.
259 state.add_arc(next_state, label)
260
261 return cls(nfa.name, states)
262
263 def __iter__(self):
264 return iter(self.states)
265
266 def simplify(self):
267 """Attempt to reduce the number of states of the DFA
268
269 Transform the DFA into an equivalent DFA that has fewer states. Two
270 classes of states can be removed or merged from the original DFA without
271 affecting the language it accepts to minimize it:
272
273 * Unreachable states can not be reached from the initial
274 state of the DFA, for any input string.
275 * Nondistinguishable states are those that cannot be distinguished
276 from one another for any input string.
277
278 This algorithm does not achieve the optimal fully-reduced solution, but it
279 works well enough for the particularities of the Python grammar. The
280 algorithm repeatedly looks for two states that have the same set of
281 arcs (same labels pointing to the same nodes) and unifies them, until
282 things stop changing.
283 """
284 changes = True
285 while changes:
286 changes = False
287 for i, state_i in enumerate(self.states):
288 for j in range(i + 1, len(self.states)):
289 state_j = self.states[j]
290 if state_i == state_j:
291 del self.states[j]
292 for state in self.states:
293 state.unifystate(state_j, state_i)
294 changes = True
295 break
296
297 def dump(self, writer=print):
298 """Dump a graphical representation of the DFA"""
299 for i, state in enumerate(self.states):
300 writer(" State", i, state.is_final and "(final)" or "")
301 for label, next in sorted(state.arcs.items()):
302 writer(" %s -> %d" % (label, self.states.index(next)))
303
304
305class DFAState(object):
306 """A state of a DFA
307
308 Attributes:
309 rule_name (rule_name): The name of the DFA rule containing the represented state.
310 nfa_set (Set[NFAState]): The set of NFA states used to create this state.
311 final (bool): True if the state represents an accepting state of the DFA
312 containing this state.
313 arcs (Dict[label, DFAState]): A mapping representing transitions between
314 the current DFA state and another DFA state via following a label.
315 """
316
317 def __init__(self, rule_name, nfa_set, final):
318 assert isinstance(nfa_set, set)
319 assert isinstance(next(iter(nfa_set)), NFAState)
320 assert isinstance(final, NFAState)
321 self.rule_name = rule_name
322 self.nfa_set = nfa_set
323 self.arcs = {} # map from terminals/nonterminals to DFAState
324 self.is_final = final in nfa_set
325
326 def add_arc(self, target, label):
327 """Add a new arc to the current state.
328
329 Parameters:
330 target (DFAState): The DFA state at the end of the arc.
331 label (str): The label respresenting the token that must be consumed
332 to perform this transition.
333 """
334 assert isinstance(label, str)
335 assert label not in self.arcs
336 assert isinstance(target, DFAState)
337 self.arcs[label] = target
338
339 def unifystate(self, old, new):
340 """Replace all arcs from the current node to *old* with *new*.
341
342 Parameters:
343 old (DFAState): The DFA state to remove from all existing arcs.
344 new (DFAState): The DFA state to replace in all existing arcs.
345 """
346 for label, next_ in self.arcs.items():
347 if next_ is old:
348 self.arcs[label] = new
349
350 def __eq__(self, other):
351 # The nfa_set does not matter for equality
352 assert isinstance(other, DFAState)
353 if self.is_final != other.is_final:
354 return False
355 # We cannot just return self.arcs == other.arcs because that
356 # would invoke this method recursively if there are any cycles.
357 if len(self.arcs) != len(other.arcs):
358 return False
359 for label, next_ in self.arcs.items():
360 if next_ is not other.arcs.get(label):
361 return False
362 return True
363
364 __hash__ = None # For Py3 compatibility.
365
366 def __repr__(self):
367 return "<%s: %s is_final=%s>" % (
368 self.__class__.__name__,
369 self.rule_name,
370 self.is_final,
371 )