blob: ce40e160ca886a70672b129be9957846d0ec3505 [file] [log] [blame]
Pablo Galindo1f24a712019-03-01 15:34:44 -08001import collections
2
Pablo Galindo8bc401a2019-03-04 07:26:13 +00003
Pablo Galindo1f24a712019-03-01 15:34:44 -08004class Grammar:
Pablo Galindo8bc401a2019-03-04 07:26:13 +00005 """Pgen parsing tables class.
Pablo Galindo1f24a712019-03-01 15:34:44 -08006
7 The instance variables are as follows:
8
9 symbol2number -- a dict mapping symbol names to numbers. Symbol
10 numbers are always 256 or higher, to distinguish
11 them from token numbers, which are between 0 and
12 255 (inclusive).
13
14 number2symbol -- a dict mapping numbers to symbol names;
15 these two are each other's inverse.
16
17 states -- a list of DFAs, where each DFA is a list of
18 states, each state is a list of arcs, and each
19 arc is a (i, j) pair where i is a label and j is
20 a state number. The DFA number is the index into
21 this list. (This name is slightly confusing.)
22 Final states are represented by a special arc of
23 the form (0, j) where j is its own state number.
24
25 dfas -- a dict mapping symbol numbers to (DFA, first)
26 pairs, where DFA is an item from the states list
27 above, and first is a set of tokens that can
Pablo Galindo8bc401a2019-03-04 07:26:13 +000028 begin this grammar rule.
Pablo Galindo1f24a712019-03-01 15:34:44 -080029
30 labels -- a list of (x, y) pairs where x is either a token
31 number or a symbol number, and y is either None
32 or a string; the strings are keywords. The label
33 number is the index in this list; label numbers
34 are used to mark state transitions (arcs) in the
35 DFAs.
36
37 start -- the number of the grammar's start symbol.
38
39 keywords -- a dict mapping keyword strings to arc labels.
40
41 tokens -- a dict mapping token numbers to arc labels.
42
43 """
44
45 def __init__(self):
46 self.symbol2number = collections.OrderedDict()
47 self.number2symbol = collections.OrderedDict()
48 self.states = []
49 self.dfas = collections.OrderedDict()
50 self.labels = [(0, "EMPTY")]
51 self.keywords = collections.OrderedDict()
52 self.tokens = collections.OrderedDict()
53 self.symbol2label = collections.OrderedDict()
54 self.start = 256
55
56 def produce_graminit_h(self, writer):
57 writer("/* Generated by Parser/pgen */\n\n")
58 for number, symbol in self.number2symbol.items():
59 writer("#define {} {}\n".format(symbol, number))
60
61 def produce_graminit_c(self, writer):
62 writer("/* Generated by Parser/pgen */\n\n")
63
Vinay Sajip0b60f642019-10-15 08:26:12 +010064 writer('#include "exports.h"\n')
Pablo Galindo1f24a712019-03-01 15:34:44 -080065 writer('#include "grammar.h"\n')
Vinay Sajip0b60f642019-10-15 08:26:12 +010066 writer("Py_EXPORTED_SYMBOL grammar _PyParser_Grammar;\n")
Pablo Galindo1f24a712019-03-01 15:34:44 -080067
68 self.print_dfas(writer)
69 self.print_labels(writer)
70
Vinay Sajip0b60f642019-10-15 08:26:12 +010071 writer("Py_EXPORTED_SYMBOL grammar _PyParser_Grammar = {\n")
Pablo Galindo1f24a712019-03-01 15:34:44 -080072 writer(" {n_dfas},\n".format(n_dfas=len(self.dfas)))
73 writer(" dfas,\n")
74 writer(" {{{n_labels}, labels}},\n".format(n_labels=len(self.labels)))
75 writer(" {start_number}\n".format(start_number=self.start))
76 writer("};\n")
77
78 def print_labels(self, writer):
79 writer(
Pablo Galindo71876fa2019-08-22 02:38:39 +010080 "static const label labels[{n_labels}] = {{\n".format(
81 n_labels=len(self.labels)
82 )
Pablo Galindo1f24a712019-03-01 15:34:44 -080083 )
84 for label, name in self.labels:
Pablo Galindo8bc401a2019-03-04 07:26:13 +000085 label_name = '"{}"'.format(name) if name is not None else 0
86 writer(
Pablo Galindo71876fa2019-08-22 02:38:39 +010087 " {{{label}, {label_name}}},\n".format(
Pablo Galindo8bc401a2019-03-04 07:26:13 +000088 label=label, label_name=label_name
Pablo Galindo1f24a712019-03-01 15:34:44 -080089 )
Pablo Galindo8bc401a2019-03-04 07:26:13 +000090 )
Pablo Galindo1f24a712019-03-01 15:34:44 -080091 writer("};\n")
92
93 def print_dfas(self, writer):
94 self.print_states(writer)
tyomitch84b47842019-04-23 12:29:57 +030095 writer("static const dfa dfas[{}] = {{\n".format(len(self.dfas)))
Pablo Galindo1f24a712019-03-01 15:34:44 -080096 for dfaindex, dfa_elem in enumerate(self.dfas.items()):
97 symbol, (dfa, first_sets) = dfa_elem
98 writer(
99 ' {{{dfa_symbol}, "{symbol_name}", '.format(
100 dfa_symbol=symbol, symbol_name=self.number2symbol[symbol]
101 )
tyomitch1b304f92019-03-09 17:35:50 +0200102 + "{n_states}, states_{dfa_index},\n".format(
Pablo Galindo1f24a712019-03-01 15:34:44 -0800103 n_states=len(dfa), dfa_index=dfaindex
104 )
Pablo Galindo8bc401a2019-03-04 07:26:13 +0000105 + ' "'
Pablo Galindo1f24a712019-03-01 15:34:44 -0800106 )
Pablo Galindo1f24a712019-03-01 15:34:44 -0800107
Pablo Galindo1f24a712019-03-01 15:34:44 -0800108 bitset = bytearray((len(self.labels) >> 3) + 1)
109 for token in first_sets:
110 bitset[token >> 3] |= 1 << (token & 7)
111 for byte in bitset:
112 writer("\\%03o" % (byte & 0xFF))
113 writer('"},\n')
114 writer("};\n")
115
116 def print_states(self, write):
117 for dfaindex, dfa in enumerate(self.states):
118 self.print_arcs(write, dfaindex, dfa)
119 write(
120 "static state states_{dfa_index}[{n_states}] = {{\n".format(
121 dfa_index=dfaindex, n_states=len(dfa)
122 )
123 )
124 for stateindex, state in enumerate(dfa):
125 narcs = len(state)
126 write(
127 " {{{n_arcs}, arcs_{dfa_index}_{state_index}}},\n".format(
128 n_arcs=narcs, dfa_index=dfaindex, state_index=stateindex
129 )
130 )
131 write("};\n")
132
133 def print_arcs(self, write, dfaindex, states):
134 for stateindex, state in enumerate(states):
135 narcs = len(state)
136 write(
tyomitch84b47842019-04-23 12:29:57 +0300137 "static const arc arcs_{dfa_index}_{state_index}[{n_arcs}] = {{\n".format(
Pablo Galindo1f24a712019-03-01 15:34:44 -0800138 dfa_index=dfaindex, state_index=stateindex, n_arcs=narcs
139 )
140 )
141 for a, b in state:
142 write(
143 " {{{from_label}, {to_state}}},\n".format(
144 from_label=a, to_state=b
145 )
146 )
147 write("};\n")