blob: bd405e674e447e136d83f3d1e4a7eb5792145425 [file] [log] [blame]
Pablo Galindo1f24a712019-03-01 15:34:44 -08001import collections
2
3class Grammar:
4 """Pgen parsing tables conversion class.
5
6 Once initialized, this class supplies the grammar tables for the
7 parsing engine implemented by parse.py. The parsing engine
8 accesses the instance variables directly. The class here does not
9 provide initialization of the tables; several subclasses exist to
10 do this (see the conv and pgen modules).
11
12 The load() method reads the tables from a pickle file, which is
13 much faster than the other ways offered by subclasses. The pickle
14 file is written by calling dump() (after loading the grammar
15 tables using a subclass). The report() method prints a readable
16 representation of the tables to stdout, for debugging.
17
18 The instance variables are as follows:
19
20 symbol2number -- a dict mapping symbol names to numbers. Symbol
21 numbers are always 256 or higher, to distinguish
22 them from token numbers, which are between 0 and
23 255 (inclusive).
24
25 number2symbol -- a dict mapping numbers to symbol names;
26 these two are each other's inverse.
27
28 states -- a list of DFAs, where each DFA is a list of
29 states, each state is a list of arcs, and each
30 arc is a (i, j) pair where i is a label and j is
31 a state number. The DFA number is the index into
32 this list. (This name is slightly confusing.)
33 Final states are represented by a special arc of
34 the form (0, j) where j is its own state number.
35
36 dfas -- a dict mapping symbol numbers to (DFA, first)
37 pairs, where DFA is an item from the states list
38 above, and first is a set of tokens that can
39 begin this grammar rule (represented by a dict
40 whose values are always 1).
41
42 labels -- a list of (x, y) pairs where x is either a token
43 number or a symbol number, and y is either None
44 or a string; the strings are keywords. The label
45 number is the index in this list; label numbers
46 are used to mark state transitions (arcs) in the
47 DFAs.
48
49 start -- the number of the grammar's start symbol.
50
51 keywords -- a dict mapping keyword strings to arc labels.
52
53 tokens -- a dict mapping token numbers to arc labels.
54
55 """
56
57 def __init__(self):
58 self.symbol2number = collections.OrderedDict()
59 self.number2symbol = collections.OrderedDict()
60 self.states = []
61 self.dfas = collections.OrderedDict()
62 self.labels = [(0, "EMPTY")]
63 self.keywords = collections.OrderedDict()
64 self.tokens = collections.OrderedDict()
65 self.symbol2label = collections.OrderedDict()
66 self.start = 256
67
68 def produce_graminit_h(self, writer):
69 writer("/* Generated by Parser/pgen */\n\n")
70 for number, symbol in self.number2symbol.items():
71 writer("#define {} {}\n".format(symbol, number))
72
73 def produce_graminit_c(self, writer):
74 writer("/* Generated by Parser/pgen */\n\n")
75
76 writer('#include "pgenheaders.h"\n')
77 writer('#include "grammar.h"\n')
78 writer("grammar _PyParser_Grammar;\n")
79
80 self.print_dfas(writer)
81 self.print_labels(writer)
82
83 writer("grammar _PyParser_Grammar = {\n")
84 writer(" {n_dfas},\n".format(n_dfas=len(self.dfas)))
85 writer(" dfas,\n")
86 writer(" {{{n_labels}, labels}},\n".format(n_labels=len(self.labels)))
87 writer(" {start_number}\n".format(start_number=self.start))
88 writer("};\n")
89
90 def print_labels(self, writer):
91 writer(
92 "static label labels[{n_labels}] = {{\n".format(n_labels=len(self.labels))
93 )
94 for label, name in self.labels:
95 if name is None:
96 writer(" {{{label}, 0}},\n".format(label=label))
97 else:
98 writer(
99 ' {{{label}, "{label_name}"}},\n'.format(
100 label=label, label_name=name
101 )
102 )
103 writer("};\n")
104
105 def print_dfas(self, writer):
106 self.print_states(writer)
107 writer("static dfa dfas[{}] = {{\n".format(len(self.dfas)))
108 for dfaindex, dfa_elem in enumerate(self.dfas.items()):
109 symbol, (dfa, first_sets) = dfa_elem
110 writer(
111 ' {{{dfa_symbol}, "{symbol_name}", '.format(
112 dfa_symbol=symbol, symbol_name=self.number2symbol[symbol]
113 )
114 + "0, {n_states}, states_{dfa_index},\n".format(
115 n_states=len(dfa), dfa_index=dfaindex
116 )
117 )
118 writer(' "')
119
120 k = [name for label, name in self.labels if label in first_sets]
121 bitset = bytearray((len(self.labels) >> 3) + 1)
122 for token in first_sets:
123 bitset[token >> 3] |= 1 << (token & 7)
124 for byte in bitset:
125 writer("\\%03o" % (byte & 0xFF))
126 writer('"},\n')
127 writer("};\n")
128
129 def print_states(self, write):
130 for dfaindex, dfa in enumerate(self.states):
131 self.print_arcs(write, dfaindex, dfa)
132 write(
133 "static state states_{dfa_index}[{n_states}] = {{\n".format(
134 dfa_index=dfaindex, n_states=len(dfa)
135 )
136 )
137 for stateindex, state in enumerate(dfa):
138 narcs = len(state)
139 write(
140 " {{{n_arcs}, arcs_{dfa_index}_{state_index}}},\n".format(
141 n_arcs=narcs, dfa_index=dfaindex, state_index=stateindex
142 )
143 )
144 write("};\n")
145
146 def print_arcs(self, write, dfaindex, states):
147 for stateindex, state in enumerate(states):
148 narcs = len(state)
149 write(
150 "static arc arcs_{dfa_index}_{state_index}[{n_arcs}] = {{\n".format(
151 dfa_index=dfaindex, state_index=stateindex, n_arcs=narcs
152 )
153 )
154 for a, b in state:
155 write(
156 " {{{from_label}, {to_state}}},\n".format(
157 from_label=a, to_state=b
158 )
159 )
160 write("};\n")