Pablo Galindo | 1f24a71 | 2019-03-01 15:34:44 -0800 | [diff] [blame] | 1 | import collections |
| 2 | |
Pablo Galindo | 8bc401a | 2019-03-04 07:26:13 +0000 | [diff] [blame] | 3 | |
Pablo Galindo | 1f24a71 | 2019-03-01 15:34:44 -0800 | [diff] [blame] | 4 | class Grammar: |
Pablo Galindo | 8bc401a | 2019-03-04 07:26:13 +0000 | [diff] [blame] | 5 | """Pgen parsing tables class. |
Pablo Galindo | 1f24a71 | 2019-03-01 15:34:44 -0800 | [diff] [blame] | 6 | |
| 7 | The instance variables are as follows: |
| 8 | |
| 9 | symbol2number -- a dict mapping symbol names to numbers. Symbol |
| 10 | numbers are always 256 or higher, to distinguish |
| 11 | them from token numbers, which are between 0 and |
| 12 | 255 (inclusive). |
| 13 | |
| 14 | number2symbol -- a dict mapping numbers to symbol names; |
| 15 | these two are each other's inverse. |
| 16 | |
| 17 | states -- a list of DFAs, where each DFA is a list of |
| 18 | states, each state is a list of arcs, and each |
| 19 | arc is a (i, j) pair where i is a label and j is |
| 20 | a state number. The DFA number is the index into |
| 21 | this list. (This name is slightly confusing.) |
| 22 | Final states are represented by a special arc of |
| 23 | the form (0, j) where j is its own state number. |
| 24 | |
| 25 | dfas -- a dict mapping symbol numbers to (DFA, first) |
| 26 | pairs, where DFA is an item from the states list |
| 27 | above, and first is a set of tokens that can |
Pablo Galindo | 8bc401a | 2019-03-04 07:26:13 +0000 | [diff] [blame] | 28 | begin this grammar rule. |
Pablo Galindo | 1f24a71 | 2019-03-01 15:34:44 -0800 | [diff] [blame] | 29 | |
| 30 | labels -- a list of (x, y) pairs where x is either a token |
| 31 | number or a symbol number, and y is either None |
| 32 | or a string; the strings are keywords. The label |
| 33 | number is the index in this list; label numbers |
| 34 | are used to mark state transitions (arcs) in the |
| 35 | DFAs. |
| 36 | |
| 37 | start -- the number of the grammar's start symbol. |
| 38 | |
| 39 | keywords -- a dict mapping keyword strings to arc labels. |
| 40 | |
| 41 | tokens -- a dict mapping token numbers to arc labels. |
| 42 | |
| 43 | """ |
| 44 | |
| 45 | def __init__(self): |
| 46 | self.symbol2number = collections.OrderedDict() |
| 47 | self.number2symbol = collections.OrderedDict() |
| 48 | self.states = [] |
| 49 | self.dfas = collections.OrderedDict() |
| 50 | self.labels = [(0, "EMPTY")] |
| 51 | self.keywords = collections.OrderedDict() |
| 52 | self.tokens = collections.OrderedDict() |
| 53 | self.symbol2label = collections.OrderedDict() |
| 54 | self.start = 256 |
| 55 | |
| 56 | def produce_graminit_h(self, writer): |
| 57 | writer("/* Generated by Parser/pgen */\n\n") |
| 58 | for number, symbol in self.number2symbol.items(): |
| 59 | writer("#define {} {}\n".format(symbol, number)) |
| 60 | |
| 61 | def produce_graminit_c(self, writer): |
| 62 | writer("/* Generated by Parser/pgen */\n\n") |
| 63 | |
Vinay Sajip | 0b60f64 | 2019-10-15 08:26:12 +0100 | [diff] [blame] | 64 | writer('#include "exports.h"\n') |
Pablo Galindo | 1f24a71 | 2019-03-01 15:34:44 -0800 | [diff] [blame] | 65 | writer('#include "grammar.h"\n') |
Vinay Sajip | 0b60f64 | 2019-10-15 08:26:12 +0100 | [diff] [blame] | 66 | writer("Py_EXPORTED_SYMBOL grammar _PyParser_Grammar;\n") |
Pablo Galindo | 1f24a71 | 2019-03-01 15:34:44 -0800 | [diff] [blame] | 67 | |
| 68 | self.print_dfas(writer) |
| 69 | self.print_labels(writer) |
| 70 | |
Vinay Sajip | 0b60f64 | 2019-10-15 08:26:12 +0100 | [diff] [blame] | 71 | writer("Py_EXPORTED_SYMBOL grammar _PyParser_Grammar = {\n") |
Pablo Galindo | 1f24a71 | 2019-03-01 15:34:44 -0800 | [diff] [blame] | 72 | writer(" {n_dfas},\n".format(n_dfas=len(self.dfas))) |
| 73 | writer(" dfas,\n") |
| 74 | writer(" {{{n_labels}, labels}},\n".format(n_labels=len(self.labels))) |
| 75 | writer(" {start_number}\n".format(start_number=self.start)) |
| 76 | writer("};\n") |
| 77 | |
| 78 | def print_labels(self, writer): |
| 79 | writer( |
Pablo Galindo | 71876fa | 2019-08-22 02:38:39 +0100 | [diff] [blame] | 80 | "static const label labels[{n_labels}] = {{\n".format( |
| 81 | n_labels=len(self.labels) |
| 82 | ) |
Pablo Galindo | 1f24a71 | 2019-03-01 15:34:44 -0800 | [diff] [blame] | 83 | ) |
| 84 | for label, name in self.labels: |
Pablo Galindo | 8bc401a | 2019-03-04 07:26:13 +0000 | [diff] [blame] | 85 | label_name = '"{}"'.format(name) if name is not None else 0 |
| 86 | writer( |
Pablo Galindo | 71876fa | 2019-08-22 02:38:39 +0100 | [diff] [blame] | 87 | " {{{label}, {label_name}}},\n".format( |
Pablo Galindo | 8bc401a | 2019-03-04 07:26:13 +0000 | [diff] [blame] | 88 | label=label, label_name=label_name |
Pablo Galindo | 1f24a71 | 2019-03-01 15:34:44 -0800 | [diff] [blame] | 89 | ) |
Pablo Galindo | 8bc401a | 2019-03-04 07:26:13 +0000 | [diff] [blame] | 90 | ) |
Pablo Galindo | 1f24a71 | 2019-03-01 15:34:44 -0800 | [diff] [blame] | 91 | writer("};\n") |
| 92 | |
| 93 | def print_dfas(self, writer): |
| 94 | self.print_states(writer) |
tyomitch | 84b4784 | 2019-04-23 12:29:57 +0300 | [diff] [blame] | 95 | writer("static const dfa dfas[{}] = {{\n".format(len(self.dfas))) |
Pablo Galindo | 1f24a71 | 2019-03-01 15:34:44 -0800 | [diff] [blame] | 96 | for dfaindex, dfa_elem in enumerate(self.dfas.items()): |
| 97 | symbol, (dfa, first_sets) = dfa_elem |
| 98 | writer( |
| 99 | ' {{{dfa_symbol}, "{symbol_name}", '.format( |
| 100 | dfa_symbol=symbol, symbol_name=self.number2symbol[symbol] |
| 101 | ) |
tyomitch | 1b304f9 | 2019-03-09 17:35:50 +0200 | [diff] [blame] | 102 | + "{n_states}, states_{dfa_index},\n".format( |
Pablo Galindo | 1f24a71 | 2019-03-01 15:34:44 -0800 | [diff] [blame] | 103 | n_states=len(dfa), dfa_index=dfaindex |
| 104 | ) |
Pablo Galindo | 8bc401a | 2019-03-04 07:26:13 +0000 | [diff] [blame] | 105 | + ' "' |
Pablo Galindo | 1f24a71 | 2019-03-01 15:34:44 -0800 | [diff] [blame] | 106 | ) |
Pablo Galindo | 1f24a71 | 2019-03-01 15:34:44 -0800 | [diff] [blame] | 107 | |
Pablo Galindo | 1f24a71 | 2019-03-01 15:34:44 -0800 | [diff] [blame] | 108 | bitset = bytearray((len(self.labels) >> 3) + 1) |
| 109 | for token in first_sets: |
| 110 | bitset[token >> 3] |= 1 << (token & 7) |
| 111 | for byte in bitset: |
| 112 | writer("\\%03o" % (byte & 0xFF)) |
| 113 | writer('"},\n') |
| 114 | writer("};\n") |
| 115 | |
| 116 | def print_states(self, write): |
| 117 | for dfaindex, dfa in enumerate(self.states): |
| 118 | self.print_arcs(write, dfaindex, dfa) |
| 119 | write( |
| 120 | "static state states_{dfa_index}[{n_states}] = {{\n".format( |
| 121 | dfa_index=dfaindex, n_states=len(dfa) |
| 122 | ) |
| 123 | ) |
| 124 | for stateindex, state in enumerate(dfa): |
| 125 | narcs = len(state) |
| 126 | write( |
| 127 | " {{{n_arcs}, arcs_{dfa_index}_{state_index}}},\n".format( |
| 128 | n_arcs=narcs, dfa_index=dfaindex, state_index=stateindex |
| 129 | ) |
| 130 | ) |
| 131 | write("};\n") |
| 132 | |
| 133 | def print_arcs(self, write, dfaindex, states): |
| 134 | for stateindex, state in enumerate(states): |
| 135 | narcs = len(state) |
| 136 | write( |
tyomitch | 84b4784 | 2019-04-23 12:29:57 +0300 | [diff] [blame] | 137 | "static const arc arcs_{dfa_index}_{state_index}[{n_arcs}] = {{\n".format( |
Pablo Galindo | 1f24a71 | 2019-03-01 15:34:44 -0800 | [diff] [blame] | 138 | dfa_index=dfaindex, state_index=stateindex, n_arcs=narcs |
| 139 | ) |
| 140 | ) |
| 141 | for a, b in state: |
| 142 | write( |
| 143 | " {{{from_label}, {to_state}}},\n".format( |
| 144 | from_label=a, to_state=b |
| 145 | ) |
| 146 | ) |
| 147 | write("};\n") |