| # Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. |
| # Licensed to PSF under a Contributor Agreement. |
| |
| """This module defines the data structures used to represent a grammar. |
| |
| These are a bit arcane because they are derived from the data |
| structures used by Python's 'pgen' parser generator. |
| |
| There's also a table here mapping operators to their names in the |
| token module; the Python tokenize module reports all operators as the |
| fallback token code OP, but the parser needs the actual token code. |
| |
| """ |
| |
| # Python imports |
| import pickle |
| |
| # Local imports |
| from . import token, tokenize |
| |
| |
class Grammar(object):
    """Pgen parsing tables conversion class.

    Once initialized, this class supplies the grammar tables for the
    parsing engine implemented by parse.py.  The parsing engine
    accesses the instance variables directly.  The class here does not
    provide initialization of the tables; several subclasses exist to
    do this (see the conv and pgen modules).

    The load() method reads the tables from a pickle file, which is
    much faster than the other ways offered by subclasses.  The pickle
    file is written by calling dump() (after loading the grammar
    tables using a subclass).  The report() method prints a readable
    representation of the tables to stdout, for debugging.

    The instance variables are as follows:

    symbol2number -- a dict mapping symbol names to numbers.  Symbol
                     numbers are always 256 or higher, to distinguish
                     them from token numbers, which are between 0 and
                     255 (inclusive).

    number2symbol -- a dict mapping numbers to symbol names;
                     these two are each other's inverse.

    states        -- a list of DFAs, where each DFA is a list of
                     states, each state is a list of arcs, and each
                     arc is a (i, j) pair where i is a label and j is
                     a state number.  The DFA number is the index into
                     this list.  (This name is slightly confusing.)
                     Final states are represented by a special arc of
                     the form (0, j) where j is its own state number.

    dfas          -- a dict mapping symbol numbers to (DFA, first)
                     pairs, where DFA is an item from the states list
                     above, and first is a set of tokens that can
                     begin this grammar rule (represented by a dict
                     whose values are always 1).

    labels        -- a list of (x, y) pairs where x is either a token
                     number or a symbol number, and y is either None
                     or a string; the strings are keywords.  The label
                     number is the index in this list; label numbers
                     are used to mark state transitions (arcs) in the
                     DFAs.  The list always starts with (0, "EMPTY").

    start         -- the number of the grammar's start symbol.

    keywords      -- a dict mapping keyword strings to arc labels.

    tokens        -- a dict mapping token numbers to arc labels.

    symbol2label  -- a dict, initially empty and not filled in by this
                     class.  Presumably maps symbol names to arc
                     labels; confirm against the subclass that
                     populates it.

    """

    def __init__(self):
        """Create an empty set of tables for a subclass or load() to fill."""
        self.symbol2number = {}
        self.number2symbol = {}
        self.states = []
        self.dfas = {}
        # Label 0 is reserved; it is the label used by the (0, j) arcs
        # that mark final states.
        self.labels = [(0, "EMPTY")]
        self.keywords = {}
        self.tokens = {}
        self.symbol2label = {}
        # 256 is the lowest possible symbol number.
        self.start = 256

    def dump(self, filename):
        """Dump the grammar tables to a pickle file.

        The whole instance __dict__ is written using pickle protocol 2.
        The file is closed even if pickling raises.
        """
        with open(filename, "wb") as f:
            pickle.dump(self.__dict__, f, 2)

    def load(self, filename):
        """Load the grammar tables from a pickle file.

        The unpickled dict is merged into the instance __dict__, so any
        attribute stored in the file overwrites the current value.  The
        file is closed even if unpickling raises.
        """
        # NOTE: unpickling can execute arbitrary code.  Only load grammar
        # files that come from a trusted source (normally dump()).
        with open(filename, "rb") as f:
            d = pickle.load(f)
        self.__dict__.update(d)

    def copy(self):
        """
        Copy the grammar.

        The copy is shallow: each table (dict or list) is duplicated,
        but the objects stored inside the tables are shared with the
        original.  The result is an instance of the same class as self.
        """
        new = self.__class__()
        for dict_attr in ("symbol2number", "number2symbol", "dfas", "keywords",
                          "tokens", "symbol2label"):
            setattr(new, dict_attr, getattr(self, dict_attr).copy())
        new.labels = self.labels[:]
        new.states = self.states[:]
        new.start = self.start
        return new

    def report(self):
        """Dump the grammar tables to standard output, for debugging.

        Prints symbol2number, number2symbol, states, dfas, labels and
        start.  The keywords, tokens and symbol2label tables are not
        printed.
        """
        from pprint import pprint
        print("s2n")
        pprint(self.symbol2number)
        print("n2s")
        pprint(self.number2symbol)
        print("states")
        pprint(self.states)
        print("dfas")
        pprint(self.dfas)
        print("labels")
        pprint(self.labels)
        print("start", self.start)
| |
| |
# Map from operator to number (since tokenize doesn't do this)

# Each non-blank line pairs an operator's source text with the name of
# the attribute in the token module that holds its token number.
# Note that both "!=" and "<>" map to NOTEQUAL.
opmap_raw = """
( LPAR
) RPAR
[ LSQB
] RSQB
: COLON
, COMMA
; SEMI
+ PLUS
- MINUS
* STAR
/ SLASH
| VBAR
& AMPER
< LESS
> GREATER
= EQUAL
. DOT
% PERCENT
` BACKQUOTE
{ LBRACE
} RBRACE
@ AT
== EQEQUAL
!= NOTEQUAL
<> NOTEQUAL
<= LESSEQUAL
>= GREATEREQUAL
~ TILDE
^ CIRCUMFLEX
<< LEFTSHIFT
>> RIGHTSHIFT
** DOUBLESTAR
+= PLUSEQUAL
-= MINEQUAL
*= STAREQUAL
/= SLASHEQUAL
%= PERCENTEQUAL
&= AMPEREQUAL
|= VBAREQUAL
^= CIRCUMFLEXEQUAL
<<= LEFTSHIFTEQUAL
>>= RIGHTSHIFTEQUAL
**= DOUBLESTAREQUAL
// DOUBLESLASH
//= DOUBLESLASHEQUAL
-> RARROW
"""

# Build the operator -> token number table by resolving each name as an
# attribute of the token module.
opmap = {}
for line in opmap_raw.splitlines():
    if not line:
        # The literal starts with a newline, so the first entry is empty.
        continue
    op, name = line.split()
    opmap[op] = getattr(token, name)