| # Copyright 2006 Google, Inc. All Rights Reserved. |
| # Licensed to PSF under a Contributor Agreement. |
| |
| """Pattern compiler. |
| |
| The grammer is taken from PatternGrammar.txt. |
| |
| The compiler compiles a pattern to a pytree.*Pattern instance. |
| """ |
| |
| __author__ = "Guido van Rossum <guido@python.org>" |
| |
| # Python imports |
| import os |
| |
| # Fairly local imports |
| from .pgen2 import driver, literals, token, tokenize, parse, grammar |
| |
| # Really local imports |
| from . import pytree |
| from . import pygram |
| |
| # The pattern grammar file |
| _PATTERN_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__), |
| "PatternGrammar.txt") |
| |
| |
| class PatternSyntaxError(Exception): |
| pass |
| |
| |
| def tokenize_wrapper(input): |
| """Tokenizes a string suppressing significant whitespace.""" |
| skip = set((token.NEWLINE, token.INDENT, token.DEDENT)) |
| tokens = tokenize.generate_tokens(driver.generate_lines(input).__next__) |
| for quintuple in tokens: |
| type, value, start, end, line_text = quintuple |
| if type not in skip: |
| yield quintuple |
| |
| |
| class PatternCompiler(object): |
| |
| def __init__(self, grammar_file=_PATTERN_GRAMMAR_FILE): |
| """Initializer. |
| |
| Takes an optional alternative filename for the pattern grammar. |
| """ |
| self.grammar = driver.load_grammar(grammar_file) |
| self.syms = pygram.Symbols(self.grammar) |
| self.pygrammar = pygram.python_grammar |
| self.pysyms = pygram.python_symbols |
| self.driver = driver.Driver(self.grammar, convert=pattern_convert) |
| |
| def compile_pattern(self, input, debug=False): |
| """Compiles a pattern string to a nested pytree.*Pattern object.""" |
| tokens = tokenize_wrapper(input) |
| try: |
| root = self.driver.parse_tokens(tokens, debug=debug) |
| except parse.ParseError as e: |
| raise PatternSyntaxError(str(e)) |
| return self.compile_node(root) |
| |
| def compile_node(self, node): |
| """Compiles a node, recursively. |
| |
| This is one big switch on the node type. |
| """ |
| # XXX Optimize certain Wildcard-containing-Wildcard patterns |
| # that can be merged |
| if node.type == self.syms.Matcher: |
| node = node.children[0] # Avoid unneeded recursion |
| |
| if node.type == self.syms.Alternatives: |
| # Skip the odd children since they are just '|' tokens |
| alts = [self.compile_node(ch) for ch in node.children[::2]] |
| if len(alts) == 1: |
| return alts[0] |
| p = pytree.WildcardPattern([[a] for a in alts], min=1, max=1) |
| return p.optimize() |
| |
| if node.type == self.syms.Alternative: |
| units = [self.compile_node(ch) for ch in node.children] |
| if len(units) == 1: |
| return units[0] |
| p = pytree.WildcardPattern([units], min=1, max=1) |
| return p.optimize() |
| |
| if node.type == self.syms.NegatedUnit: |
| pattern = self.compile_basic(node.children[1:]) |
| p = pytree.NegatedPattern(pattern) |
| return p.optimize() |
| |
| assert node.type == self.syms.Unit |
| |
| name = None |
| nodes = node.children |
| if len(nodes) >= 3 and nodes[1].type == token.EQUAL: |
| name = nodes[0].value |
| nodes = nodes[2:] |
| repeat = None |
| if len(nodes) >= 2 and nodes[-1].type == self.syms.Repeater: |
| repeat = nodes[-1] |
| nodes = nodes[:-1] |
| |
| # Now we've reduced it to: STRING | NAME [Details] | (...) | [...] |
| pattern = self.compile_basic(nodes, repeat) |
| |
| if repeat is not None: |
| assert repeat.type == self.syms.Repeater |
| children = repeat.children |
| child = children[0] |
| if child.type == token.STAR: |
| min = 0 |
| max = pytree.HUGE |
| elif child.type == token.PLUS: |
| min = 1 |
| max = pytree.HUGE |
| elif child.type == token.LBRACE: |
| assert children[-1].type == token.RBRACE |
| assert len(children) in (3, 5) |
| min = max = self.get_int(children[1]) |
| if len(children) == 5: |
| max = self.get_int(children[3]) |
| else: |
| assert False |
| if min != 1 or max != 1: |
| pattern = pattern.optimize() |
| pattern = pytree.WildcardPattern([[pattern]], min=min, max=max) |
| |
| if name is not None: |
| pattern.name = name |
| return pattern.optimize() |
| |
| def compile_basic(self, nodes, repeat=None): |
| # Compile STRING | NAME [Details] | (...) | [...] |
| assert len(nodes) >= 1 |
| node = nodes[0] |
| if node.type == token.STRING: |
| value = str(literals.evalString(node.value)) |
| return pytree.LeafPattern(_type_of_literal(value), value) |
| elif node.type == token.NAME: |
| value = node.value |
| if value.isupper(): |
| if value not in TOKEN_MAP: |
| raise PatternSyntaxError("Invalid token: %r" % value) |
| if nodes[1:]: |
| raise PatternSyntaxError("Can't have details for token") |
| return pytree.LeafPattern(TOKEN_MAP[value]) |
| else: |
| if value == "any": |
| type = None |
| elif not value.startswith("_"): |
| type = getattr(self.pysyms, value, None) |
| if type is None: |
| raise PatternSyntaxError("Invalid symbol: %r" % value) |
| if nodes[1:]: # Details present |
| content = [self.compile_node(nodes[1].children[1])] |
| else: |
| content = None |
| return pytree.NodePattern(type, content) |
| elif node.value == "(": |
| return self.compile_node(nodes[1]) |
| elif node.value == "[": |
| assert repeat is None |
| subpattern = self.compile_node(nodes[1]) |
| return pytree.WildcardPattern([[subpattern]], min=0, max=1) |
| assert False, node |
| |
| def get_int(self, node): |
| assert node.type == token.NUMBER |
| return int(node.value) |
| |
| |
| # Map named tokens to the type value for a LeafPattern |
| TOKEN_MAP = {"NAME": token.NAME, |
| "STRING": token.STRING, |
| "NUMBER": token.NUMBER, |
| "TOKEN": None} |
| |
| |
| def _type_of_literal(value): |
| if value[0].isalpha(): |
| return token.NAME |
| elif value in grammar.opmap: |
| return grammar.opmap[value] |
| else: |
| return None |
| |
| |
| def pattern_convert(grammar, raw_node_info): |
| """Converts raw node information to a Node or Leaf instance.""" |
| type, value, context, children = raw_node_info |
| if children or type in grammar.number2symbol: |
| return pytree.Node(type, children, context=context) |
| else: |
| return pytree.Leaf(type, value, context=context) |
| |
| |
| def compile_pattern(pattern): |
| return PatternCompiler().compile_pattern(pattern) |