blob: f9031258960b0070a0f490bade3382324219fec7 [file] [log] [blame]
Martin v. Löwisef04c442008-03-19 05:04:44 +00001# Copyright 2006 Google, Inc. All Rights Reserved.
2# Licensed to PSF under a Contributor Agreement.
3
4"""Pattern compiler.
5
6The grammer is taken from PatternGrammar.txt.
7
8The compiler compiles a pattern to a pytree.*Pattern instance.
9"""
10
11__author__ = "Guido van Rossum <guido@python.org>"
12
13# Python imports
14import os
15
16# Fairly local imports
17from .pgen2 import driver
18from .pgen2 import literals
19from .pgen2 import token
20from .pgen2 import tokenize
21
22# Really local imports
23from . import pytree
24from . import pygram
25
26# The pattern grammar file
27_PATTERN_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__),
28 "PatternGrammar.txt")
29
30
31def tokenize_wrapper(input):
32 """Tokenizes a string suppressing significant whitespace."""
33 skip = (token.NEWLINE, token.INDENT, token.DEDENT)
Martin v. Löwis8a5f8ca2008-03-19 05:33:36 +000034 tokens = tokenize.generate_tokens(driver.generate_lines(input).__next__)
Martin v. Löwisef04c442008-03-19 05:04:44 +000035 for quintuple in tokens:
36 type, value, start, end, line_text = quintuple
37 if type not in skip:
38 yield quintuple
39
40
41class PatternCompiler(object):
42
43 def __init__(self, grammar_file=_PATTERN_GRAMMAR_FILE):
44 """Initializer.
45
46 Takes an optional alternative filename for the pattern grammar.
47 """
48 self.grammar = driver.load_grammar(grammar_file)
49 self.syms = pygram.Symbols(self.grammar)
50 self.pygrammar = pygram.python_grammar
51 self.pysyms = pygram.python_symbols
52 self.driver = driver.Driver(self.grammar, convert=pattern_convert)
53
54 def compile_pattern(self, input, debug=False):
55 """Compiles a pattern string to a nested pytree.*Pattern object."""
56 tokens = tokenize_wrapper(input)
57 root = self.driver.parse_tokens(tokens, debug=debug)
58 return self.compile_node(root)
59
60 def compile_node(self, node):
61 """Compiles a node, recursively.
62
63 This is one big switch on the node type.
64 """
65 # XXX Optimize certain Wildcard-containing-Wildcard patterns
66 # that can be merged
67 if node.type == self.syms.Matcher:
68 node = node.children[0] # Avoid unneeded recursion
69
70 if node.type == self.syms.Alternatives:
71 # Skip the odd children since they are just '|' tokens
72 alts = [self.compile_node(ch) for ch in node.children[::2]]
73 if len(alts) == 1:
74 return alts[0]
75 p = pytree.WildcardPattern([[a] for a in alts], min=1, max=1)
76 return p.optimize()
77
78 if node.type == self.syms.Alternative:
79 units = [self.compile_node(ch) for ch in node.children]
80 if len(units) == 1:
81 return units[0]
82 p = pytree.WildcardPattern([units], min=1, max=1)
83 return p.optimize()
84
85 if node.type == self.syms.NegatedUnit:
86 pattern = self.compile_basic(node.children[1:])
87 p = pytree.NegatedPattern(pattern)
88 return p.optimize()
89
90 assert node.type == self.syms.Unit
91
92 name = None
93 nodes = node.children
94 if len(nodes) >= 3 and nodes[1].type == token.EQUAL:
95 name = nodes[0].value
96 nodes = nodes[2:]
97 repeat = None
98 if len(nodes) >= 2 and nodes[-1].type == self.syms.Repeater:
99 repeat = nodes[-1]
100 nodes = nodes[:-1]
101
102 # Now we've reduced it to: STRING | NAME [Details] | (...) | [...]
103 pattern = self.compile_basic(nodes, repeat)
104
105 if repeat is not None:
106 assert repeat.type == self.syms.Repeater
107 children = repeat.children
108 child = children[0]
109 if child.type == token.STAR:
110 min = 0
111 max = pytree.HUGE
112 elif child.type == token.PLUS:
113 min = 1
114 max = pytree.HUGE
115 elif child.type == token.LBRACE:
116 assert children[-1].type == token.RBRACE
117 assert len(children) in (3, 5)
118 min = max = self.get_int(children[1])
119 if len(children) == 5:
120 max = self.get_int(children[3])
121 else:
122 assert False
123 if min != 1 or max != 1:
124 pattern = pattern.optimize()
125 pattern = pytree.WildcardPattern([[pattern]], min=min, max=max)
126
127 if name is not None:
128 pattern.name = name
129 return pattern.optimize()
130
131 def compile_basic(self, nodes, repeat=None):
132 # Compile STRING | NAME [Details] | (...) | [...]
133 assert len(nodes) >= 1
134 node = nodes[0]
135 if node.type == token.STRING:
136 value = literals.evalString(node.value)
137 return pytree.LeafPattern(content=value)
138 elif node.type == token.NAME:
139 value = node.value
140 if value.isupper():
141 if value not in TOKEN_MAP:
142 raise SyntaxError("Invalid token: %r" % value)
143 return pytree.LeafPattern(TOKEN_MAP[value])
144 else:
145 if value == "any":
146 type = None
147 elif not value.startswith("_"):
148 type = getattr(self.pysyms, value, None)
149 if type is None:
150 raise SyntaxError("Invalid symbol: %r" % value)
151 if nodes[1:]: # Details present
152 content = [self.compile_node(nodes[1].children[1])]
153 else:
154 content = None
155 return pytree.NodePattern(type, content)
156 elif node.value == "(":
157 return self.compile_node(nodes[1])
158 elif node.value == "[":
159 assert repeat is None
160 subpattern = self.compile_node(nodes[1])
161 return pytree.WildcardPattern([[subpattern]], min=0, max=1)
162 assert False, node
163
164 def get_int(self, node):
165 assert node.type == token.NUMBER
166 return int(node.value)
167
168
169# Map named tokens to the type value for a LeafPattern
170TOKEN_MAP = {"NAME": token.NAME,
171 "STRING": token.STRING,
172 "NUMBER": token.NUMBER,
173 "TOKEN": None}
174
175
176def pattern_convert(grammar, raw_node_info):
177 """Converts raw node information to a Node or Leaf instance."""
178 type, value, context, children = raw_node_info
179 if children or type in grammar.number2symbol:
180 return pytree.Node(type, children, context=context)
181 else:
182 return pytree.Leaf(type, value, context=context)
183
184
185def compile_pattern(pattern):
186 return PatternCompiler().compile_pattern(pattern)