blob: 0a259e90afd83844c1311de5e2f379e88af05e61 [file] [log] [blame]
Martin v. Löwisef04c442008-03-19 05:04:44 +00001# Copyright 2006 Google, Inc. All Rights Reserved.
2# Licensed to PSF under a Contributor Agreement.
3
4"""Pattern compiler.
5
6The grammer is taken from PatternGrammar.txt.
7
8The compiler compiles a pattern to a pytree.*Pattern instance.
9"""
10
11__author__ = "Guido van Rossum <guido@python.org>"
12
13# Python imports
Benjamin Peterson2e7965e2011-02-26 22:12:10 +000014import io
Martin v. Löwisef04c442008-03-19 05:04:44 +000015import os
16
17# Fairly local imports
Benjamin Peterson3059b002009-07-20 16:42:03 +000018from .pgen2 import driver, literals, token, tokenize, parse, grammar
Martin v. Löwisef04c442008-03-19 05:04:44 +000019
20# Really local imports
21from . import pytree
22from . import pygram
23
24# The pattern grammar file
25_PATTERN_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__),
26 "PatternGrammar.txt")
27
28
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +000029class PatternSyntaxError(Exception):
30 pass
31
32
Martin v. Löwisef04c442008-03-19 05:04:44 +000033def tokenize_wrapper(input):
34 """Tokenizes a string suppressing significant whitespace."""
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000035 skip = set((token.NEWLINE, token.INDENT, token.DEDENT))
Benjamin Peterson2e7965e2011-02-26 22:12:10 +000036 tokens = tokenize.generate_tokens(io.StringIO(input).readline)
Martin v. Löwisef04c442008-03-19 05:04:44 +000037 for quintuple in tokens:
38 type, value, start, end, line_text = quintuple
39 if type not in skip:
40 yield quintuple
41
42
43class PatternCompiler(object):
44
45 def __init__(self, grammar_file=_PATTERN_GRAMMAR_FILE):
46 """Initializer.
47
48 Takes an optional alternative filename for the pattern grammar.
49 """
50 self.grammar = driver.load_grammar(grammar_file)
51 self.syms = pygram.Symbols(self.grammar)
52 self.pygrammar = pygram.python_grammar
53 self.pysyms = pygram.python_symbols
54 self.driver = driver.Driver(self.grammar, convert=pattern_convert)
55
Benjamin Petersonf37eb3a2010-10-14 23:00:04 +000056 def compile_pattern(self, input, debug=False, with_tree=False):
Martin v. Löwisef04c442008-03-19 05:04:44 +000057 """Compiles a pattern string to a nested pytree.*Pattern object."""
58 tokens = tokenize_wrapper(input)
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +000059 try:
60 root = self.driver.parse_tokens(tokens, debug=debug)
61 except parse.ParseError as e:
62 raise PatternSyntaxError(str(e))
Benjamin Petersonf37eb3a2010-10-14 23:00:04 +000063 if with_tree:
64 return self.compile_node(root), root
65 else:
66 return self.compile_node(root)
Martin v. Löwisef04c442008-03-19 05:04:44 +000067
68 def compile_node(self, node):
69 """Compiles a node, recursively.
70
71 This is one big switch on the node type.
72 """
73 # XXX Optimize certain Wildcard-containing-Wildcard patterns
74 # that can be merged
75 if node.type == self.syms.Matcher:
76 node = node.children[0] # Avoid unneeded recursion
77
78 if node.type == self.syms.Alternatives:
79 # Skip the odd children since they are just '|' tokens
80 alts = [self.compile_node(ch) for ch in node.children[::2]]
81 if len(alts) == 1:
82 return alts[0]
83 p = pytree.WildcardPattern([[a] for a in alts], min=1, max=1)
84 return p.optimize()
85
86 if node.type == self.syms.Alternative:
87 units = [self.compile_node(ch) for ch in node.children]
88 if len(units) == 1:
89 return units[0]
90 p = pytree.WildcardPattern([units], min=1, max=1)
91 return p.optimize()
92
93 if node.type == self.syms.NegatedUnit:
94 pattern = self.compile_basic(node.children[1:])
95 p = pytree.NegatedPattern(pattern)
96 return p.optimize()
97
98 assert node.type == self.syms.Unit
99
100 name = None
101 nodes = node.children
102 if len(nodes) >= 3 and nodes[1].type == token.EQUAL:
103 name = nodes[0].value
104 nodes = nodes[2:]
105 repeat = None
106 if len(nodes) >= 2 and nodes[-1].type == self.syms.Repeater:
107 repeat = nodes[-1]
108 nodes = nodes[:-1]
109
110 # Now we've reduced it to: STRING | NAME [Details] | (...) | [...]
111 pattern = self.compile_basic(nodes, repeat)
112
113 if repeat is not None:
114 assert repeat.type == self.syms.Repeater
115 children = repeat.children
116 child = children[0]
117 if child.type == token.STAR:
118 min = 0
119 max = pytree.HUGE
120 elif child.type == token.PLUS:
121 min = 1
122 max = pytree.HUGE
123 elif child.type == token.LBRACE:
124 assert children[-1].type == token.RBRACE
125 assert len(children) in (3, 5)
126 min = max = self.get_int(children[1])
127 if len(children) == 5:
128 max = self.get_int(children[3])
129 else:
130 assert False
131 if min != 1 or max != 1:
132 pattern = pattern.optimize()
133 pattern = pytree.WildcardPattern([[pattern]], min=min, max=max)
134
135 if name is not None:
136 pattern.name = name
137 return pattern.optimize()
138
139 def compile_basic(self, nodes, repeat=None):
140 # Compile STRING | NAME [Details] | (...) | [...]
141 assert len(nodes) >= 1
142 node = nodes[0]
143 if node.type == token.STRING:
Benjamin Petersond481e3d2009-05-09 19:42:23 +0000144 value = str(literals.evalString(node.value))
Benjamin Peterson3059b002009-07-20 16:42:03 +0000145 return pytree.LeafPattern(_type_of_literal(value), value)
Martin v. Löwisef04c442008-03-19 05:04:44 +0000146 elif node.type == token.NAME:
147 value = node.value
148 if value.isupper():
149 if value not in TOKEN_MAP:
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000150 raise PatternSyntaxError("Invalid token: %r" % value)
151 if nodes[1:]:
152 raise PatternSyntaxError("Can't have details for token")
Martin v. Löwisef04c442008-03-19 05:04:44 +0000153 return pytree.LeafPattern(TOKEN_MAP[value])
154 else:
155 if value == "any":
156 type = None
157 elif not value.startswith("_"):
158 type = getattr(self.pysyms, value, None)
159 if type is None:
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000160 raise PatternSyntaxError("Invalid symbol: %r" % value)
Martin v. Löwisef04c442008-03-19 05:04:44 +0000161 if nodes[1:]: # Details present
162 content = [self.compile_node(nodes[1].children[1])]
163 else:
164 content = None
165 return pytree.NodePattern(type, content)
166 elif node.value == "(":
167 return self.compile_node(nodes[1])
168 elif node.value == "[":
169 assert repeat is None
170 subpattern = self.compile_node(nodes[1])
171 return pytree.WildcardPattern([[subpattern]], min=0, max=1)
172 assert False, node
173
174 def get_int(self, node):
175 assert node.type == token.NUMBER
176 return int(node.value)
177
178
179# Map named tokens to the type value for a LeafPattern
180TOKEN_MAP = {"NAME": token.NAME,
181 "STRING": token.STRING,
182 "NUMBER": token.NUMBER,
183 "TOKEN": None}
184
185
Benjamin Peterson3059b002009-07-20 16:42:03 +0000186def _type_of_literal(value):
187 if value[0].isalpha():
188 return token.NAME
189 elif value in grammar.opmap:
190 return grammar.opmap[value]
191 else:
192 return None
193
194
Martin v. Löwisef04c442008-03-19 05:04:44 +0000195def pattern_convert(grammar, raw_node_info):
196 """Converts raw node information to a Node or Leaf instance."""
197 type, value, context, children = raw_node_info
198 if children or type in grammar.number2symbol:
199 return pytree.Node(type, children, context=context)
200 else:
201 return pytree.Leaf(type, value, context=context)
202
203
204def compile_pattern(pattern):
205 return PatternCompiler().compile_pattern(pattern)