blob: bb538d537788507ffebc3fd7a4e2f1ab64678b36 [file] [log] [blame]
Martin v. Löwisef04c442008-03-19 05:04:44 +00001# Copyright 2006 Google, Inc. All Rights Reserved.
2# Licensed to PSF under a Contributor Agreement.
3
4"""Pattern compiler.
5
6The grammer is taken from PatternGrammar.txt.
7
8The compiler compiles a pattern to a pytree.*Pattern instance.
9"""
10
11__author__ = "Guido van Rossum <guido@python.org>"
12
13# Python imports
14import os
15
16# Fairly local imports
Benjamin Peterson3059b002009-07-20 16:42:03 +000017from .pgen2 import driver, literals, token, tokenize, parse, grammar
Martin v. Löwisef04c442008-03-19 05:04:44 +000018
19# Really local imports
20from . import pytree
21from . import pygram
22
23# The pattern grammar file
24_PATTERN_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__),
25 "PatternGrammar.txt")
26
27
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +000028class PatternSyntaxError(Exception):
29 pass
30
31
Martin v. Löwisef04c442008-03-19 05:04:44 +000032def tokenize_wrapper(input):
33 """Tokenizes a string suppressing significant whitespace."""
Benjamin Peterson608d8bc2009-05-05 23:23:31 +000034 skip = set((token.NEWLINE, token.INDENT, token.DEDENT))
Martin v. Löwis8a5f8ca2008-03-19 05:33:36 +000035 tokens = tokenize.generate_tokens(driver.generate_lines(input).__next__)
Martin v. Löwisef04c442008-03-19 05:04:44 +000036 for quintuple in tokens:
37 type, value, start, end, line_text = quintuple
38 if type not in skip:
39 yield quintuple
40
41
42class PatternCompiler(object):
43
44 def __init__(self, grammar_file=_PATTERN_GRAMMAR_FILE):
45 """Initializer.
46
47 Takes an optional alternative filename for the pattern grammar.
48 """
49 self.grammar = driver.load_grammar(grammar_file)
50 self.syms = pygram.Symbols(self.grammar)
51 self.pygrammar = pygram.python_grammar
52 self.pysyms = pygram.python_symbols
53 self.driver = driver.Driver(self.grammar, convert=pattern_convert)
54
Benjamin Petersonf37eb3a2010-10-14 23:00:04 +000055 def compile_pattern(self, input, debug=False, with_tree=False):
Martin v. Löwisef04c442008-03-19 05:04:44 +000056 """Compiles a pattern string to a nested pytree.*Pattern object."""
57 tokens = tokenize_wrapper(input)
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +000058 try:
59 root = self.driver.parse_tokens(tokens, debug=debug)
60 except parse.ParseError as e:
61 raise PatternSyntaxError(str(e))
Benjamin Petersonf37eb3a2010-10-14 23:00:04 +000062 if with_tree:
63 return self.compile_node(root), root
64 else:
65 return self.compile_node(root)
Martin v. Löwisef04c442008-03-19 05:04:44 +000066
67 def compile_node(self, node):
68 """Compiles a node, recursively.
69
70 This is one big switch on the node type.
71 """
72 # XXX Optimize certain Wildcard-containing-Wildcard patterns
73 # that can be merged
74 if node.type == self.syms.Matcher:
75 node = node.children[0] # Avoid unneeded recursion
76
77 if node.type == self.syms.Alternatives:
78 # Skip the odd children since they are just '|' tokens
79 alts = [self.compile_node(ch) for ch in node.children[::2]]
80 if len(alts) == 1:
81 return alts[0]
82 p = pytree.WildcardPattern([[a] for a in alts], min=1, max=1)
83 return p.optimize()
84
85 if node.type == self.syms.Alternative:
86 units = [self.compile_node(ch) for ch in node.children]
87 if len(units) == 1:
88 return units[0]
89 p = pytree.WildcardPattern([units], min=1, max=1)
90 return p.optimize()
91
92 if node.type == self.syms.NegatedUnit:
93 pattern = self.compile_basic(node.children[1:])
94 p = pytree.NegatedPattern(pattern)
95 return p.optimize()
96
97 assert node.type == self.syms.Unit
98
99 name = None
100 nodes = node.children
101 if len(nodes) >= 3 and nodes[1].type == token.EQUAL:
102 name = nodes[0].value
103 nodes = nodes[2:]
104 repeat = None
105 if len(nodes) >= 2 and nodes[-1].type == self.syms.Repeater:
106 repeat = nodes[-1]
107 nodes = nodes[:-1]
108
109 # Now we've reduced it to: STRING | NAME [Details] | (...) | [...]
110 pattern = self.compile_basic(nodes, repeat)
111
112 if repeat is not None:
113 assert repeat.type == self.syms.Repeater
114 children = repeat.children
115 child = children[0]
116 if child.type == token.STAR:
117 min = 0
118 max = pytree.HUGE
119 elif child.type == token.PLUS:
120 min = 1
121 max = pytree.HUGE
122 elif child.type == token.LBRACE:
123 assert children[-1].type == token.RBRACE
124 assert len(children) in (3, 5)
125 min = max = self.get_int(children[1])
126 if len(children) == 5:
127 max = self.get_int(children[3])
128 else:
129 assert False
130 if min != 1 or max != 1:
131 pattern = pattern.optimize()
132 pattern = pytree.WildcardPattern([[pattern]], min=min, max=max)
133
134 if name is not None:
135 pattern.name = name
136 return pattern.optimize()
137
138 def compile_basic(self, nodes, repeat=None):
139 # Compile STRING | NAME [Details] | (...) | [...]
140 assert len(nodes) >= 1
141 node = nodes[0]
142 if node.type == token.STRING:
Benjamin Petersond481e3d2009-05-09 19:42:23 +0000143 value = str(literals.evalString(node.value))
Benjamin Peterson3059b002009-07-20 16:42:03 +0000144 return pytree.LeafPattern(_type_of_literal(value), value)
Martin v. Löwisef04c442008-03-19 05:04:44 +0000145 elif node.type == token.NAME:
146 value = node.value
147 if value.isupper():
148 if value not in TOKEN_MAP:
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000149 raise PatternSyntaxError("Invalid token: %r" % value)
150 if nodes[1:]:
151 raise PatternSyntaxError("Can't have details for token")
Martin v. Löwisef04c442008-03-19 05:04:44 +0000152 return pytree.LeafPattern(TOKEN_MAP[value])
153 else:
154 if value == "any":
155 type = None
156 elif not value.startswith("_"):
157 type = getattr(self.pysyms, value, None)
158 if type is None:
Benjamin Peterson2c3ac6b2009-06-11 23:47:38 +0000159 raise PatternSyntaxError("Invalid symbol: %r" % value)
Martin v. Löwisef04c442008-03-19 05:04:44 +0000160 if nodes[1:]: # Details present
161 content = [self.compile_node(nodes[1].children[1])]
162 else:
163 content = None
164 return pytree.NodePattern(type, content)
165 elif node.value == "(":
166 return self.compile_node(nodes[1])
167 elif node.value == "[":
168 assert repeat is None
169 subpattern = self.compile_node(nodes[1])
170 return pytree.WildcardPattern([[subpattern]], min=0, max=1)
171 assert False, node
172
173 def get_int(self, node):
174 assert node.type == token.NUMBER
175 return int(node.value)
176
177
178# Map named tokens to the type value for a LeafPattern
179TOKEN_MAP = {"NAME": token.NAME,
180 "STRING": token.STRING,
181 "NUMBER": token.NUMBER,
182 "TOKEN": None}
183
184
Benjamin Peterson3059b002009-07-20 16:42:03 +0000185def _type_of_literal(value):
186 if value[0].isalpha():
187 return token.NAME
188 elif value in grammar.opmap:
189 return grammar.opmap[value]
190 else:
191 return None
192
193
Martin v. Löwisef04c442008-03-19 05:04:44 +0000194def pattern_convert(grammar, raw_node_info):
195 """Converts raw node information to a Node or Leaf instance."""
196 type, value, context, children = raw_node_info
197 if children or type in grammar.number2symbol:
198 return pytree.Node(type, children, context=context)
199 else:
200 return pytree.Leaf(type, value, context=context)
201
202
203def compile_pattern(pattern):
204 return PatternCompiler().compile_pattern(pattern)