blob: 3878f5961deb4e5d85cc79b8285ebf8203b89f88 [file] [log] [blame]
Daniel Dunbar93fe03f2009-08-01 03:22:27 +00001import itertools
2
Daniel Dunbar7b90be72009-07-31 07:59:05 +00003import Util
4
5class ShLexer:
6 def __init__(self, data):
7 self.data = data
8 self.pos = 0
9 self.end = len(data)
10
11 def eat(self):
12 c = self.data[self.pos]
13 self.pos += 1
14 return c
15
16 def look(self):
17 return self.data[self.pos]
18
19 def maybe_eat(self, c):
20 """
21 maybe_eat(c) - Consume the character c if it is the next character,
22 returning True if a character was consumed. """
23 if self.data[self.pos] == c:
24 self.pos += 1
25 return True
26 return False
27
Daniel Dunbar93fe03f2009-08-01 03:22:27 +000028 def lex_arg_fast(self, c):
29 # Get the leading whitespace free section.
30 chunk = self.data[self.pos - 1:].split(None, 1)[0]
31
32 # If it has special characters, the fast path failed.
33 if ('|' in chunk or '&' in chunk or
34 '<' in chunk or '>' in chunk or
35 "'" in chunk or '"' in chunk):
36 return None
37
38 self.pos = self.pos - 1 + len(chunk)
39 return chunk
40
41 def lex_arg_slow(self, c):
Daniel Dunbar7b90be72009-07-31 07:59:05 +000042 if c in "'\"":
43 str = self.lex_arg_quoted(c)
44 else:
45 str = c
46 while self.pos != self.end:
47 c = self.look()
Daniel Dunbar93fe03f2009-08-01 03:22:27 +000048 if c.isspace() or c in "|&":
Daniel Dunbar7b90be72009-07-31 07:59:05 +000049 break
Daniel Dunbar93fe03f2009-08-01 03:22:27 +000050 elif c in '><':
51 # This is an annoying case; we treat '2>' as a single token so
52 # we don't have to track whitespace tokens.
53
54 # If the parse string isn't an integer, do the usual thing.
55 if not str.isdigit():
56 break
57
58 # Otherwise, lex the operator and convert to a redirection
59 # token.
60 num = int(str)
61 tok = self.lex_one_token()
62 assert isinstance(tok, tuple) and len(tok) == 1
63 return (tok[0], num)
Daniel Dunbar7b90be72009-07-31 07:59:05 +000064 elif c == '"':
65 self.eat()
66 str += self.lex_arg_quoted('"')
67 else:
68 str += self.eat()
69 return str
70
71 def lex_arg_quoted(self, delim):
72 str = ''
73 while self.pos != self.end:
74 c = self.eat()
75 if c == delim:
76 return str
77 elif c == '\\' and delim == '"':
78 # Shell escaping is just '\"' to avoid termination, no actual
79 # escaping.
80 if self.pos == self.end:
81 Util.warning("escape at end of quoted argument in: %r" %
82 self.data)
83 return str
84 c = self.eat()
85 if c != delim:
86 str += '\\'
87 str += c
88 else:
89 str += c
90 Util.warning("missing quote character in %r" % self.data)
91 return str
Daniel Dunbar93fe03f2009-08-01 03:22:27 +000092
93 def lex_arg_checked(self, c):
94 pos = self.pos
95 res = self.lex_arg_fast(c)
96 end = self.pos
Daniel Dunbar7b90be72009-07-31 07:59:05 +000097
Daniel Dunbar93fe03f2009-08-01 03:22:27 +000098 self.pos = pos
99 reference = self.lex_arg_slow(c)
100 if res is not None:
101 if res != reference:
102 raise ValueError,"Fast path failure: %r != %r" % (res, reference)
103 if self.pos != end:
104 raise ValueError,"Fast path failure: %r != %r" % (self.pos, end)
105 return reference
106
107 def lex_arg(self, c):
108 return self.lex_arg_fast(c) or self.lex_arg_slow(c)
109
Daniel Dunbar7b90be72009-07-31 07:59:05 +0000110 def lex_one_token(self):
111 """
112 lex_one_token - Lex a single 'sh' token. """
113
114 c = self.eat()
Daniel Dunbar93fe03f2009-08-01 03:22:27 +0000115 if c in ';!':
116 return (c,)
Daniel Dunbar7b90be72009-07-31 07:59:05 +0000117 if c == '|':
118 if self.maybe_eat('|'):
119 return ('||',)
120 return (c,)
121 if c == '&':
122 if self.maybe_eat('&'):
123 return ('&&',)
124 if self.maybe_eat('>'):
125 return ('&>',)
126 return (c,)
127 if c == '>':
128 if self.maybe_eat('&'):
129 return ('>&',)
130 if self.maybe_eat('>'):
131 return ('>>',)
132 return (c,)
133 if c == '<':
134 if self.maybe_eat('&'):
135 return ('<&',)
136 if self.maybe_eat('>'):
137 return ('<<',)
Daniel Dunbar93fe03f2009-08-01 03:22:27 +0000138
Daniel Dunbar7b90be72009-07-31 07:59:05 +0000139 return self.lex_arg(c)
140
141 def lex(self):
142 while self.pos != self.end:
143 if self.look().isspace():
144 self.eat()
145 else:
146 yield self.lex_one_token()
147
148###
149
Daniel Dunbar93fe03f2009-08-01 03:22:27 +0000150class Command:
151 def __init__(self, args, redirects):
152 self.args = list(args)
153 self.redirects = list(redirects)
154
155 def __repr__(self):
156 return 'Command(%r, %r)' % (self.args, self.redirects)
157
158 def __cmp__(self, other):
159 if not isinstance(other, Command):
160 return -1
161
162 return cmp((self.args, self.redirects),
163 (other.args, other.redirects))
164
165class Pipeline:
166 def __init__(self, commands, negate):
167 self.commands = commands
168 self.negate = negate
169
170 def __repr__(self):
171 return 'Pipeline(%r, %r)' % (self.commands, self.negate)
172
173 def __cmp__(self, other):
174 if not isinstance(other, Pipeline):
175 return -1
176
177 return cmp((self.commands, self.negate),
178 (other.commands, other.negate))
179
180class Seq:
181 def __init__(self, lhs, op, rhs):
182 assert op in (';', '&', '||', '&&')
183 self.op = op
184 self.lhs = lhs
185 self.rhs = rhs
186
187 def __repr__(self):
188 return 'Seq(%r, %r, %r)' % (self.lhs, self.op, self.rhs)
189
190 def __cmp__(self, other):
191 if not isinstance(other, Seq):
192 return -1
193
194 return cmp((self.lhs, self.op, self.rhs),
195 (other.lhs, other.op, other.rhs))
196
197class ShParser:
198 def __init__(self, data):
199 self.data = data
200 self.tokens = ShLexer(data).lex()
201
202 def lex(self):
203 try:
204 return self.tokens.next()
205 except StopIteration:
206 return None
207
208 def look(self):
209 next = self.lex()
210 if next:
211 self.tokens = itertools.chain([next], self.tokens)
212 return next
213
214 def parse_command(self):
215 tok = self.lex()
216 if not tok:
217 raise ValueError,"empty command!"
218 if isinstance(tok, tuple):
219 raise ValueError,"syntax error near unexpected token %r" % tok[0]
220
221 args = [tok]
222 redirects = []
223 while 1:
224 tok = self.look()
225
226 # EOF?
227 if tok is None:
228 break
229
230 # If this is an argument, just add it to the current command.
231 if isinstance(tok, str):
232 args.append(self.lex())
233 continue
234
235 # Otherwise see if it is a terminator.
236 assert isinstance(tok, tuple)
237 if tok[0] in ('|',';','&','||','&&'):
238 break
239
240 # Otherwise it must be a redirection.
241 op = self.lex()
242 arg = self.lex()
243 if not arg:
244 raise ValueError,"syntax error near token %r" % op[0]
245 redirects.append((op, arg))
246
247 return Command(args, redirects)
248
249 def parse_pipeline(self):
250 negate = False
251 if self.look() == ('!',):
252 self.lex()
253 negate = True
254
255 commands = [self.parse_command()]
256 while self.look() == ('|',):
257 self.lex()
258 commands.append(self.parse_command())
259 return Pipeline(commands, negate)
260
261 def parse(self):
262 lhs = self.parse_pipeline()
263
264 while self.look():
265 operator = self.lex()
266 assert isinstance(operator, tuple) and len(operator) == 1
267
268 if not self.look():
269 raise ValueError, "missing argument to operator %r" % operator[0]
270
271 # FIXME: Operator precedence!!
272 lhs = Seq(lhs, operator[0], self.parse_pipeline())
273
274 return lhs
275
276###
277
Daniel Dunbar7b90be72009-07-31 07:59:05 +0000278import unittest
279
280class TestShLexer(unittest.TestCase):
281 def lex(self, str):
282 return list(ShLexer(str).lex())
283
Daniel Dunbar93fe03f2009-08-01 03:22:27 +0000284 def test_basic(self):
285 self.assertEqual(self.lex('a|b>c&d'),
286 ['a', ('|',), 'b', ('>',), 'c', ('&',), 'd'])
287
288 def test_redirection_tokens(self):
Daniel Dunbar7b90be72009-07-31 07:59:05 +0000289 self.assertEqual(self.lex('a2>c'),
290 ['a2', ('>',), 'c'])
291 self.assertEqual(self.lex('a 2>c'),
Daniel Dunbar93fe03f2009-08-01 03:22:27 +0000292 ['a', ('>',2), 'c'])
Daniel Dunbar7b90be72009-07-31 07:59:05 +0000293
Daniel Dunbar93fe03f2009-08-01 03:22:27 +0000294 def test_quoting(self):
Daniel Dunbar7b90be72009-07-31 07:59:05 +0000295 self.assertEqual(self.lex(""" 'a' """),
296 ['a'])
297 self.assertEqual(self.lex(""" "hello\\"world" """),
298 ['hello"world'])
299 self.assertEqual(self.lex(""" "hello\\'world" """),
300 ["hello\\'world"])
301 self.assertEqual(self.lex(""" he"llo wo"rld """),
302 ["hello world"])
303
Daniel Dunbar93fe03f2009-08-01 03:22:27 +0000304class TestShParse(unittest.TestCase):
305 def parse(self, str):
306 return ShParser(str).parse()
307
308 def test_basic(self):
309 self.assertEqual(self.parse('echo hello'),
310 Pipeline([Command(['echo', 'hello'], [])], False))
311
312 def test_redirection(self):
313 self.assertEqual(self.parse('echo hello > c'),
314 Pipeline([Command(['echo', 'hello'],
315 [((('>'),), 'c')])], False))
316 self.assertEqual(self.parse('echo hello > c >> d'),
317 Pipeline([Command(['echo', 'hello'], [(('>',), 'c'),
318 (('>>',), 'd')])], False))
319
320 def test_pipeline(self):
321 self.assertEqual(self.parse('a | b'),
322 Pipeline([Command(['a'], []),
323 Command(['b'], [])],
324 False))
325
326 self.assertEqual(self.parse('a | b | c'),
327 Pipeline([Command(['a'], []),
328 Command(['b'], []),
329 Command(['c'], [])],
330 False))
331
332 self.assertEqual(self.parse('! a'),
333 Pipeline([Command(['a'], [])],
334 True))
335
336 def test_list(self):
337 self.assertEqual(self.parse('a ; b'),
338 Seq(Pipeline([Command(['a'], [])], False),
339 ';',
340 Pipeline([Command(['b'], [])], False)))
341
342 self.assertEqual(self.parse('a & b'),
343 Seq(Pipeline([Command(['a'], [])], False),
344 '&',
345 Pipeline([Command(['b'], [])], False)))
346
347 self.assertEqual(self.parse('a && b'),
348 Seq(Pipeline([Command(['a'], [])], False),
349 '&&',
350 Pipeline([Command(['b'], [])], False)))
351
352 self.assertEqual(self.parse('a || b'),
353 Seq(Pipeline([Command(['a'], [])], False),
354 '||',
355 Pipeline([Command(['b'], [])], False)))
356
357 self.assertEqual(self.parse('a && b || c'),
358 Seq(Seq(Pipeline([Command(['a'], [])], False),
359 '&&',
360 Pipeline([Command(['b'], [])], False)),
361 '||',
362 Pipeline([Command(['c'], [])], False)))
363
Daniel Dunbar7b90be72009-07-31 07:59:05 +0000364if __name__ == '__main__':
365 unittest.main()