blob: bf56a11413a11957421063b092e07add92527c2b [file] [log] [blame]
Daniel Dunbar93fe03f2009-08-01 03:22:27 +00001import itertools
2
Daniel Dunbar7b90be72009-07-31 07:59:05 +00003import Util
4
Daniel Dunbaree41c4d2009-08-01 05:52:04 +00005# FIXME: It would be nice to at least match a few other things like `...`, $(
6# ... ), $VAR, etc., if only so we can nicely say "we don't support this".
7
Daniel Dunbar7b90be72009-07-31 07:59:05 +00008class ShLexer:
9 def __init__(self, data):
10 self.data = data
11 self.pos = 0
12 self.end = len(data)
13
14 def eat(self):
15 c = self.data[self.pos]
16 self.pos += 1
17 return c
18
19 def look(self):
20 return self.data[self.pos]
21
22 def maybe_eat(self, c):
23 """
24 maybe_eat(c) - Consume the character c if it is the next character,
25 returning True if a character was consumed. """
26 if self.data[self.pos] == c:
27 self.pos += 1
28 return True
29 return False
30
Daniel Dunbar93fe03f2009-08-01 03:22:27 +000031 def lex_arg_fast(self, c):
32 # Get the leading whitespace free section.
33 chunk = self.data[self.pos - 1:].split(None, 1)[0]
34
35 # If it has special characters, the fast path failed.
36 if ('|' in chunk or '&' in chunk or
37 '<' in chunk or '>' in chunk or
Daniel Dunbara39be6a2009-08-01 09:41:09 +000038 "'" in chunk or '"' in chunk or
39 '\\' in chunk):
Daniel Dunbar93fe03f2009-08-01 03:22:27 +000040 return None
41
42 self.pos = self.pos - 1 + len(chunk)
43 return chunk
44
45 def lex_arg_slow(self, c):
Daniel Dunbar7b90be72009-07-31 07:59:05 +000046 if c in "'\"":
47 str = self.lex_arg_quoted(c)
48 else:
49 str = c
50 while self.pos != self.end:
51 c = self.look()
Daniel Dunbar93fe03f2009-08-01 03:22:27 +000052 if c.isspace() or c in "|&":
Daniel Dunbar7b90be72009-07-31 07:59:05 +000053 break
Daniel Dunbar93fe03f2009-08-01 03:22:27 +000054 elif c in '><':
55 # This is an annoying case; we treat '2>' as a single token so
56 # we don't have to track whitespace tokens.
57
58 # If the parse string isn't an integer, do the usual thing.
59 if not str.isdigit():
60 break
61
62 # Otherwise, lex the operator and convert to a redirection
63 # token.
64 num = int(str)
65 tok = self.lex_one_token()
66 assert isinstance(tok, tuple) and len(tok) == 1
67 return (tok[0], num)
Daniel Dunbar7b90be72009-07-31 07:59:05 +000068 elif c == '"':
69 self.eat()
70 str += self.lex_arg_quoted('"')
Daniel Dunbara39be6a2009-08-01 09:41:09 +000071 elif c == '\\':
72 # Outside of a string, '\\' escapes everything.
73 self.eat()
74 if self.pos == self.end:
75 Util.warning("escape at end of quoted argument in: %r" %
76 self.data)
77 return str
78 str += self.eat()
Daniel Dunbar7b90be72009-07-31 07:59:05 +000079 else:
80 str += self.eat()
81 return str
82
83 def lex_arg_quoted(self, delim):
84 str = ''
85 while self.pos != self.end:
86 c = self.eat()
87 if c == delim:
88 return str
89 elif c == '\\' and delim == '"':
Daniel Dunbara39be6a2009-08-01 09:41:09 +000090 # Inside a '"' quoted string, '\\' only escapes the quote
91 # character and backslash, otherwise it is preserved.
Daniel Dunbar7b90be72009-07-31 07:59:05 +000092 if self.pos == self.end:
93 Util.warning("escape at end of quoted argument in: %r" %
94 self.data)
95 return str
96 c = self.eat()
Daniel Dunbaree41c4d2009-08-01 05:52:04 +000097 if c == '"': #
98 str += '"'
99 elif c == '\\':
Daniel Dunbar7b90be72009-07-31 07:59:05 +0000100 str += '\\'
Daniel Dunbaree41c4d2009-08-01 05:52:04 +0000101 else:
102 str += '\\' + c
Daniel Dunbar7b90be72009-07-31 07:59:05 +0000103 else:
104 str += c
105 Util.warning("missing quote character in %r" % self.data)
106 return str
Daniel Dunbar93fe03f2009-08-01 03:22:27 +0000107
108 def lex_arg_checked(self, c):
109 pos = self.pos
110 res = self.lex_arg_fast(c)
111 end = self.pos
Daniel Dunbar7b90be72009-07-31 07:59:05 +0000112
Daniel Dunbar93fe03f2009-08-01 03:22:27 +0000113 self.pos = pos
114 reference = self.lex_arg_slow(c)
115 if res is not None:
116 if res != reference:
117 raise ValueError,"Fast path failure: %r != %r" % (res, reference)
118 if self.pos != end:
119 raise ValueError,"Fast path failure: %r != %r" % (self.pos, end)
120 return reference
121
122 def lex_arg(self, c):
123 return self.lex_arg_fast(c) or self.lex_arg_slow(c)
124
Daniel Dunbar7b90be72009-07-31 07:59:05 +0000125 def lex_one_token(self):
126 """
127 lex_one_token - Lex a single 'sh' token. """
128
129 c = self.eat()
Daniel Dunbar93fe03f2009-08-01 03:22:27 +0000130 if c in ';!':
131 return (c,)
Daniel Dunbar7b90be72009-07-31 07:59:05 +0000132 if c == '|':
133 if self.maybe_eat('|'):
134 return ('||',)
135 return (c,)
136 if c == '&':
137 if self.maybe_eat('&'):
138 return ('&&',)
139 if self.maybe_eat('>'):
140 return ('&>',)
141 return (c,)
142 if c == '>':
143 if self.maybe_eat('&'):
144 return ('>&',)
145 if self.maybe_eat('>'):
146 return ('>>',)
147 return (c,)
148 if c == '<':
149 if self.maybe_eat('&'):
150 return ('<&',)
151 if self.maybe_eat('>'):
152 return ('<<',)
Daniel Dunbaree41c4d2009-08-01 05:52:04 +0000153 return (c,)
Daniel Dunbar93fe03f2009-08-01 03:22:27 +0000154
Daniel Dunbar7b90be72009-07-31 07:59:05 +0000155 return self.lex_arg(c)
156
157 def lex(self):
158 while self.pos != self.end:
159 if self.look().isspace():
160 self.eat()
161 else:
162 yield self.lex_one_token()
163
164###
165
Daniel Dunbar93fe03f2009-08-01 03:22:27 +0000166class Command:
167 def __init__(self, args, redirects):
168 self.args = list(args)
169 self.redirects = list(redirects)
170
171 def __repr__(self):
172 return 'Command(%r, %r)' % (self.args, self.redirects)
173
174 def __cmp__(self, other):
175 if not isinstance(other, Command):
176 return -1
177
178 return cmp((self.args, self.redirects),
179 (other.args, other.redirects))
180
181class Pipeline:
182 def __init__(self, commands, negate):
183 self.commands = commands
184 self.negate = negate
185
186 def __repr__(self):
187 return 'Pipeline(%r, %r)' % (self.commands, self.negate)
188
189 def __cmp__(self, other):
190 if not isinstance(other, Pipeline):
191 return -1
192
193 return cmp((self.commands, self.negate),
194 (other.commands, other.negate))
195
196class Seq:
197 def __init__(self, lhs, op, rhs):
198 assert op in (';', '&', '||', '&&')
199 self.op = op
200 self.lhs = lhs
201 self.rhs = rhs
202
203 def __repr__(self):
204 return 'Seq(%r, %r, %r)' % (self.lhs, self.op, self.rhs)
205
206 def __cmp__(self, other):
207 if not isinstance(other, Seq):
208 return -1
209
210 return cmp((self.lhs, self.op, self.rhs),
211 (other.lhs, other.op, other.rhs))
212
213class ShParser:
214 def __init__(self, data):
215 self.data = data
216 self.tokens = ShLexer(data).lex()
217
218 def lex(self):
219 try:
220 return self.tokens.next()
221 except StopIteration:
222 return None
223
224 def look(self):
225 next = self.lex()
Daniel Dunbara39be6a2009-08-01 09:41:09 +0000226 if next is not None:
Daniel Dunbar93fe03f2009-08-01 03:22:27 +0000227 self.tokens = itertools.chain([next], self.tokens)
228 return next
229
230 def parse_command(self):
231 tok = self.lex()
232 if not tok:
233 raise ValueError,"empty command!"
234 if isinstance(tok, tuple):
235 raise ValueError,"syntax error near unexpected token %r" % tok[0]
236
237 args = [tok]
238 redirects = []
239 while 1:
240 tok = self.look()
241
242 # EOF?
243 if tok is None:
244 break
245
246 # If this is an argument, just add it to the current command.
247 if isinstance(tok, str):
248 args.append(self.lex())
249 continue
250
251 # Otherwise see if it is a terminator.
252 assert isinstance(tok, tuple)
253 if tok[0] in ('|',';','&','||','&&'):
254 break
255
256 # Otherwise it must be a redirection.
257 op = self.lex()
258 arg = self.lex()
259 if not arg:
260 raise ValueError,"syntax error near token %r" % op[0]
261 redirects.append((op, arg))
262
263 return Command(args, redirects)
264
265 def parse_pipeline(self):
266 negate = False
267 if self.look() == ('!',):
268 self.lex()
269 negate = True
270
271 commands = [self.parse_command()]
272 while self.look() == ('|',):
273 self.lex()
274 commands.append(self.parse_command())
275 return Pipeline(commands, negate)
276
277 def parse(self):
278 lhs = self.parse_pipeline()
279
280 while self.look():
281 operator = self.lex()
282 assert isinstance(operator, tuple) and len(operator) == 1
283
284 if not self.look():
285 raise ValueError, "missing argument to operator %r" % operator[0]
286
287 # FIXME: Operator precedence!!
288 lhs = Seq(lhs, operator[0], self.parse_pipeline())
289
290 return lhs
291
292###
293
Daniel Dunbar7b90be72009-07-31 07:59:05 +0000294import unittest
295
296class TestShLexer(unittest.TestCase):
297 def lex(self, str):
298 return list(ShLexer(str).lex())
299
Daniel Dunbar93fe03f2009-08-01 03:22:27 +0000300 def test_basic(self):
Daniel Dunbaree41c4d2009-08-01 05:52:04 +0000301 self.assertEqual(self.lex('a|b>c&d<e'),
302 ['a', ('|',), 'b', ('>',), 'c', ('&',), 'd',
303 ('<',), 'e'])
Daniel Dunbar93fe03f2009-08-01 03:22:27 +0000304
305 def test_redirection_tokens(self):
Daniel Dunbar7b90be72009-07-31 07:59:05 +0000306 self.assertEqual(self.lex('a2>c'),
307 ['a2', ('>',), 'c'])
308 self.assertEqual(self.lex('a 2>c'),
Daniel Dunbar93fe03f2009-08-01 03:22:27 +0000309 ['a', ('>',2), 'c'])
Daniel Dunbar7b90be72009-07-31 07:59:05 +0000310
Daniel Dunbar93fe03f2009-08-01 03:22:27 +0000311 def test_quoting(self):
Daniel Dunbar7b90be72009-07-31 07:59:05 +0000312 self.assertEqual(self.lex(""" 'a' """),
313 ['a'])
314 self.assertEqual(self.lex(""" "hello\\"world" """),
315 ['hello"world'])
316 self.assertEqual(self.lex(""" "hello\\'world" """),
317 ["hello\\'world"])
Daniel Dunbaree41c4d2009-08-01 05:52:04 +0000318 self.assertEqual(self.lex(""" "hello\\\\world" """),
319 ["hello\\world"])
Daniel Dunbar7b90be72009-07-31 07:59:05 +0000320 self.assertEqual(self.lex(""" he"llo wo"rld """),
321 ["hello world"])
Daniel Dunbara39be6a2009-08-01 09:41:09 +0000322 self.assertEqual(self.lex(""" a\\ b a\\\\b """),
323 ["a b", "a\\b"])
324 self.assertEqual(self.lex(""" "" "" """),
325 ["", ""])
Daniel Dunbar7b90be72009-07-31 07:59:05 +0000326
Daniel Dunbar93fe03f2009-08-01 03:22:27 +0000327class TestShParse(unittest.TestCase):
328 def parse(self, str):
329 return ShParser(str).parse()
330
331 def test_basic(self):
332 self.assertEqual(self.parse('echo hello'),
333 Pipeline([Command(['echo', 'hello'], [])], False))
Daniel Dunbara39be6a2009-08-01 09:41:09 +0000334 self.assertEqual(self.parse('echo ""'),
335 Pipeline([Command(['echo', ''], [])], False))
Daniel Dunbar93fe03f2009-08-01 03:22:27 +0000336
337 def test_redirection(self):
338 self.assertEqual(self.parse('echo hello > c'),
339 Pipeline([Command(['echo', 'hello'],
340 [((('>'),), 'c')])], False))
341 self.assertEqual(self.parse('echo hello > c >> d'),
342 Pipeline([Command(['echo', 'hello'], [(('>',), 'c'),
343 (('>>',), 'd')])], False))
344
345 def test_pipeline(self):
346 self.assertEqual(self.parse('a | b'),
347 Pipeline([Command(['a'], []),
348 Command(['b'], [])],
349 False))
350
351 self.assertEqual(self.parse('a | b | c'),
352 Pipeline([Command(['a'], []),
353 Command(['b'], []),
354 Command(['c'], [])],
355 False))
356
357 self.assertEqual(self.parse('! a'),
358 Pipeline([Command(['a'], [])],
359 True))
360
361 def test_list(self):
362 self.assertEqual(self.parse('a ; b'),
363 Seq(Pipeline([Command(['a'], [])], False),
364 ';',
365 Pipeline([Command(['b'], [])], False)))
366
367 self.assertEqual(self.parse('a & b'),
368 Seq(Pipeline([Command(['a'], [])], False),
369 '&',
370 Pipeline([Command(['b'], [])], False)))
371
372 self.assertEqual(self.parse('a && b'),
373 Seq(Pipeline([Command(['a'], [])], False),
374 '&&',
375 Pipeline([Command(['b'], [])], False)))
376
377 self.assertEqual(self.parse('a || b'),
378 Seq(Pipeline([Command(['a'], [])], False),
379 '||',
380 Pipeline([Command(['b'], [])], False)))
381
382 self.assertEqual(self.parse('a && b || c'),
383 Seq(Seq(Pipeline([Command(['a'], [])], False),
384 '&&',
385 Pipeline([Command(['b'], [])], False)),
386 '||',
387 Pipeline([Command(['c'], [])], False)))
Daniel Dunbara39be6a2009-08-01 09:41:09 +0000388
Daniel Dunbar7b90be72009-07-31 07:59:05 +0000389if __name__ == '__main__':
390 unittest.main()