blob: dd4b584ac00b616d1ad5fc943652ccbeb4e432fb [file] [log] [blame]
Daniel Dunbar93fe03f2009-08-01 03:22:27 +00001import itertools
2
Daniel Dunbar7b90be72009-07-31 07:59:05 +00003import Util
4
class ShLexer:
    """Tokenizer for a small subset of 'sh' command syntax.

    Arguments are produced as plain strings. Operators are produced as
    tuples: ``(op,)`` for a bare operator such as ``('|',)`` or ``('>>',)``,
    and ``(op, fd)`` for a redirection prefixed by a file descriptor number,
    e.g. ``('>', 2)`` for ``2>``.
    """

    def __init__(self, data, win32Escapes=False):
        """
        data - The command string to tokenize.
        win32Escapes - If true, a backslash outside of quotes is an ordinary
        character instead of an escape. """
        self.data = data
        self.pos = 0
        self.end = len(data)
        self.win32Escapes = win32Escapes

    def eat(self):
        """
        eat - Consume and return the next character. """
        c = self.data[self.pos]
        self.pos += 1
        return c

    def look(self):
        """
        look - Return the next character without consuming it. """
        return self.data[self.pos]

    def maybe_eat(self, c):
        """
        maybe_eat(c) - Consume the character c if it is the next character,
        returning True if a character was consumed. """
        # The bounds check matters: an operator may be the very last
        # character of the input (e.g. 'a |'), and there is then nothing
        # left to inspect.
        if self.pos != self.end and self.data[self.pos] == c:
            self.pos += 1
            return True
        return False

    def lex_arg_fast(self, c):
        """
        lex_arg_fast(c) - Try to lex an argument that needs no special
        processing. 'c' is the first character of the argument, which the
        caller has already consumed. Returns the argument, or None (without
        moving the position) if the slow path is required. """
        # Get the leading whitespace free section.
        chunk = self.data[self.pos - 1:].split(None, 1)[0]

        # If it has special characters, the fast path failed.
        if ('|' in chunk or '&' in chunk or
            '<' in chunk or '>' in chunk or
            "'" in chunk or '"' in chunk or
            '\\' in chunk):
            return None

        self.pos = self.pos - 1 + len(chunk)
        return chunk

    def lex_arg_slow(self, c):
        """
        lex_arg_slow(c) - Lex an argument character by character, handling
        quoting, escapes and numbered redirections. 'c' is the first
        character of the argument, which the caller has already consumed.
        Returns either the argument string or an (operator, fd) tuple. """
        if c in "'\"":
            arg = self.lex_arg_quoted(c)
        else:
            arg = c
        while self.pos != self.end:
            c = self.look()
            if c.isspace() or c in "|&":
                break
            elif c in '><':
                # This is an annoying case; we treat '2>' as a single token so
                # we don't have to track whitespace tokens.

                # If the parse string isn't an integer, do the usual thing.
                if not arg.isdigit():
                    break

                # Otherwise, lex the operator and convert to a redirection
                # token.
                num = int(arg)
                tok = self.lex_one_token()
                assert isinstance(tok, tuple) and len(tok) == 1
                return (tok[0], num)
            elif c == '"' or c == "'":
                # A quoted section may start in the middle of a word; both
                # quote styles are spliced into the current argument.
                self.eat()
                arg += self.lex_arg_quoted(c)
            elif not self.win32Escapes and c == '\\':
                # Outside of a string, '\\' escapes everything.
                self.eat()
                if self.pos == self.end:
                    Util.warning("escape at end of quoted argument in: %r" %
                                 self.data)
                    return arg
                arg += self.eat()
            else:
                arg += self.eat()
        return arg

    def lex_arg_quoted(self, delim):
        """
        lex_arg_quoted(delim) - Lex the body of a quoted string whose opening
        quote has already been consumed, consuming the closing 'delim' and
        returning the text in between. A warning is issued through
        Util.warning if the input ends before the closing quote. """
        text = ''
        while self.pos != self.end:
            c = self.eat()
            if c == delim:
                return text
            elif c == '\\' and delim == '"':
                # Inside a '"' quoted string, '\\' only escapes the quote
                # character and backslash, otherwise it is preserved.
                if self.pos == self.end:
                    Util.warning("escape at end of quoted argument in: %r" %
                                 self.data)
                    return text
                c = self.eat()
                if c == '"':
                    text += '"'
                elif c == '\\':
                    text += '\\'
                else:
                    text += '\\' + c
            else:
                text += c
        Util.warning("missing quote character in %r" % self.data)
        return text

    def lex_arg_checked(self, c):
        """
        lex_arg_checked(c) - Debugging aid: lex with both the fast and the
        slow path and raise ValueError if the fast path succeeded but
        disagrees with the slow path on the result or the end position. """
        pos = self.pos
        res = self.lex_arg_fast(c)
        end = self.pos

        # Rewind and lex again with the reference implementation.
        self.pos = pos
        reference = self.lex_arg_slow(c)
        if res is not None:
            if res != reference:
                raise ValueError("Fast path failure: %r != %r" %
                                 (res, reference))
            if self.pos != end:
                raise ValueError("Fast path failure: %r != %r" %
                                 (self.pos, end))
        return reference

    def lex_arg(self, c):
        """
        lex_arg(c) - Lex an argument starting with the already consumed
        character 'c', preferring the fast path. """
        res = self.lex_arg_fast(c)
        if res is None:
            res = self.lex_arg_slow(c)
        return res

    def lex_one_token(self):
        """
        lex_one_token - Lex a single 'sh' token. """

        c = self.eat()
        if c in ';!':
            return (c,)
        if c == '|':
            if self.maybe_eat('|'):
                return ('||',)
            return (c,)
        if c == '&':
            if self.maybe_eat('&'):
                return ('&&',)
            if self.maybe_eat('>'):
                return ('&>',)
            return (c,)
        if c == '>':
            if self.maybe_eat('&'):
                return ('>&',)
            if self.maybe_eat('>'):
                return ('>>',)
            return (c,)
        if c == '<':
            if self.maybe_eat('&'):
                return ('<&',)
            # '<<' is spelled with a second '<' (not '>').
            if self.maybe_eat('<'):
                return ('<<',)
            return (c,)

        return self.lex_arg(c)

    def lex(self):
        """
        lex - Generate the tokens of the input, skipping whitespace between
        them. """
        while self.pos != self.end:
            if self.look().isspace():
                self.eat()
            else:
                yield self.lex_one_token()
161
162###
163
class Command:
    """A single command: its argument list and its redirections.

    'args' and 'redirects' are copied into fresh lists on construction.
    Two commands compare equal when both lists are equal.
    """

    def __init__(self, args, redirects):
        self.args = list(args)
        self.redirects = list(redirects)

    def __repr__(self):
        return 'Command(%r, %r)' % (self.args, self.redirects)

    def _key(self):
        # The tuple of fields that defines equality and ordering.
        return (self.args, self.redirects)

    # Python 3 ignores __cmp__, so equality is defined explicitly; without
    # this, '==' would silently fall back to object identity.
    def __eq__(self, other):
        if not isinstance(other, Command):
            return NotImplemented
        return self._key() == other._key()

    def __ne__(self, other):
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __cmp__(self, other):
        # Retained for Python 2 callers; written without the 'cmp' builtin,
        # which no longer exists in Python 3.
        if not isinstance(other, Command):
            return -1

        mine, theirs = self._key(), other._key()
        return (mine > theirs) - (mine < theirs)
178
class Pipeline:
    """A sequence of commands joined by '|', optionally negated with '!'.

    Two pipelines compare equal when their command lists and negate flags
    are equal.
    """

    def __init__(self, commands, negate):
        self.commands = commands
        self.negate = negate

    def __repr__(self):
        return 'Pipeline(%r, %r)' % (self.commands, self.negate)

    def _key(self):
        # The tuple of fields that defines equality and ordering.
        return (self.commands, self.negate)

    # Python 3 ignores __cmp__, so equality is defined explicitly; without
    # this, '==' would silently fall back to object identity.
    def __eq__(self, other):
        if not isinstance(other, Pipeline):
            return NotImplemented
        return self._key() == other._key()

    def __ne__(self, other):
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __cmp__(self, other):
        # Retained for Python 2 callers; written without the 'cmp' builtin,
        # which no longer exists in Python 3.
        if not isinstance(other, Pipeline):
            return -1

        mine, theirs = self._key(), other._key()
        return (mine > theirs) - (mine < theirs)
193
class Seq:
    """Two operands joined by one of the operators ';', '&', '||' or '&&'.

    Two sequences compare equal when their operands and operator are equal.
    """

    def __init__(self, lhs, op, rhs):
        assert op in (';', '&', '||', '&&')
        self.op = op
        self.lhs = lhs
        self.rhs = rhs

    def __repr__(self):
        return 'Seq(%r, %r, %r)' % (self.lhs, self.op, self.rhs)

    def _key(self):
        # The tuple of fields that defines equality and ordering.
        return (self.lhs, self.op, self.rhs)

    # Python 3 ignores __cmp__, so equality is defined explicitly; without
    # this, '==' would silently fall back to object identity.
    def __eq__(self, other):
        if not isinstance(other, Seq):
            return NotImplemented
        return self._key() == other._key()

    def __ne__(self, other):
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __cmp__(self, other):
        # Retained for Python 2 callers; written without the 'cmp' builtin,
        # which no longer exists in Python 3.
        if not isinstance(other, Seq):
            return -1

        mine, theirs = self._key(), other._key()
        return (mine > theirs) - (mine < theirs)
210
class ShParser:
    """Parser turning ShLexer tokens into Command / Pipeline / Seq objects."""

    def __init__(self, data, win32Escapes=False):
        """
        data - The command string to parse.
        win32Escapes - Forwarded to ShLexer. """
        self.data = data
        self.tokens = ShLexer(data, win32Escapes=win32Escapes).lex()

    def lex(self):
        """
        lex - Consume and return the next token, or None at end of input. """
        return next(self.tokens, None)

    def look(self):
        """
        look - Return the next token without consuming it, or None at end of
        input. """
        tok = self.lex()
        # Compare against None explicitly: an empty-string argument is a
        # real token and must be pushed back too.
        if tok is not None:
            self.tokens = itertools.chain([tok], self.tokens)
        return tok

    def parse_command(self):
        """
        parse_command - Parse one command (arguments and redirections), up to
        but not including the next '|', ';', '&', '||' or '&&' token. Raises
        ValueError on a syntax error. """
        tok = self.lex()
        # Only a missing token means "empty"; '' is a valid (empty) argument.
        if tok is None:
            raise ValueError("empty command!")
        if isinstance(tok, tuple):
            raise ValueError("syntax error near unexpected token %r" % tok[0])

        args = [tok]
        redirects = []
        while 1:
            tok = self.look()

            # EOF?
            if tok is None:
                break

            # If this is an argument, just add it to the current command.
            if isinstance(tok, str):
                args.append(self.lex())
                continue

            # Otherwise see if it is a terminator.
            assert isinstance(tok, tuple)
            if tok[0] in ('|', ';', '&', '||', '&&'):
                break

            # Otherwise it must be a redirection.
            op = self.lex()
            arg = self.lex()
            # The target must be an actual argument, not end of input and
            # not another operator token.
            if not arg or isinstance(arg, tuple):
                raise ValueError("syntax error near token %r" % op[0])
            redirects.append((op, arg))

        return Command(args, redirects)

    def parse_pipeline(self):
        """
        parse_pipeline - Parse an optional leading '!' followed by one or
        more commands separated by '|'. """
        negate = False
        if self.look() == ('!',):
            self.lex()
            negate = True

        commands = [self.parse_command()]
        while self.look() == ('|',):
            self.lex()
            commands.append(self.parse_command())
        return Pipeline(commands, negate)

    def parse(self):
        """
        parse - Parse the whole input. Returns a Pipeline, or left-nested Seq
        objects when pipelines are joined by operators. Raises ValueError on
        a syntax error. """
        lhs = self.parse_pipeline()

        while self.look() is not None:
            operator = self.lex()
            assert isinstance(operator, tuple) and len(operator) == 1

            # An empty-string argument is a valid right-hand side, so only
            # end of input counts as a missing operand.
            if self.look() is None:
                raise ValueError("missing argument to operator %r" %
                                 operator[0])

            # FIXME: Operator precedence!!
            lhs = Seq(lhs, operator[0], self.parse_pipeline())

        return lhs
289
290###
291
Daniel Dunbar7b90be72009-07-31 07:59:05 +0000292import unittest
293
class TestShLexer(unittest.TestCase):
    """Unit tests for ShLexer tokenization."""

    def lex(self, str, *args, **kwargs):
        # Drain the token generator so results compare against plain lists.
        lexer = ShLexer(str, *args, **kwargs)
        return list(lexer.lex())

    def test_basic(self):
        expected = ['a', ('|',), 'b', ('>',), 'c', ('&',), 'd',
                    ('<',), 'e']
        self.assertEqual(self.lex('a|b>c&d<e'), expected)

    def test_redirection_tokens(self):
        # A digit run only counts as a file descriptor when it forms the
        # whole word in front of the operator.
        cases = [
            ('a2>c', ['a2', ('>',), 'c']),
            ('a 2>c', ['a', ('>', 2), 'c']),
        ]
        for source, expected in cases:
            self.assertEqual(self.lex(source), expected)

    def test_quoting(self):
        cases = [
            (""" 'a' """, ['a']),
            (""" "hello\\"world" """, ['hello"world']),
            (""" "hello\\'world" """, ["hello\\'world"]),
            (""" "hello\\\\world" """, ["hello\\world"]),
            (""" he"llo wo"rld """, ["hello world"]),
            (""" a\\ b a\\\\b """, ["a b", "a\\b"]),
            (""" "" "" """, ["", ""]),
        ]
        for source, expected in cases:
            self.assertEqual(self.lex(source), expected)

        # With win32 escapes the backslash is an ordinary character.
        self.assertEqual(self.lex(""" a\\ b """, win32Escapes = True),
                         ['a\\', 'b'])
Daniel Dunbar7b90be72009-07-31 07:59:05 +0000326
class TestShParse(unittest.TestCase):
    """Unit tests for ShParser."""

    def parse(self, str):
        parser = ShParser(str)
        return parser.parse()

    def _single(self, name):
        # Expected parse of a lone command that has no extra arguments.
        return Pipeline([Command([name], [])], False)

    def test_basic(self):
        expected = Pipeline([Command(['echo', 'hello'], [])], False)
        self.assertEqual(self.parse('echo hello'), expected)

        expected = Pipeline([Command(['echo', ''], [])], False)
        self.assertEqual(self.parse('echo ""'), expected)

    def test_redirection(self):
        one_redirect = Command(['echo', 'hello'], [(('>',), 'c')])
        self.assertEqual(self.parse('echo hello > c'),
                         Pipeline([one_redirect], False))

        two_redirects = Command(['echo', 'hello'],
                                [(('>',), 'c'), (('>>',), 'd')])
        self.assertEqual(self.parse('echo hello > c >> d'),
                         Pipeline([two_redirects], False))

    def test_pipeline(self):
        a = Command(['a'], [])
        b = Command(['b'], [])
        c = Command(['c'], [])

        self.assertEqual(self.parse('a | b'), Pipeline([a, b], False))
        self.assertEqual(self.parse('a | b | c'), Pipeline([a, b, c], False))
        self.assertEqual(self.parse('! a'), Pipeline([a], True))

    def test_list(self):
        # Every list operator joins the two pipelines into a Seq.
        for source, op in [('a ; b', ';'),
                           ('a & b', '&'),
                           ('a && b', '&&'),
                           ('a || b', '||')]:
            self.assertEqual(self.parse(source),
                             Seq(self._single('a'), op, self._single('b')))

        # Operators nest to the left.
        self.assertEqual(self.parse('a && b || c'),
                         Seq(Seq(self._single('a'), '&&', self._single('b')),
                             '||',
                             self._single('c')))
Daniel Dunbara39be6a2009-08-01 09:41:09 +0000388
if __name__ == '__main__':
    # Executed directly as a script: run the unit tests defined above.
    unittest.main()