blob: 76d3f585b1acaf8d0622308c38a0e60461bb0503 [file] [log] [blame]
Daniel Dunbar93fe03f2009-08-01 03:22:27 +00001import itertools
2
Daniel Dunbar7b90be72009-07-31 07:59:05 +00003import Util
4
Daniel Dunbaree41c4d2009-08-01 05:52:04 +00005# FIXME: It would be nice to at least match a few other things like `...`, $(
6# ... ), $VAR, etc., if only so we can nicely say "we don't support this".
7
Daniel Dunbar7b90be72009-07-31 07:59:05 +00008class ShLexer:
9 def __init__(self, data):
10 self.data = data
11 self.pos = 0
12 self.end = len(data)
13
14 def eat(self):
15 c = self.data[self.pos]
16 self.pos += 1
17 return c
18
19 def look(self):
20 return self.data[self.pos]
21
22 def maybe_eat(self, c):
23 """
24 maybe_eat(c) - Consume the character c if it is the next character,
25 returning True if a character was consumed. """
26 if self.data[self.pos] == c:
27 self.pos += 1
28 return True
29 return False
30
Daniel Dunbar93fe03f2009-08-01 03:22:27 +000031 def lex_arg_fast(self, c):
32 # Get the leading whitespace free section.
33 chunk = self.data[self.pos - 1:].split(None, 1)[0]
34
35 # If it has special characters, the fast path failed.
36 if ('|' in chunk or '&' in chunk or
37 '<' in chunk or '>' in chunk or
38 "'" in chunk or '"' in chunk):
39 return None
40
41 self.pos = self.pos - 1 + len(chunk)
42 return chunk
43
44 def lex_arg_slow(self, c):
Daniel Dunbar7b90be72009-07-31 07:59:05 +000045 if c in "'\"":
46 str = self.lex_arg_quoted(c)
47 else:
48 str = c
49 while self.pos != self.end:
50 c = self.look()
Daniel Dunbar93fe03f2009-08-01 03:22:27 +000051 if c.isspace() or c in "|&":
Daniel Dunbar7b90be72009-07-31 07:59:05 +000052 break
Daniel Dunbar93fe03f2009-08-01 03:22:27 +000053 elif c in '><':
54 # This is an annoying case; we treat '2>' as a single token so
55 # we don't have to track whitespace tokens.
56
57 # If the parse string isn't an integer, do the usual thing.
58 if not str.isdigit():
59 break
60
61 # Otherwise, lex the operator and convert to a redirection
62 # token.
63 num = int(str)
64 tok = self.lex_one_token()
65 assert isinstance(tok, tuple) and len(tok) == 1
66 return (tok[0], num)
Daniel Dunbar7b90be72009-07-31 07:59:05 +000067 elif c == '"':
68 self.eat()
69 str += self.lex_arg_quoted('"')
70 else:
71 str += self.eat()
72 return str
73
74 def lex_arg_quoted(self, delim):
75 str = ''
76 while self.pos != self.end:
77 c = self.eat()
78 if c == delim:
79 return str
80 elif c == '\\' and delim == '"':
81 # Shell escaping is just '\"' to avoid termination, no actual
82 # escaping.
83 if self.pos == self.end:
84 Util.warning("escape at end of quoted argument in: %r" %
85 self.data)
86 return str
87 c = self.eat()
Daniel Dunbaree41c4d2009-08-01 05:52:04 +000088 if c == '"': #
89 str += '"'
90 elif c == '\\':
Daniel Dunbar7b90be72009-07-31 07:59:05 +000091 str += '\\'
Daniel Dunbaree41c4d2009-08-01 05:52:04 +000092 else:
93 str += '\\' + c
Daniel Dunbar7b90be72009-07-31 07:59:05 +000094 else:
95 str += c
96 Util.warning("missing quote character in %r" % self.data)
97 return str
Daniel Dunbar93fe03f2009-08-01 03:22:27 +000098
99 def lex_arg_checked(self, c):
100 pos = self.pos
101 res = self.lex_arg_fast(c)
102 end = self.pos
Daniel Dunbar7b90be72009-07-31 07:59:05 +0000103
Daniel Dunbar93fe03f2009-08-01 03:22:27 +0000104 self.pos = pos
105 reference = self.lex_arg_slow(c)
106 if res is not None:
107 if res != reference:
108 raise ValueError,"Fast path failure: %r != %r" % (res, reference)
109 if self.pos != end:
110 raise ValueError,"Fast path failure: %r != %r" % (self.pos, end)
111 return reference
112
113 def lex_arg(self, c):
114 return self.lex_arg_fast(c) or self.lex_arg_slow(c)
115
Daniel Dunbar7b90be72009-07-31 07:59:05 +0000116 def lex_one_token(self):
117 """
118 lex_one_token - Lex a single 'sh' token. """
119
120 c = self.eat()
Daniel Dunbar93fe03f2009-08-01 03:22:27 +0000121 if c in ';!':
122 return (c,)
Daniel Dunbar7b90be72009-07-31 07:59:05 +0000123 if c == '|':
124 if self.maybe_eat('|'):
125 return ('||',)
126 return (c,)
127 if c == '&':
128 if self.maybe_eat('&'):
129 return ('&&',)
130 if self.maybe_eat('>'):
131 return ('&>',)
132 return (c,)
133 if c == '>':
134 if self.maybe_eat('&'):
135 return ('>&',)
136 if self.maybe_eat('>'):
137 return ('>>',)
138 return (c,)
139 if c == '<':
140 if self.maybe_eat('&'):
141 return ('<&',)
142 if self.maybe_eat('>'):
143 return ('<<',)
Daniel Dunbaree41c4d2009-08-01 05:52:04 +0000144 return (c,)
Daniel Dunbar93fe03f2009-08-01 03:22:27 +0000145
Daniel Dunbar7b90be72009-07-31 07:59:05 +0000146 return self.lex_arg(c)
147
148 def lex(self):
149 while self.pos != self.end:
150 if self.look().isspace():
151 self.eat()
152 else:
153 yield self.lex_one_token()
154
155###
156
Daniel Dunbar93fe03f2009-08-01 03:22:27 +0000157class Command:
158 def __init__(self, args, redirects):
159 self.args = list(args)
160 self.redirects = list(redirects)
161
162 def __repr__(self):
163 return 'Command(%r, %r)' % (self.args, self.redirects)
164
165 def __cmp__(self, other):
166 if not isinstance(other, Command):
167 return -1
168
169 return cmp((self.args, self.redirects),
170 (other.args, other.redirects))
171
172class Pipeline:
173 def __init__(self, commands, negate):
174 self.commands = commands
175 self.negate = negate
176
177 def __repr__(self):
178 return 'Pipeline(%r, %r)' % (self.commands, self.negate)
179
180 def __cmp__(self, other):
181 if not isinstance(other, Pipeline):
182 return -1
183
184 return cmp((self.commands, self.negate),
185 (other.commands, other.negate))
186
187class Seq:
188 def __init__(self, lhs, op, rhs):
189 assert op in (';', '&', '||', '&&')
190 self.op = op
191 self.lhs = lhs
192 self.rhs = rhs
193
194 def __repr__(self):
195 return 'Seq(%r, %r, %r)' % (self.lhs, self.op, self.rhs)
196
197 def __cmp__(self, other):
198 if not isinstance(other, Seq):
199 return -1
200
201 return cmp((self.lhs, self.op, self.rhs),
202 (other.lhs, other.op, other.rhs))
203
204class ShParser:
205 def __init__(self, data):
206 self.data = data
207 self.tokens = ShLexer(data).lex()
208
209 def lex(self):
210 try:
211 return self.tokens.next()
212 except StopIteration:
213 return None
214
215 def look(self):
216 next = self.lex()
217 if next:
218 self.tokens = itertools.chain([next], self.tokens)
219 return next
220
221 def parse_command(self):
222 tok = self.lex()
223 if not tok:
224 raise ValueError,"empty command!"
225 if isinstance(tok, tuple):
226 raise ValueError,"syntax error near unexpected token %r" % tok[0]
227
228 args = [tok]
229 redirects = []
230 while 1:
231 tok = self.look()
232
233 # EOF?
234 if tok is None:
235 break
236
237 # If this is an argument, just add it to the current command.
238 if isinstance(tok, str):
239 args.append(self.lex())
240 continue
241
242 # Otherwise see if it is a terminator.
243 assert isinstance(tok, tuple)
244 if tok[0] in ('|',';','&','||','&&'):
245 break
246
247 # Otherwise it must be a redirection.
248 op = self.lex()
249 arg = self.lex()
250 if not arg:
251 raise ValueError,"syntax error near token %r" % op[0]
252 redirects.append((op, arg))
253
254 return Command(args, redirects)
255
256 def parse_pipeline(self):
257 negate = False
258 if self.look() == ('!',):
259 self.lex()
260 negate = True
261
262 commands = [self.parse_command()]
263 while self.look() == ('|',):
264 self.lex()
265 commands.append(self.parse_command())
266 return Pipeline(commands, negate)
267
268 def parse(self):
269 lhs = self.parse_pipeline()
270
271 while self.look():
272 operator = self.lex()
273 assert isinstance(operator, tuple) and len(operator) == 1
274
275 if not self.look():
276 raise ValueError, "missing argument to operator %r" % operator[0]
277
278 # FIXME: Operator precedence!!
279 lhs = Seq(lhs, operator[0], self.parse_pipeline())
280
281 return lhs
282
283###
284
Daniel Dunbar7b90be72009-07-31 07:59:05 +0000285import unittest
286
287class TestShLexer(unittest.TestCase):
288 def lex(self, str):
289 return list(ShLexer(str).lex())
290
Daniel Dunbar93fe03f2009-08-01 03:22:27 +0000291 def test_basic(self):
Daniel Dunbaree41c4d2009-08-01 05:52:04 +0000292 self.assertEqual(self.lex('a|b>c&d<e'),
293 ['a', ('|',), 'b', ('>',), 'c', ('&',), 'd',
294 ('<',), 'e'])
Daniel Dunbar93fe03f2009-08-01 03:22:27 +0000295
296 def test_redirection_tokens(self):
Daniel Dunbar7b90be72009-07-31 07:59:05 +0000297 self.assertEqual(self.lex('a2>c'),
298 ['a2', ('>',), 'c'])
299 self.assertEqual(self.lex('a 2>c'),
Daniel Dunbar93fe03f2009-08-01 03:22:27 +0000300 ['a', ('>',2), 'c'])
Daniel Dunbar7b90be72009-07-31 07:59:05 +0000301
Daniel Dunbar93fe03f2009-08-01 03:22:27 +0000302 def test_quoting(self):
Daniel Dunbar7b90be72009-07-31 07:59:05 +0000303 self.assertEqual(self.lex(""" 'a' """),
304 ['a'])
305 self.assertEqual(self.lex(""" "hello\\"world" """),
306 ['hello"world'])
307 self.assertEqual(self.lex(""" "hello\\'world" """),
308 ["hello\\'world"])
Daniel Dunbaree41c4d2009-08-01 05:52:04 +0000309 self.assertEqual(self.lex(""" "hello\\\\world" """),
310 ["hello\\world"])
Daniel Dunbar7b90be72009-07-31 07:59:05 +0000311 self.assertEqual(self.lex(""" he"llo wo"rld """),
312 ["hello world"])
313
Daniel Dunbar93fe03f2009-08-01 03:22:27 +0000314class TestShParse(unittest.TestCase):
315 def parse(self, str):
316 return ShParser(str).parse()
317
318 def test_basic(self):
319 self.assertEqual(self.parse('echo hello'),
320 Pipeline([Command(['echo', 'hello'], [])], False))
321
322 def test_redirection(self):
323 self.assertEqual(self.parse('echo hello > c'),
324 Pipeline([Command(['echo', 'hello'],
325 [((('>'),), 'c')])], False))
326 self.assertEqual(self.parse('echo hello > c >> d'),
327 Pipeline([Command(['echo', 'hello'], [(('>',), 'c'),
328 (('>>',), 'd')])], False))
329
330 def test_pipeline(self):
331 self.assertEqual(self.parse('a | b'),
332 Pipeline([Command(['a'], []),
333 Command(['b'], [])],
334 False))
335
336 self.assertEqual(self.parse('a | b | c'),
337 Pipeline([Command(['a'], []),
338 Command(['b'], []),
339 Command(['c'], [])],
340 False))
341
342 self.assertEqual(self.parse('! a'),
343 Pipeline([Command(['a'], [])],
344 True))
345
346 def test_list(self):
347 self.assertEqual(self.parse('a ; b'),
348 Seq(Pipeline([Command(['a'], [])], False),
349 ';',
350 Pipeline([Command(['b'], [])], False)))
351
352 self.assertEqual(self.parse('a & b'),
353 Seq(Pipeline([Command(['a'], [])], False),
354 '&',
355 Pipeline([Command(['b'], [])], False)))
356
357 self.assertEqual(self.parse('a && b'),
358 Seq(Pipeline([Command(['a'], [])], False),
359 '&&',
360 Pipeline([Command(['b'], [])], False)))
361
362 self.assertEqual(self.parse('a || b'),
363 Seq(Pipeline([Command(['a'], [])], False),
364 '||',
365 Pipeline([Command(['b'], [])], False)))
366
367 self.assertEqual(self.parse('a && b || c'),
368 Seq(Seq(Pipeline([Command(['a'], [])], False),
369 '&&',
370 Pipeline([Command(['b'], [])], False)),
371 '||',
372 Pipeline([Command(['c'], [])], False)))
373
Daniel Dunbar7b90be72009-07-31 07:59:05 +0000374if __name__ == '__main__':
375 unittest.main()