blob: b8485b1596f7bb61118a3b94b5fa50ef4fd9b202 [file] [log] [blame]
Daniel Dunbar7b90be72009-07-31 07:59:05 +00001import Util
2
class ShLexer:
    """
    ShLexer - Split a 'sh' style command line into tokens.

    Arguments are produced as plain strings. Operators are produced as
    one-element tuples such as ('|',) or ('>>',), so an operator can always be
    told apart from an argument spelled with the same characters. """

    # For each character that starts an operator, the characters that may
    # follow it to form a two-character operator.
    _OPERATOR_SUFFIXES = {
        ';': '',
        '|': '|',
        '&': '&>',
        '>': '&>',
        '<': '&<',
    }

    def __init__(self, data):
        """
        Create a lexer over the string data, positioned at its first
        character. """
        self.data = data
        self.pos = 0
        self.end = len(data)

    def eat(self):
        """
        eat() - Consume and return the next character. The caller must ensure
        the input is not exhausted. """
        c = self.data[self.pos]
        self.pos += 1
        return c

    def look(self):
        """
        look() - Return the next character without consuming it. The caller
        must ensure the input is not exhausted. """
        return self.data[self.pos]

    def maybe_eat(self, c):
        """
        maybe_eat(c) - Consume the character c if it is the next character,
        returning True if a character was consumed. """
        # Check the bounds first: an operator may be the very last character
        # of the input (e.g. 'a |'), leaving nothing to compare against.
        if self.pos != self.end and self.data[self.pos] == c:
            self.pos += 1
            return True
        return False

    def lex_arg(self, c):
        """
        lex_arg(c) - Lex one argument whose first character c has already been
        consumed, returning its text with the quote characters removed. """
        if c in "'\"":
            arg = self.lex_arg_quoted(c)
        else:
            arg = c
        while self.pos != self.end:
            c = self.look()
            # NOTE(review): ';' is not a terminator here, so 'a;b' lexes as a
            # single argument; left unchanged for compatibility.
            if c.isspace() or c in "|><&":
                break
            elif c in "'\"":
                # A quoted section in the middle of a word is spliced into
                # the argument, for either kind of quote.
                self.eat()
                arg += self.lex_arg_quoted(c)
            else:
                arg += self.eat()
        return arg

    def lex_arg_quoted(self, delim):
        """
        lex_arg_quoted(delim) - Lex the rest of a quoted string whose opening
        delim has already been consumed, returning its contents without the
        closing delim. A warning is issued if the input ends first. """
        arg = ''
        while self.pos != self.end:
            c = self.eat()
            if c == delim:
                return arg
            elif c == '\\' and delim == '"':
                # Shell escaping is just '\"' to avoid termination, no actual
                # escaping.
                if self.pos == self.end:
                    Util.warning("escape at end of quoted argument in: %r" %
                                 self.data)
                    return arg
                c = self.eat()
                if c != delim:
                    # Not an escaped quote: keep the backslash verbatim.
                    arg += '\\'
                arg += c
            else:
                arg += c
        Util.warning("missing quote character in %r" % self.data)
        return arg

    def lex_one_token(self):
        """
        lex_one_token - Lex a single 'sh' token. Returns a one-element tuple
        for an operator and a string for an argument. The caller must ensure
        the input is not exhausted. """
        c = self.eat()
        suffixes = self._OPERATOR_SUFFIXES.get(c)
        if suffixes is None:
            return self.lex_arg(c)
        for suffix in suffixes:
            if self.maybe_eat(suffix):
                return (c + suffix,)
        return (c,)

    def lex(self):
        """
        lex - Generate the tokens of the input in order, skipping the
        whitespace between them. """
        while self.pos != self.end:
            if self.look().isspace():
                self.eat()
            else:
                yield self.lex_one_token()
100
101###
102
103import unittest
104
class TestShLexer(unittest.TestCase):
    """Checks of ShLexer's handling of the '>' operator and of quoting."""

    def lex(self, str):
        """Run ShLexer over the given text and collect its tokens in a list."""
        lexer = ShLexer(str)
        return list(lexer.lex())

    def _check(self, cases):
        """Assert that each (text, expected tokens) pair lexes as expected."""
        for text, expected in cases:
            self.assertEqual(self.lex(text), expected)

    def testops(self):
        """A '>' ends the word before it; a space still separates words."""
        self._check([
            ('a2>c', ['a2', ('>',), 'c']),
            ('a 2>c', ['a', '2', ('>',), 'c']),
        ])

    def testquoting(self):
        """Quotes are stripped; only an escaped '"' loses its backslash."""
        self._check([
            (""" 'a' """, ['a']),
            (""" "hello\\"world" """, ['hello"world']),
            (""" "hello\\'world" """, ["hello\\'world"]),
            (""" he"llo wo"rld """, ["hello world"]),
        ])
124
# Run the unit tests above when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()