Daniel Dunbar | be7ada7 | 2009-09-08 05:31:18 +0000 | [diff] [blame] | 1 | import itertools |
| 2 | |
| 3 | from ShCommands import Command, Pipeline |
| 4 | |
def tcl_preprocess(data):
    """Apply Tcl's backslash-newline preprocessing step to *data*.

    Every backslash-newline sequence, together with any spaces or tabs that
    immediately follow it, is replaced by a single space.  All occurrences in
    the input are handled, not just the first one.

    Returns *data* itself when it contains no backslash-newline sequence.
    """
    marker = '\\\n'
    i = data.find(marker)
    if i == -1:
        return data

    n = len(data)
    pieces = []
    start = 0
    while i != -1:
        # Keep the text in front of the escaped newline ...
        pieces.append(data[start:i])

        # ... then skip the escape itself and the whitespace that follows it.
        i += len(marker)
        while i < n and data[i] in ' \t':
            i += 1
        start = i
        i = data.find(marker, start)
    pieces.append(data[start:])

    # Joining with ' ' puts exactly one space where each escape used to be.
    return ' '.join(pieces)
| 18 | |
class TclLexer:
    """TclLexer - Lex a string into "words", following the Tcl syntax.

    The input is first run through tcl_preprocess().  lex() then yields one
    item per word: a plain string for a word, or the tuple (';',) for a
    command terminator (';' or newline).

    Command substitution ('[...]'), variable substitution ('$name', '${...}')
    and argument expansion ('{*}word') are not implemented and raise
    NotImplementedError.
    """

    # Backslash escapes that stand for a single control character.  Any other
    # escaped character stands for itself (see escape()).
    kEscapes = {
        'a': '\x07',
        'b': '\x08',
        'f': '\x0c',
        'n': '\n',
        'r': '\r',
        't': '\t',
        'v': '\x0b',
    }

    def __init__(self, data):
        self.data = tcl_preprocess(data)
        self.pos = 0
        self.end = len(self.data)

    def at_end(self):
        """Return True once every character has been consumed."""
        return self.pos == self.end

    def eat(self):
        """Consume and return the next character (caller checks at_end())."""
        c = self.data[self.pos]
        self.pos += 1
        return c

    def look(self):
        """Return the next character without consuming it."""
        return self.data[self.pos]

    def maybe_eat(self, c):
        """
        maybe_eat(c) - Consume the character c if it is the next character,
        returning True if a character was consumed.

        At the end of the input nothing is consumed and False is returned.
        """
        if not self.at_end() and self.data[self.pos] == c:
            self.pos += 1
            return True
        return False

    def escape(self, c):
        """Return the character denoted by a backslash followed by *c*.

        Raises ValueError for the numeric/unicode escapes ('u', 'x', 'o'),
        which are not supported.
        """
        if c in 'uxo':
            raise ValueError('Invalid quoted character %r' % c)
        return self.kEscapes.get(c, c)

    def lex_braced(self):
        """Lex the body of a '{'-quoted word up to its matching '}'.

        The opening brace has already been consumed.  Nested braces are kept
        in the result; a backslash in front of a brace is dropped and the
        brace is taken literally; everything else is copied verbatim.

        Raises ValueError if the input ends before the closing brace.
        """
        parts = []
        while True:
            if self.at_end():
                raise ValueError("Unterminated '{' quoted word")

            c = self.eat()
            if c == '}':
                break
            elif c == '{':
                parts.append('{' + self.lex_braced() + '}')
            elif c == '\\' and not self.at_end() and self.look() in '{}':
                # The at_end() guard lets a trailing backslash fall through
                # to the "Unterminated" error instead of an IndexError.
                parts.append(self.eat())
            else:
                parts.append(c)

        return ''.join(parts)

    def lex_quoted(self):
        """Lex the body of a '"'-quoted word up to the closing quote.

        The opening quote has already been consumed.  Backslash escapes are
        translated with escape().

        Raises ValueError if the input ends inside the word or right after a
        backslash.
        """
        parts = []
        while True:
            if self.at_end():
                raise ValueError("Unterminated '\"' quoted word")

            c = self.eat()
            if c == '"':
                break
            elif c == '\\':
                if self.at_end():
                    raise ValueError('Missing quoted character')

                parts.append(self.escape(self.eat()))
            else:
                parts.append(c)

        return ''.join(parts)

    def lex_unquoted(self, process_all=False):
        """Lex an unquoted word.

        Stops in front of whitespace or ';' unless *process_all* is true, in
        which case the rest of the input is consumed.  Backslash escapes are
        translated with escape().

        Raises ValueError for a trailing backslash and NotImplementedError
        for command or variable substitution.
        """
        parts = []
        while not self.at_end():
            if not process_all:
                nxt = self.look()
                if nxt.isspace() or nxt == ';':
                    break

            c = self.eat()
            if c == '\\':
                if self.at_end():
                    raise ValueError('Missing quoted character')

                parts.append(self.escape(self.eat()))
            elif c == '[':
                raise NotImplementedError('Command substitution is '
                                          'not supported')
            elif c == '$' and not self.at_end() and (self.look().isalpha() or
                                                     self.look() == '{'):
                raise NotImplementedError('Variable substitution is '
                                          'not supported')
            else:
                parts.append(c)

        return ''.join(parts)

    def lex_one_token(self):
        """Lex a single word, dispatching on its first character."""
        if self.maybe_eat('"'):
            return self.lex_quoted()
        elif self.maybe_eat('{'):
            # Check for argument substitution.
            if not self.maybe_eat('*'):
                return self.lex_braced()

            if not self.maybe_eat('}'):
                # An ordinary braced word that merely starts with '*'.
                return '*' + self.lex_braced()

            # '{*}' standing alone is just the word '*'.
            if self.at_end() or self.look().isspace():
                return '*'

            raise NotImplementedError("Argument substitution is unsupported")
        else:
            return self.lex_unquoted()

    def lex(self):
        """Generate the words of the input.

        Yields a string per word and (';',) per ';' or newline.  Any other
        whitespace only separates words.
        """
        while not self.at_end():
            c = self.look()
            if c in ';\n':
                self.eat()
                yield (';',)
            elif c.isspace():
                # Skip every kind of separator whitespace, not only ' ' and
                # '\t': lex_unquoted() stops at any whitespace without
                # consuming it, so e.g. '\r' would otherwise yield '' forever.
                self.eat()
            else:
                yield self.lex_one_token()
| 162 | |
class TclExecCommand:
    """Parser for the argument words of a Tcl 'exec' command.

    The words (for example the output of TclLexer.lex()) are consumed one at
    a time.  parse() handles the leading switches and then builds a Pipeline
    of Command objects, each with its arguments and redirects.
    """

    # Redirection operators grouped by length, so that parse_pipeline() can
    # try the longest operators first.
    kRedirectPrefixes1 = ('<', '>')
    kRedirectPrefixes2 = ('<@', '<<', '2>', '>&', '>>', '>@')
    kRedirectPrefixes3 = ('2>@', '2>>', '>>&', '>&@')
    kRedirectPrefixes4 = ('2>@1',)

    def __init__(self, args):
        self.args = iter(args)

    def lex(self):
        """Consume and return the next word, or None when none are left."""
        # A for loop advances the iterator on every Python version, unlike
        # the Python 2 only '.next()' method.
        for arg in self.args:
            return arg
        return None

    def look(self):
        """Return the next word without consuming it (None at the end)."""
        tok = self.lex()
        if tok is not None:
            # Push the word back in front of the remaining ones.
            self.args = itertools.chain([tok], self.args)
        return tok

    def parse_redirect(self, tok, length):
        """Split a redirection word into its operator and target.

        *length* is the length of the operator at the start of *tok*.  When
        *tok* is the bare operator the target is the following word,
        otherwise it is the remainder of *tok*.

        Returns (op, arg) where op is (operator,) or, when the operator
        starts with '2', (operator without the '2', 2).

        Raises ValueError when the target word is missing.
        """
        # NOTE(review): the Tcl exec manual describes '2>@1' as taking no
        # target; this code still requires one for it -- confirm intent.
        if len(tok) == length:
            arg = self.lex()
            if arg is None:
                raise ValueError('Missing argument to %r redirection' % tok)
        else:
            tok, arg = tok[:length], tok[length:]

        if tok[0] == '2':
            op = (tok[1:], 2)
        else:
            op = (tok,)
        return (op, arg)

    def parse_pipeline(self):
        """Parse the remaining words into a Pipeline.

        '|' starts a new command.  '|&' does the same after recording a
        redirect of stderr on the current command.  Words starting with a
        redirection operator are added to the current command's redirects;
        everything else is appended to its arguments.

        Raises ValueError when there are no words left at all.
        """
        if self.look() is None:
            raise ValueError("Expected at least one argument to exec")

        commands = [Command([], [])]
        while True:
            arg = self.lex()
            if arg is None:
                break
            elif arg == '|':
                commands.append(Command([], []))
            elif arg == '|&':
                # Write this as a redirect of stderr; it must come first because
                # stdout may have already been redirected.
                commands[-1].redirects.insert(0, (('>&', 2), '1'))
                commands.append(Command([], []))
            elif arg[:4] in TclExecCommand.kRedirectPrefixes4:
                commands[-1].redirects.append(self.parse_redirect(arg, 4))
            elif arg[:3] in TclExecCommand.kRedirectPrefixes3:
                commands[-1].redirects.append(self.parse_redirect(arg, 3))
            elif arg[:2] in TclExecCommand.kRedirectPrefixes2:
                commands[-1].redirects.append(self.parse_redirect(arg, 2))
            elif arg[:1] in TclExecCommand.kRedirectPrefixes1:
                commands[-1].redirects.append(self.parse_redirect(arg, 1))
            else:
                commands[-1].args.append(arg)

        return Pipeline(commands, False, pipe_err=True)

    def parse(self):
        """Parse the leading switches and the pipeline that follows them.

        Recognized switches are '-ignorestderr', '-keepnewline' and '--'
        (which ends switch processing).  Any other word starting with '-' in
        switch position raises ValueError.

        Returns (ignoreStderr, keepNewline, pipeline).
        """
        ignoreStderr = False
        keepNewline = False

        # Parse arguments.
        while True:
            tok = self.look()
            # startswith() also copes with an empty word such as '{}', which
            # indexing with [0] would not.
            if not isinstance(tok, str) or not tok.startswith('-'):
                break

            # Consume the switch; leaving it in place would make look()
            # return the same word forever.
            self.lex()
            if tok == '--':
                break
            elif tok == '-ignorestderr':
                ignoreStderr = True
            elif tok == '-keepnewline':
                keepNewline = True
            else:
                raise ValueError("Invalid exec argument %r" % tok)

        return (ignoreStderr, keepNewline, self.parse_pipeline())
| 248 | |
| 249 | ### |
| 250 | |
| 251 | import unittest |
| 252 | |
class TestTclLexer(unittest.TestCase):
    """Unit tests for tcl_preprocess() and TclLexer."""

    def lex(self, str, *args, **kwargs):
        """Return the full list of items TclLexer yields for the input."""
        lexer = TclLexer(str, *args, **kwargs)
        return list(lexer.lex())

    def test_preprocess(self):
        # (input, expected preprocessed text)
        cases = [
            ('a b', 'a b'),
            ('a\\\nb c', 'a b c'),
        ]
        for source, expected in cases:
            self.assertEqual(tcl_preprocess(source), expected)

    def test_unquoted(self):
        # (input, expected words)
        cases = [
            ('a b c', ['a', 'b', 'c']),
            (r'a\nb\tc\ ', ['a\nb\tc ']),
            (r'a \\\$b c $\\', ['a', r'\$b', 'c', '$\\']),
        ]
        for source, expected in cases:
            self.assertEqual(self.lex(source), expected)

    def test_braced(self):
        # (input, expected words)
        cases = [
            ('a {b c} {}', ['a', 'b c', '']),
            (r'a {b {c\n}}', ['a', 'b {c\\n}']),
            (r'a {b\{}', ['a', 'b{']),
            (r'{*}', ['*']),
            (r'{*} a', ['*', 'a']),
            (r'{*} a', ['*', 'a']),
            ('{a\\\n b}', ['a b']),
        ]
        for source, expected in cases:
            self.assertEqual(self.lex(source), expected)

    def test_quoted(self):
        words = self.lex('a "b c"')
        self.assertEqual(words, ['a', 'b c'])

    def test_terminators(self):
        # Newline and ';' (with or without surrounding blanks) all produce
        # the same terminator item.
        expected = ['a', (';',), 'b']
        for source in ('a\nb', 'a;b', 'a ; b'):
            self.assertEqual(self.lex(source), expected)
| 293 | |
class TestTclExecCommand(unittest.TestCase):
    """Unit tests for TclExecCommand.parse()."""

    def parse(self, str):
        """Lex the input with TclLexer and parse the words as an exec."""
        words = list(TclLexer(str).lex())
        return TclExecCommand(words).parse()

    def test_basic(self):
        single = Pipeline([Command(['echo', 'hello'], [])],
                          False, True)
        self.assertEqual(self.parse('echo hello'),
                         (False, False, single))

        piped = Pipeline([Command(['echo', 'hello'], []),
                          Command(['grep', 'hello'], [])],
                         False, True)
        self.assertEqual(self.parse('echo hello | grep hello'),
                         (False, False, piped))

    def test_redirect(self):
        # The '|&' redirect is expected first even though it is written last.
        redirects = [
            (('>&', 2), '1'),
            (('>',), 'a'),
            (('>',), 'b'),
            (('>>',), 'c'),
            (('>', 2), 'd'),
        ]
        expected = Pipeline([Command(['echo', 'hello'], redirects),
                             Command(['e'], [])],
                            False, True)
        self.assertEqual(self.parse('echo hello > a >b >>c 2> d |& e'),
                         (False, False, expected))
| 320 | |
# Run the unit tests defined in this file when it is executed as a script.
if __name__ == '__main__':
    unittest.main()