| import io |
| import itertools |
| import shlex |
| import string |
| import unittest |
| |
| |
| |
| # The original test data set was from shellwords, by Hartmut Goebel. |
| |
| data = r"""x|x| |
| foo bar|foo|bar| |
| foo bar|foo|bar| |
| foo bar |foo|bar| |
| foo bar bla fasel|foo|bar|bla|fasel| |
| x y z xxxx|x|y|z|xxxx| |
| \x bar|\|x|bar| |
| \ x bar|\|x|bar| |
| \ bar|\|bar| |
| foo \x bar|foo|\|x|bar| |
| foo \ x bar|foo|\|x|bar| |
| foo \ bar|foo|\|bar| |
| foo "bar" bla|foo|"bar"|bla| |
| "foo" "bar" "bla"|"foo"|"bar"|"bla"| |
| "foo" bar "bla"|"foo"|bar|"bla"| |
| "foo" bar bla|"foo"|bar|bla| |
| foo 'bar' bla|foo|'bar'|bla| |
| 'foo' 'bar' 'bla'|'foo'|'bar'|'bla'| |
| 'foo' bar 'bla'|'foo'|bar|'bla'| |
| 'foo' bar bla|'foo'|bar|bla| |
| blurb foo"bar"bar"fasel" baz|blurb|foo"bar"bar"fasel"|baz| |
| blurb foo'bar'bar'fasel' baz|blurb|foo'bar'bar'fasel'|baz| |
| ""|""| |
| ''|''| |
| foo "" bar|foo|""|bar| |
| foo '' bar|foo|''|bar| |
| foo "" "" "" bar|foo|""|""|""|bar| |
| foo '' '' '' bar|foo|''|''|''|bar| |
| \""|\|""| |
| "\"|"\"| |
| "foo\ bar"|"foo\ bar"| |
| "foo\\ bar"|"foo\\ bar"| |
| "foo\\ bar\"|"foo\\ bar\"| |
| "foo\\" bar\""|"foo\\"|bar|\|""| |
| "foo\\ bar\" dfadf"|"foo\\ bar\"|dfadf"| |
| "foo\\\ bar\" dfadf"|"foo\\\ bar\"|dfadf"| |
| "foo\\\x bar\" dfadf"|"foo\\\x bar\"|dfadf"| |
| "foo\x bar\" dfadf"|"foo\x bar\"|dfadf"| |
| \''|\|''| |
| 'foo\ bar'|'foo\ bar'| |
| 'foo\\ bar'|'foo\\ bar'| |
| "foo\\\x bar\" df'a\ 'df'|"foo\\\x bar\"|df'a|\|'df'| |
| \"foo"|\|"foo"| |
| \"foo"\x|\|"foo"|\|x| |
| "foo\x"|"foo\x"| |
| "foo\ "|"foo\ "| |
| foo\ xx|foo|\|xx| |
| foo\ x\x|foo|\|x|\|x| |
| foo\ x\x\""|foo|\|x|\|x|\|""| |
| "foo\ x\x"|"foo\ x\x"| |
| "foo\ x\x\\"|"foo\ x\x\\"| |
| "foo\ x\x\\""foobar"|"foo\ x\x\\"|"foobar"| |
| "foo\ x\x\\"\''"foobar"|"foo\ x\x\\"|\|''|"foobar"| |
| "foo\ x\x\\"\'"fo'obar"|"foo\ x\x\\"|\|'"fo'|obar"| |
| "foo\ x\x\\"\'"fo'obar" 'don'\''t'|"foo\ x\x\\"|\|'"fo'|obar"|'don'|\|''|t'| |
| 'foo\ bar'|'foo\ bar'| |
| 'foo\\ bar'|'foo\\ bar'| |
| foo\ bar|foo|\|bar| |
| foo#bar\nbaz|foobaz| |
| :-) ;-)|:|-|)|;|-|)| |
| áéíóú|á|é|í|ó|ú| |
| """ |
| |
# Expectations for POSIX-mode splitting (shlex.split).  Same layout as the
# non-POSIX table: ``input|token|token|...|`` with a terminating "|"; an
# empty field between two "|" denotes an expected empty-string token.  A
# literal backslash-n inside the input field is turned into a real newline
# by ShlexTest.setUp().
posix_data = r"""x|x|
foo bar|foo|bar|
 foo bar|foo|bar|
 foo bar |foo|bar|
foo   bar    bla     fasel|foo|bar|bla|fasel|
x y  z              xxxx|x|y|z|xxxx|
\x bar|x|bar|
\ x bar| x|bar|
\ bar| bar|
foo \x bar|foo|x|bar|
foo \ x bar|foo| x|bar|
foo \ bar|foo| bar|
foo "bar" bla|foo|bar|bla|
"foo" "bar" "bla"|foo|bar|bla|
"foo" bar "bla"|foo|bar|bla|
"foo" bar bla|foo|bar|bla|
foo 'bar' bla|foo|bar|bla|
'foo' 'bar' 'bla'|foo|bar|bla|
'foo' bar 'bla'|foo|bar|bla|
'foo' bar bla|foo|bar|bla|
blurb foo"bar"bar"fasel" baz|blurb|foobarbarfasel|baz|
blurb foo'bar'bar'fasel' baz|blurb|foobarbarfasel|baz|
""||
''||
foo "" bar|foo||bar|
foo '' bar|foo||bar|
foo "" "" "" bar|foo||||bar|
foo '' '' '' bar|foo||||bar|
\"|"|
"\""|"|
"foo\ bar"|foo\ bar|
"foo\\ bar"|foo\ bar|
"foo\\ bar\""|foo\ bar"|
"foo\\" bar\"|foo\|bar"|
"foo\\ bar\" dfadf"|foo\ bar" dfadf|
"foo\\\ bar\" dfadf"|foo\\ bar" dfadf|
"foo\\\x bar\" dfadf"|foo\\x bar" dfadf|
"foo\x bar\" dfadf"|foo\x bar" dfadf|
\'|'|
'foo\ bar'|foo\ bar|
'foo\\ bar'|foo\\ bar|
"foo\\\x bar\" df'a\ 'df"|foo\\x bar" df'a\ 'df|
\"foo|"foo|
\"foo\x|"foox|
"foo\x"|foo\x|
"foo\ "|foo\ |
foo\ xx|foo xx|
foo\ x\x|foo xx|
foo\ x\x\"|foo xx"|
"foo\ x\x"|foo\ x\x|
"foo\ x\x\\"|foo\ x\x\|
"foo\ x\x\\""foobar"|foo\ x\x\foobar|
"foo\ x\x\\"\'"foobar"|foo\ x\x\'foobar|
"foo\ x\x\\"\'"fo'obar"|foo\ x\x\'fo'obar|
"foo\ x\x\\"\'"fo'obar" 'don'\''t'|foo\ x\x\'fo'obar|don't|
"foo\ x\x\\"\'"fo'obar" 'don'\''t' \\|foo\ x\x\'fo'obar|don't|\|
'foo\ bar'|foo\ bar|
'foo\\ bar'|foo\\ bar|
foo\ bar|foo bar|
foo#bar\nbaz|foo|baz|
:-) ;-)|:-)|;-)|
áéíóú|áéíóú|
"""
| |
class ShlexTest(unittest.TestCase):
    """Exercise shlex splitting, punctuation handling, quoting and joining.

    The module-level ``data`` and ``posix_data`` tables supply the expected
    tokens for the non-POSIX lexer and for POSIX-mode ``shlex.split()``.
    """

    def setUp(self):
        """Parse both expectation tables into ``[input, token, ...]`` rows."""
        self.data = self._parseTable(data)
        self.posix_data = self._parseTable(posix_data)

    @staticmethod
    def _parseTable(table):
        """Return one ``[input, token, ...]`` list per line of *table*."""
        rows = []
        for line in table.splitlines():
            # Each row is terminated by "|", so the last split field is an
            # empty string that has to be dropped.
            source, *tokens = line.split("|")[:-1]
            # A literal backslash-n in the table stands for a real newline.
            rows.append([source.replace(r"\n", "\n"), *tokens])
        return rows

    def splitTest(self, data, comments):
        """Check shlex.split() against every ``[input, token, ...]`` row.

        *comments* is forwarded unchanged to ``shlex.split()``.
        """
        for source, *expected in data:
            observed = shlex.split(source, comments=comments)
            self.assertEqual(observed, expected,
                             "%s: %s != %s" % (source, observed, expected))

    def oldSplit(self, s):
        """Tokenize *s* through the get_token() interface and return a list."""
        lex = shlex.shlex(io.StringIO(s))
        # get_token() hands back lex.eof (the empty string for this
        # non-POSIX lexer) once the input is exhausted.
        return list(iter(lex.get_token, lex.eof))

    def _assertPunctuationSplit(self, sources, expected):
        """Assert every source splits to *expected* with punctuation_chars.

        Each source is lexed twice, with whitespace_split off and on; both
        settings must produce the same token list.
        """
        for ss, ws in itertools.product(sources, (False, True)):
            s = shlex.shlex(ss, punctuation_chars=True)
            s.whitespace_split = ws
            self.assertEqual(expected, list(s),
                             "While splitting '%s' [ws=%s]" % (ss, ws))

    def testSplitPosix(self):
        """Test data splitting with posix parser"""
        self.splitTest(self.posix_data, comments=True)

    def testCompat(self):
        """Test compatibility interface"""
        for source, *expected in self.data:
            observed = self.oldSplit(source)
            self.assertEqual(observed, expected,
                             "%s: %s != %s" % (source, observed, expected))

    def testSyntaxSplitAmpersandAndPipe(self):
        """Test handling of syntax splitting of &, |"""
        # Could take these forms: &&, &, |&, ;&, ;;&
        # of course, the same applies to | and ||
        # these should all parse to the same output
        for delimiter in ('&&', '&', '|&', ';&', ';;&',
                          '||', '|', '&|', ';|', ';;|'):
            src = ['echo hi %s echo bye' % delimiter,
                   'echo hi%secho bye' % delimiter]
            ref = ['echo', 'hi', delimiter, 'echo', 'bye']
            self._assertPunctuationSplit(src, ref)

    def testSyntaxSplitSemicolon(self):
        """Test handling of syntax splitting of ;"""
        # Could take these forms: ;, ;;, ;&, ;;&
        # these should all parse to the same output
        for delimiter in (';', ';;', ';&', ';;&'):
            src = ['echo hi %s echo bye' % delimiter,
                   'echo hi%s echo bye' % delimiter,
                   'echo hi%secho bye' % delimiter]
            ref = ['echo', 'hi', delimiter, 'echo', 'bye']
            self._assertPunctuationSplit(src, ref)

    def testSyntaxSplitRedirect(self):
        """Test handling of syntax splitting of >"""
        # of course, the same applies to <, |
        # these should all parse to the same output
        for delimiter in ('>', '<', '|'):
            src = ['echo hi %s out' % delimiter,
                   'echo hi%s out' % delimiter,
                   'echo hi%sout' % delimiter]
            ref = ['echo', 'hi', delimiter, 'out']
            # The shared helper applies whitespace_split for each ws value,
            # so the ws=True case is really exercised here as well.
            self._assertPunctuationSplit(src, ref)

    def testSyntaxSplitParen(self):
        """Test handling of syntax splitting of ()"""
        # these should all parse to the same output
        src = ['( echo hi )',
               '(echo hi)']
        ref = ['(', 'echo', 'hi', ')']
        self._assertPunctuationSplit(src, ref)

    def testSyntaxSplitCustom(self):
        """Test handling of syntax splitting with custom chars"""
        ss = "~/a&&b-c --color=auto||d *.py?"
        ref = ['~/a', '&', '&', 'b-c', '--color=auto', '||', 'd', '*.py?']
        s = shlex.shlex(ss, punctuation_chars="|")
        result = list(s)
        self.assertEqual(ref, result, "While splitting '%s' [ws=False]" % ss)
        ref = ['~/a&&b-c', '--color=auto', '||', 'd', '*.py?']
        s = shlex.shlex(ss, punctuation_chars="|")
        s.whitespace_split = True
        result = list(s)
        self.assertEqual(ref, result, "While splitting '%s' [ws=True]" % ss)

    def testTokenTypes(self):
        """Test that tokens are split with types as expected."""
        for source, expected in (
                                ('a && b || c',
                                 [('a', 'a'), ('&&', 'c'), ('b', 'a'),
                                  ('||', 'c'), ('c', 'a')]),
                              ):
            s = shlex.shlex(source, punctuation_chars=True)
            # Tag each token 'c' when it starts with a punctuation
            # character and 'a' otherwise.
            observed = [
                (t, 'c' if t[0] in s.punctuation_chars else 'a')
                for t in iter(s.get_token, s.eof)
            ]
            self.assertEqual(observed, expected)

    def testPunctuationInWordChars(self):
        """Test that any punctuation chars are removed from wordchars"""
        s = shlex.shlex('a_b__c', punctuation_chars='_')
        self.assertNotIn('_', s.wordchars)
        self.assertEqual(list(s), ['a', '_', 'b', '__', 'c'])

    def testPunctuationWithWhitespaceSplit(self):
        """Test that with whitespace_split, behaviour is as expected"""
        s = shlex.shlex('a && b || c', punctuation_chars='&')
        # whitespace_split is False, so splitting will be based on
        # punctuation_chars
        self.assertEqual(list(s), ['a', '&&', 'b', '|', '|', 'c'])
        s = shlex.shlex('a && b || c', punctuation_chars='&')
        s.whitespace_split = True
        # whitespace_split is True, so splitting will be based on
        # white space
        self.assertEqual(list(s), ['a', '&&', 'b', '||', 'c'])

    def testPunctuationWithPosix(self):
        """Test that punctuation_chars and posix behave correctly together."""
        # see Issue #29132
        s = shlex.shlex('f >"abc"', posix=True, punctuation_chars=True)
        self.assertEqual(list(s), ['f', '>', 'abc'])
        s = shlex.shlex('f >\\"abc\\"', posix=True, punctuation_chars=True)
        self.assertEqual(list(s), ['f', '>', '"abc"'])

    def testEmptyStringHandling(self):
        """Test that parsing of empty strings is correctly handled."""
        # see Issue #21999
        expected = ['', ')', 'abc']
        for punct in (False, True):
            s = shlex.shlex("'')abc", posix=True, punctuation_chars=punct)
            slist = list(s)
            self.assertEqual(slist, expected)
        expected = ["''", ')', 'abc']
        s = shlex.shlex("'')abc", punctuation_chars=True)
        self.assertEqual(list(s), expected)

    def testUnicodeHandling(self):
        """Test punctuation_chars and whitespace_split handle unicode."""
        ss = "\u2119\u01b4\u2602\u210c\u00f8\u1f24"
        # Should be parsed as one complete token (whitespace_split=True).
        ref = ['\u2119\u01b4\u2602\u210c\u00f8\u1f24']
        s = shlex.shlex(ss, punctuation_chars=True)
        s.whitespace_split = True
        self.assertEqual(list(s), ref)
        # Without whitespace_split, uses wordchars and splits on all.
        ref = ['\u2119', '\u01b4', '\u2602', '\u210c', '\u00f8', '\u1f24']
        s = shlex.shlex(ss, punctuation_chars=True)
        self.assertEqual(list(s), ref)

    def testQuote(self):
        """Test shlex.quote() on empty, safe and unsafe strings."""
        safeunquoted = string.ascii_letters + string.digits + '@%_-+=:,./'
        unicode_sample = '\xe9\xe0\xdf'  # e + acute accent, a + grave, sharp s
        unsafe = '"`$\\!' + unicode_sample

        self.assertEqual(shlex.quote(''), "''")
        self.assertEqual(shlex.quote(safeunquoted), safeunquoted)
        self.assertEqual(shlex.quote('test file name'), "'test file name'")
        for u in unsafe:
            self.assertEqual(shlex.quote('test%sname' % u),
                             "'test%sname'" % u)
        for u in unsafe:
            self.assertEqual(shlex.quote("test%s'name'" % u),
                             "'test%s'\"'\"'name'\"'\"''" % u)

    def testJoin(self):
        """Test shlex.join() quotes arguments that contain spaces or quotes."""
        for split_command, command in [
            (['a ', 'b'], "'a ' b"),
            (['a', ' b'], "a ' b'"),
            (['a', ' ', 'b'], "a ' ' b"),
            (['"a', 'b"'], '\'"a\' \'b"\''),
        ]:
            with self.subTest(command=command):
                joined = shlex.join(split_command)
                self.assertEqual(joined, command)

    def testJoinRoundtrip(self):
        """Test that shlex.split(shlex.join(tokens)) returns the tokens."""
        all_data = self.data + self.posix_data
        for command, *split_command in all_data:
            with self.subTest(command=command):
                joined = shlex.join(split_command)
                resplit = shlex.split(joined)
                self.assertEqual(split_command, resplit)
| |
| |
# Allow this test to be used with old shlex.py: when the module has no
# usable split(), only the get_token()-based testCompat can run, so every
# other test method is removed from the class.
if not getattr(shlex, "split", None):
    for methname in [m for m in dir(ShlexTest) if m.startswith("test")]:
        if methname == "testCompat":
            continue
        delattr(ShlexTest, methname)

if __name__ == "__main__":
    unittest.main()