# blob: 3081a785204edc6ade5803ea484813095a12b914
import io
import itertools
import shlex
import string
import unittest
from unittest import mock

# The original test data set was from shellwords, by Hartmut Goebel.

# Expected tokenization in compatibility (non-POSIX) mode.
# Each line has the form "input|token|token|...|": the text before the
# first "|" is the string to tokenize and the remaining fields are the
# expected tokens.  A literal backslash-n in an input stands for a newline.
data = r"""x|x|
foo bar|foo|bar|
 foo bar|foo|bar|
 foo bar |foo|bar|
foo bar bla fasel|foo|bar|bla|fasel|
x y z xxxx|x|y|z|xxxx|
\x bar|\|x|bar|
\ x bar|\|x|bar|
\ bar|\|bar|
foo \x bar|foo|\|x|bar|
foo \ x bar|foo|\|x|bar|
foo \ bar|foo|\|bar|
foo "bar" bla|foo|"bar"|bla|
"foo" "bar" "bla"|"foo"|"bar"|"bla"|
"foo" bar "bla"|"foo"|bar|"bla"|
"foo" bar bla|"foo"|bar|bla|
foo 'bar' bla|foo|'bar'|bla|
'foo' 'bar' 'bla'|'foo'|'bar'|'bla'|
'foo' bar 'bla'|'foo'|bar|'bla'|
'foo' bar bla|'foo'|bar|bla|
blurb foo"bar"bar"fasel" baz|blurb|foo"bar"bar"fasel"|baz|
blurb foo'bar'bar'fasel' baz|blurb|foo'bar'bar'fasel'|baz|
""|""|
''|''|
foo "" bar|foo|""|bar|
foo '' bar|foo|''|bar|
foo "" "" "" bar|foo|""|""|""|bar|
foo '' '' '' bar|foo|''|''|''|bar|
\""|\|""|
"\"|"\"|
"foo\ bar"|"foo\ bar"|
"foo\\ bar"|"foo\\ bar"|
"foo\\ bar\"|"foo\\ bar\"|
"foo\\" bar\""|"foo\\"|bar|\|""|
"foo\\ bar\" dfadf"|"foo\\ bar\"|dfadf"|
"foo\\\ bar\" dfadf"|"foo\\\ bar\"|dfadf"|
"foo\\\x bar\" dfadf"|"foo\\\x bar\"|dfadf"|
"foo\x bar\" dfadf"|"foo\x bar\"|dfadf"|
\''|\|''|
'foo\ bar'|'foo\ bar'|
'foo\\ bar'|'foo\\ bar'|
"foo\\\x bar\" df'a\ 'df'|"foo\\\x bar\"|df'a|\|'df'|
\"foo"|\|"foo"|
\"foo"\x|\|"foo"|\|x|
"foo\x"|"foo\x"|
"foo\ "|"foo\ "|
foo\ xx|foo|\|xx|
foo\ x\x|foo|\|x|\|x|
foo\ x\x\""|foo|\|x|\|x|\|""|
"foo\ x\x"|"foo\ x\x"|
"foo\ x\x\\"|"foo\ x\x\\"|
"foo\ x\x\\""foobar"|"foo\ x\x\\"|"foobar"|
"foo\ x\x\\"\''"foobar"|"foo\ x\x\\"|\|''|"foobar"|
"foo\ x\x\\"\'"fo'obar"|"foo\ x\x\\"|\|'"fo'|obar"|
"foo\ x\x\\"\'"fo'obar" 'don'\''t'|"foo\ x\x\\"|\|'"fo'|obar"|'don'|\|''|t'|
'foo\ bar'|'foo\ bar'|
'foo\\ bar'|'foo\\ bar'|
foo\ bar|foo|\|bar|
foo#bar\nbaz|foobaz|
:-) ;-)|:|-|)|;|-|)|
áéíóú|á|é|í|ó|ú|
"""

# Expected tokenization in POSIX mode, in the same
# "input|token|token|...|" layout: the text before the first "|" is the
# string to split and the remaining fields are the expected tokens.
# A literal backslash-n in an input stands for a newline.
posix_data = r"""x|x|
foo bar|foo|bar|
 foo bar|foo|bar|
 foo bar |foo|bar|
foo bar bla fasel|foo|bar|bla|fasel|
x y z xxxx|x|y|z|xxxx|
\x bar|x|bar|
\ x bar| x|bar|
\ bar| bar|
foo \x bar|foo|x|bar|
foo \ x bar|foo| x|bar|
foo \ bar|foo| bar|
foo "bar" bla|foo|bar|bla|
"foo" "bar" "bla"|foo|bar|bla|
"foo" bar "bla"|foo|bar|bla|
"foo" bar bla|foo|bar|bla|
foo 'bar' bla|foo|bar|bla|
'foo' 'bar' 'bla'|foo|bar|bla|
'foo' bar 'bla'|foo|bar|bla|
'foo' bar bla|foo|bar|bla|
blurb foo"bar"bar"fasel" baz|blurb|foobarbarfasel|baz|
blurb foo'bar'bar'fasel' baz|blurb|foobarbarfasel|baz|
""||
''||
foo "" bar|foo||bar|
foo '' bar|foo||bar|
foo "" "" "" bar|foo||||bar|
foo '' '' '' bar|foo||||bar|
\"|"|
"\""|"|
"foo\ bar"|foo\ bar|
"foo\\ bar"|foo\ bar|
"foo\\ bar\""|foo\ bar"|
"foo\\" bar\"|foo\|bar"|
"foo\\ bar\" dfadf"|foo\ bar" dfadf|
"foo\\\ bar\" dfadf"|foo\\ bar" dfadf|
"foo\\\x bar\" dfadf"|foo\\x bar" dfadf|
"foo\x bar\" dfadf"|foo\x bar" dfadf|
\'|'|
'foo\ bar'|foo\ bar|
'foo\\ bar'|foo\\ bar|
"foo\\\x bar\" df'a\ 'df"|foo\\x bar" df'a\ 'df|
\"foo|"foo|
\"foo\x|"foox|
"foo\x"|foo\x|
"foo\ "|foo\ |
foo\ xx|foo xx|
foo\ x\x|foo xx|
foo\ x\x\"|foo xx"|
"foo\ x\x"|foo\ x\x|
"foo\ x\x\\"|foo\ x\x\|
"foo\ x\x\\""foobar"|foo\ x\x\foobar|
"foo\ x\x\\"\'"foobar"|foo\ x\x\'foobar|
"foo\ x\x\\"\'"fo'obar"|foo\ x\x\'fo'obar|
"foo\ x\x\\"\'"fo'obar" 'don'\''t'|foo\ x\x\'fo'obar|don't|
"foo\ x\x\\"\'"fo'obar" 'don'\''t' \\|foo\ x\x\'fo'obar|don't|\|
'foo\ bar'|foo\ bar|
'foo\\ bar'|foo\\ bar|
foo\ bar|foo bar|
foo#bar\nbaz|foo|baz|
:-) ;-)|:-)|;-)|
áéíóú|áéíóú|
"""

class ShlexTest(unittest.TestCase):
    """Tests for shlex tokenizing, splitting, quoting and joining.

    The table-driven tests read the module-level ``data`` (compatibility
    mode) and ``posix_data`` (POSIX mode) strings.
    """

    def setUp(self):
        """Parse both tables into ``[input, token, ...]`` rows."""
        # Every table line ends with "|", so the last split field is
        # always empty and is dropped.
        self.data = [x.split("|")[:-1]
                     for x in data.splitlines()]
        self.posix_data = [x.split("|")[:-1]
                           for x in posix_data.splitlines()]
        # The tables are raw strings: turn the two-character sequence
        # backslash-n in each input into a real newline.
        for item in self.data:
            item[0] = item[0].replace(r"\n", "\n")
        for item in self.posix_data:
            item[0] = item[0].replace(r"\n", "\n")

    def splitTest(self, data, comments):
        """Check shlex.split() against every row of *data*.

        data -- rows of the form [input, expected_token, ...]
        comments -- passed through to shlex.split()
        """
        for row in data:
            source, expected = row[0], row[1:]
            result = shlex.split(source, comments=comments)
            self.assertEqual(result, expected,
                             "%s: %s != %s" %
                             (source, result, expected))

    def oldSplit(self, s):
        """Tokenize *s* through the get_token() interface.

        Returns the list of tokens read before the first falsy token.
        """
        ret = []
        lex = shlex.shlex(io.StringIO(s))
        tok = lex.get_token()
        while tok:
            ret.append(tok)
            tok = lex.get_token()
        return ret

    # sys.stdin is replaced by an empty stream for the duration of the
    # test while shlex.split() is called with None.
    @mock.patch('sys.stdin', io.StringIO())
    def testSplitNoneDeprecation(self):
        with self.assertWarns(DeprecationWarning):
            shlex.split(None)

    def testSplitPosix(self):
        """Test data splitting with posix parser"""
        self.splitTest(self.posix_data, comments=True)

    def testCompat(self):
        """Test compatibility interface"""
        for row in self.data:
            source, expected = row[0], row[1:]
            result = self.oldSplit(source)
            self.assertEqual(result, expected,
                             "%s: %s != %s" %
                             (source, result, expected))

    def testSyntaxSplitAmpersandAndPipe(self):
        """Test handling of syntax splitting of &, |"""
        # Could take these forms: &&, &, |&, ;&, ;;&
        # of course, the same applies to | and ||
        # these should all parse to the same output
        for delimiter in ('&&', '&', '|&', ';&', ';;&',
                          '||', '|', '&|', ';|', ';;|'):
            src = ['echo hi %s echo bye' % delimiter,
                   'echo hi%secho bye' % delimiter]
            ref = ['echo', 'hi', delimiter, 'echo', 'bye']
            for ss, ws in itertools.product(src, (False, True)):
                s = shlex.shlex(ss, punctuation_chars=True)
                s.whitespace_split = ws
                result = list(s)
                self.assertEqual(ref, result,
                                 "While splitting '%s' [ws=%s]" % (ss, ws))

    def testSyntaxSplitSemicolon(self):
        """Test handling of syntax splitting of ;"""
        # Could take these forms: ;, ;;, ;&, ;;&
        # these should all parse to the same output
        for delimiter in (';', ';;', ';&', ';;&'):
            src = ['echo hi %s echo bye' % delimiter,
                   'echo hi%s echo bye' % delimiter,
                   'echo hi%secho bye' % delimiter]
            ref = ['echo', 'hi', delimiter, 'echo', 'bye']
            for ss, ws in itertools.product(src, (False, True)):
                s = shlex.shlex(ss, punctuation_chars=True)
                s.whitespace_split = ws
                result = list(s)
                self.assertEqual(ref, result,
                                 "While splitting '%s' [ws=%s]" % (ss, ws))

    def testSyntaxSplitRedirect(self):
        """Test handling of syntax splitting of >"""
        # of course, the same applies to <, |
        # these should all parse to the same output
        for delimiter in ('>', '<', '|'):
            src = ['echo hi %s out' % delimiter,
                   'echo hi%s out' % delimiter,
                   'echo hi%sout' % delimiter]
            ref = ['echo', 'hi', delimiter, 'out']
            for ss, ws in itertools.product(src, (False, True)):
                s = shlex.shlex(ss, punctuation_chars=True)
                # Apply the setting the failure message reports, so both
                # whitespace_split modes are really exercised.
                s.whitespace_split = ws
                result = list(s)
                self.assertEqual(ref, result,
                                 "While splitting '%s' [ws=%s]" % (ss, ws))

    def testSyntaxSplitParen(self):
        """Test handling of syntax splitting of ()"""
        # these should all parse to the same output
        src = ['( echo hi )',
               '(echo hi)']
        ref = ['(', 'echo', 'hi', ')']
        for ss, ws in itertools.product(src, (False, True)):
            s = shlex.shlex(ss, punctuation_chars=True)
            s.whitespace_split = ws
            result = list(s)
            self.assertEqual(ref, result,
                             "While splitting '%s' [ws=%s]" % (ss, ws))

    def testSyntaxSplitCustom(self):
        """Test handling of syntax splitting with custom chars"""
        ss = "~/a&&b-c --color=auto||d *.py?"
        ref = ['~/a', '&', '&', 'b-c', '--color=auto', '||', 'd', '*.py?']
        s = shlex.shlex(ss, punctuation_chars="|")
        result = list(s)
        self.assertEqual(ref, result, "While splitting '%s' [ws=False]" % ss)
        ref = ['~/a&&b-c', '--color=auto', '||', 'd', '*.py?']
        s = shlex.shlex(ss, punctuation_chars="|")
        s.whitespace_split = True
        result = list(s)
        self.assertEqual(ref, result, "While splitting '%s' [ws=True]" % ss)

    def testTokenTypes(self):
        """Test that tokens are split with types as expected."""
        for source, expected in (
                                ('a && b || c',
                                 [('a', 'a'), ('&&', 'c'), ('b', 'a'),
                                  ('||', 'c'), ('c', 'a')]),
                              ):
            s = shlex.shlex(source, punctuation_chars=True)
            observed = []
            while True:
                t = s.get_token()
                if t == s.eof:
                    break
                # 'c' marks a punctuation token, 'a' any other token.
                if t[0] in s.punctuation_chars:
                    tt = 'c'
                else:
                    tt = 'a'
                observed.append((t, tt))
            self.assertEqual(observed, expected)

    def testPunctuationInWordChars(self):
        """Test that any punctuation chars are removed from wordchars"""
        s = shlex.shlex('a_b__c', punctuation_chars='_')
        self.assertNotIn('_', s.wordchars)
        self.assertEqual(list(s), ['a', '_', 'b', '__', 'c'])

    def testPunctuationWithWhitespaceSplit(self):
        """Test that with whitespace_split, behaviour is as expected"""
        s = shlex.shlex('a && b || c', punctuation_chars='&')
        # whitespace_split is False, so splitting will be based on
        # punctuation_chars
        self.assertEqual(list(s), ['a', '&&', 'b', '|', '|', 'c'])
        s = shlex.shlex('a && b || c', punctuation_chars='&')
        s.whitespace_split = True
        # whitespace_split is True, so splitting will be based on
        # white space
        self.assertEqual(list(s), ['a', '&&', 'b', '||', 'c'])

    def testPunctuationWithPosix(self):
        """Test that punctuation_chars and posix behave correctly together."""
        # see Issue #29132
        s = shlex.shlex('f >"abc"', posix=True, punctuation_chars=True)
        self.assertEqual(list(s), ['f', '>', 'abc'])
        s = shlex.shlex('f >\\"abc\\"', posix=True, punctuation_chars=True)
        self.assertEqual(list(s), ['f', '>', '"abc"'])

    def testEmptyStringHandling(self):
        """Test that parsing of empty strings is correctly handled."""
        # see Issue #21999
        expected = ['', ')', 'abc']
        for punct in (False, True):
            s = shlex.shlex("'')abc", posix=True, punctuation_chars=punct)
            slist = list(s)
            self.assertEqual(slist, expected)
        expected = ["''", ')', 'abc']
        s = shlex.shlex("'')abc", punctuation_chars=True)
        self.assertEqual(list(s), expected)

    def testUnicodeHandling(self):
        """Test punctuation_chars and whitespace_split handle unicode."""
        ss = "\u2119\u01b4\u2602\u210c\u00f8\u1f24"
        # Should be parsed as one complete token (whitespace_split=True).
        ref = ['\u2119\u01b4\u2602\u210c\u00f8\u1f24']
        s = shlex.shlex(ss, punctuation_chars=True)
        s.whitespace_split = True
        self.assertEqual(list(s), ref)
        # Without whitespace_split, uses wordchars and splits on all.
        ref = ['\u2119', '\u01b4', '\u2602', '\u210c', '\u00f8', '\u1f24']
        s = shlex.shlex(ss, punctuation_chars=True)
        self.assertEqual(list(s), ref)

    def testQuote(self):
        """Test shlex.quote() on empty, safe and unsafe strings."""
        safeunquoted = string.ascii_letters + string.digits + '@%_-+=:,./'
        unicode_sample = '\xe9\xe0\xdf'  # e + acute accent, a + grave, sharp s
        unsafe = '"`$\\!' + unicode_sample

        self.assertEqual(shlex.quote(''), "''")
        self.assertEqual(shlex.quote(safeunquoted), safeunquoted)
        self.assertEqual(shlex.quote('test file name'), "'test file name'")
        for u in unsafe:
            self.assertEqual(shlex.quote('test%sname' % u),
                             "'test%sname'" % u)
        for u in unsafe:
            self.assertEqual(shlex.quote("test%s'name'" % u),
                             "'test%s'\"'\"'name'\"'\"''" % u)

    def testJoin(self):
        """Test shlex.join() on arguments containing spaces and quotes."""
        for split_command, command in [
            (['a ', 'b'], "'a ' b"),
            (['a', ' b'], "a ' b'"),
            (['a', ' ', 'b'], "a ' ' b"),
            (['"a', 'b"'], '\'"a\' \'b"\''),
        ]:
            with self.subTest(command=command):
                joined = shlex.join(split_command)
                self.assertEqual(joined, command)

    def testJoinRoundtrip(self):
        """Test that shlex.split(shlex.join(tokens)) gives back the tokens."""
        all_data = self.data + self.posix_data
        for command, *split_command in all_data:
            with self.subTest(command=command):
                joined = shlex.join(split_command)
                resplit = shlex.split(joined)
                self.assertEqual(split_command, resplit)

    def testPunctuationCharsReadOnly(self):
        """Test that punctuation_chars cannot be reassigned."""
        punctuation_chars = "/|$%^"
        shlex_instance = shlex.shlex(punctuation_chars=punctuation_chars)
        self.assertEqual(shlex_instance.punctuation_chars, punctuation_chars)
        with self.assertRaises(AttributeError):
            shlex_instance.punctuation_chars = False


# Allow this test to be used with old shlex.py
if not getattr(shlex, "split", None):
    # Without shlex.split, keep only testCompat and remove every other
    # test method from the class.
    for methname in dir(ShlexTest):
        if methname.startswith("test") and methname != "testCompat":
            delattr(ShlexTest, methname)

if __name__ == "__main__":
    unittest.main()