blob: 0cea2edc32afa29eaceff983903830b9a3bc8da8 [file] [log] [blame]
r"""Test correct treatment of various string literals by the parser.

There are four types of string literals:

    'abc'             -- normal str
    r'abc'            -- raw str
    b'xyz'            -- normal bytes
    br'xyz' | rb'xyz' -- raw bytes

The difference between normal and raw strings is of course that in a
raw string, \ escapes (while still used to determine the end of the
literal) are not interpreted, so that r'\x00' contains four
characters: a backslash, an x, and two zeros; while '\x00' contains a
single character (code point zero).

The tricky thing is what should happen when non-ASCII bytes are used
inside literals.  For bytes literals, this is considered illegal.  But
for str literals, those bytes are supposed to be decoded using the
encoding declared for the file (UTF-8 by default).

We have to test this with various file encodings.  We also test it with
exec()/eval(), which uses a different code path.

This file is really about correct treatment of encodings and
backslashes.  It doesn't concern itself with issues like single
vs. double quotes or singly- vs. triply-quoted strings: that's dealt
with elsewhere (I assume).
"""
29
30import os
31import sys
32import shutil
33import tempfile
34import unittest
Serhiy Storchaka4c5b6ba2019-08-10 01:34:22 +030035import warnings
Guido van Rossum29fd7122007-11-12 01:13:56 +000036
37
# Source text of the modules that check_encoding() writes to disk and
# imports.  The single %s is filled in with the encoding name, producing
# the "# coding:" cookie on the first line.  This is a raw string, so the
# backslash escapes below are written to the generated file verbatim and
# are interpreted (or, in the r'...' lines, left alone) by the parser that
# compiles that file.  Each assignment is followed by an assert on the
# resulting code points.
TEMPLATE = r"""# coding: %s
a = 'x'
assert ord(a) == 120
b = '\x01'
assert ord(b) == 1
c = r'\x01'
assert list(map(ord, c)) == [92, 120, 48, 49]
d = '\x81'
assert ord(d) == 0x81
e = r'\x81'
assert list(map(ord, e)) == [92, 120, 56, 49]
f = '\u1881'
assert ord(f) == 0x1881
g = r'\u1881'
assert list(map(ord, g)) == [92, 117, 49, 56, 56, 49]
h = '\U0001d120'
assert ord(h) == 0x1d120
i = r'\U0001d120'
assert list(map(ord, i)) == [92, 85, 48, 48, 48, 49, 100, 49, 50, 48]
"""
58
59
def byte(i):
    """Return a bytes object of length one whose single byte has value *i*."""
    single = (i,)
    return bytes(single)
62
63
class TestLiterals(unittest.TestCase):
    """Check escape and encoding handling of str/bytes literals.

    Literals are exercised both through eval() and through importing
    generated source files written in several encodings.
    """

    from test.support import check_syntax_warning

    def setUp(self):
        # Generated modules live in a private scratch directory that is
        # put first on sys.path so that __import__() can find them.
        self.save_path = sys.path[:]
        self.tmpdir = tempfile.mkdtemp()
        sys.path.insert(0, self.tmpdir)

    def tearDown(self):
        # Restore sys.path in place and drop the scratch directory.
        sys.path[:] = self.save_path
        shutil.rmtree(self.tmpdir, ignore_errors=True)

    def test_template(self):
        # Check that the template doesn't contain any non-printables
        # except for \n.
        for c in TEMPLATE:
            # An assertion method (rather than a bare ``assert``) keeps
            # this check active when the tests run under ``python -O``.
            self.assertTrue(c == '\n' or ' ' <= c <= '~', repr(c))

    def test_eval_str_normal(self):
        # Each escape is tried twice: with a raw outer string (eval()
        # decodes the escape) and with a non-raw outer string (eval()
        # receives the already-decoded character).
        self.assertEqual(eval(""" 'x' """), 'x')
        self.assertEqual(eval(r""" '\x01' """), chr(1))
        self.assertEqual(eval(""" '\x01' """), chr(1))
        self.assertEqual(eval(r""" '\x81' """), chr(0x81))
        self.assertEqual(eval(""" '\x81' """), chr(0x81))
        self.assertEqual(eval(r""" '\u1881' """), chr(0x1881))
        self.assertEqual(eval(""" '\u1881' """), chr(0x1881))
        self.assertEqual(eval(r""" '\U0001d120' """), chr(0x1d120))
        self.assertEqual(eval(""" '\U0001d120' """), chr(0x1d120))

    def test_eval_str_incomplete(self):
        # Truncated \x, \u and \U escapes must be rejected.
        self.assertRaises(SyntaxError, eval, r""" '\x' """)
        self.assertRaises(SyntaxError, eval, r""" '\x0' """)
        self.assertRaises(SyntaxError, eval, r""" '\u' """)
        self.assertRaises(SyntaxError, eval, r""" '\u0' """)
        self.assertRaises(SyntaxError, eval, r""" '\u00' """)
        self.assertRaises(SyntaxError, eval, r""" '\u000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U' """)
        self.assertRaises(SyntaxError, eval, r""" '\U0' """)
        self.assertRaises(SyntaxError, eval, r""" '\U00' """)
        self.assertRaises(SyntaxError, eval, r""" '\U000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U0000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U00000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U000000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U0000000' """)

    def test_eval_str_invalid_escape(self):
        for b in range(1, 128):
            # Skip the characters that form a recognised escape after a
            # backslash in a str literal.
            if b in b"""\n\r"'01234567NU\\abfnrtuvx""":
                continue
            # subTest identifies the offending character on failure.
            with self.subTest(char=chr(b)):
                with self.assertWarns(DeprecationWarning):
                    self.assertEqual(eval(r"'\%c'" % b), '\\' + chr(b))

        # The bad escape sits on the second line of the literal; the
        # warning is expected to be reported against line 1.
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('always', category=DeprecationWarning)
            eval("'''\n\\z'''")
        self.assertEqual(len(w), 1)
        self.assertEqual(w[0].filename, '<string>')
        self.assertEqual(w[0].lineno, 1)

        # With the warning turned into an error, a SyntaxError is
        # expected instead and nothing is recorded.
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('error', category=DeprecationWarning)
            with self.assertRaises(SyntaxError) as cm:
                eval("'''\n\\z'''")
            exc = cm.exception
        self.assertEqual(w, [])
        self.assertEqual(exc.filename, '<string>')
        self.assertEqual(exc.lineno, 1)

    def test_eval_str_raw(self):
        # With a raw outer string, eval() sees the backslash and must
        # keep it; with a non-raw outer string, eval() sees the decoded
        # character inside an r'' literal.
        self.assertEqual(eval(""" r'x' """), 'x')
        self.assertEqual(eval(r""" r'\x01' """), '\\' + 'x01')
        self.assertEqual(eval(""" r'\x01' """), chr(1))
        self.assertEqual(eval(r""" r'\x81' """), '\\' + 'x81')
        self.assertEqual(eval(""" r'\x81' """), chr(0x81))
        self.assertEqual(eval(r""" r'\u1881' """), '\\' + 'u1881')
        self.assertEqual(eval(""" r'\u1881' """), chr(0x1881))
        self.assertEqual(eval(r""" r'\U0001d120' """), '\\' + 'U0001d120')
        self.assertEqual(eval(""" r'\U0001d120' """), chr(0x1d120))

    def test_eval_bytes_normal(self):
        # In the non-raw outer strings eval() receives a literal
        # non-ASCII character inside b'...', which must be rejected.
        self.assertEqual(eval(""" b'x' """), b'x')
        self.assertEqual(eval(r""" b'\x01' """), byte(1))
        self.assertEqual(eval(""" b'\x01' """), byte(1))
        self.assertEqual(eval(r""" b'\x81' """), byte(0x81))
        self.assertRaises(SyntaxError, eval, """ b'\x81' """)
        self.assertEqual(eval(r""" br'\u1881' """), b'\\' + b'u1881')
        self.assertRaises(SyntaxError, eval, """ b'\u1881' """)
        self.assertEqual(eval(r""" br'\U0001d120' """), b'\\' + b'U0001d120')
        self.assertRaises(SyntaxError, eval, """ b'\U0001d120' """)

    def test_eval_bytes_incomplete(self):
        # Truncated \x escapes must be rejected in bytes literals too.
        self.assertRaises(SyntaxError, eval, r""" b'\x' """)
        self.assertRaises(SyntaxError, eval, r""" b'\x0' """)

    def test_eval_bytes_invalid_escape(self):
        for b in range(1, 128):
            # Skip the characters that form a recognised escape after a
            # backslash in a bytes literal.
            if b in b"""\n\r"'01234567\\abfnrtvx""":
                continue
            # subTest identifies the offending byte on failure.
            with self.subTest(char=chr(b)):
                with self.assertWarns(DeprecationWarning):
                    self.assertEqual(eval(r"b'\%c'" % b), b'\\' + bytes([b]))

        # The bad escape sits on the second line of the literal; the
        # warning is expected to be reported against line 1.
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('always', category=DeprecationWarning)
            eval("b'''\n\\z'''")
        self.assertEqual(len(w), 1)
        self.assertEqual(w[0].filename, '<string>')
        self.assertEqual(w[0].lineno, 1)

        # With the warning turned into an error, a SyntaxError is
        # expected instead and nothing is recorded.
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('error', category=DeprecationWarning)
            with self.assertRaises(SyntaxError) as cm:
                eval("b'''\n\\z'''")
            exc = cm.exception
        self.assertEqual(w, [])
        self.assertEqual(exc.filename, '<string>')
        self.assertEqual(exc.lineno, 1)

    def test_eval_bytes_raw(self):
        # Both prefix spellings (br and rb) are checked for every case.
        self.assertEqual(eval(""" br'x' """), b'x')
        self.assertEqual(eval(""" rb'x' """), b'x')
        self.assertEqual(eval(r""" br'\x01' """), b'\\' + b'x01')
        self.assertEqual(eval(r""" rb'\x01' """), b'\\' + b'x01')
        self.assertEqual(eval(""" br'\x01' """), byte(1))
        self.assertEqual(eval(""" rb'\x01' """), byte(1))
        self.assertEqual(eval(r""" br'\x81' """), b"\\" + b"x81")
        self.assertEqual(eval(r""" rb'\x81' """), b"\\" + b"x81")
        self.assertRaises(SyntaxError, eval, """ br'\x81' """)
        self.assertRaises(SyntaxError, eval, """ rb'\x81' """)
        self.assertEqual(eval(r""" br'\u1881' """), b"\\" + b"u1881")
        self.assertEqual(eval(r""" rb'\u1881' """), b"\\" + b"u1881")
        self.assertRaises(SyntaxError, eval, """ br'\u1881' """)
        self.assertRaises(SyntaxError, eval, """ rb'\u1881' """)
        self.assertEqual(eval(r""" br'\U0001d120' """), b"\\" + b"U0001d120")
        self.assertEqual(eval(r""" rb'\U0001d120' """), b"\\" + b"U0001d120")
        self.assertRaises(SyntaxError, eval, """ br'\U0001d120' """)
        self.assertRaises(SyntaxError, eval, """ rb'\U0001d120' """)
        # Repeated prefix letters are not valid prefixes.
        self.assertRaises(SyntaxError, eval, """ bb'' """)
        self.assertRaises(SyntaxError, eval, """ rr'' """)
        self.assertRaises(SyntaxError, eval, """ brr'' """)
        self.assertRaises(SyntaxError, eval, """ bbr'' """)
        self.assertRaises(SyntaxError, eval, """ rrb'' """)
        self.assertRaises(SyntaxError, eval, """ rbb'' """)

    def test_eval_str_u(self):
        # The u/U prefix yields an ordinary str; it cannot be combined
        # with r or b.
        self.assertEqual(eval(""" u'x' """), 'x')
        self.assertEqual(eval(""" U'\u00e4' """), 'ä')
        self.assertEqual(eval(""" u'\N{LATIN SMALL LETTER A WITH DIAERESIS}' """), 'ä')
        self.assertRaises(SyntaxError, eval, """ ur'' """)
        self.assertRaises(SyntaxError, eval, """ ru'' """)
        self.assertRaises(SyntaxError, eval, """ bu'' """)
        self.assertRaises(SyntaxError, eval, """ ub'' """)

    def check_encoding(self, encoding, extra=""):
        """Write TEMPLATE (plus *extra*) as a module in *encoding* and import it.

        The module is named after the encoding and is created in
        self.tmpdir.  Importing it runs the asserts it contains; any
        exception raised by the import propagates to the caller.  After a
        successful import the module is removed from sys.modules.
        """
        modname = "xx_" + encoding.replace("-", "_")
        fn = os.path.join(self.tmpdir, modname + ".py")
        # The context manager closes the file even if a write fails.
        with open(fn, "w", encoding=encoding) as f:
            f.write(TEMPLATE % encoding)
            f.write(extra)
        __import__(modname)
        del sys.modules[modname]

    def test_file_utf_8(self):
        extra = "z = '\u1234'; assert ord(z) == 0x1234\n"
        self.check_encoding("utf-8", extra)

    def test_file_utf_8_error(self):
        # A non-ASCII character inside a bytes literal in the generated
        # file must make the import fail with SyntaxError.
        extra = "b'\x80'\n"
        self.assertRaises(SyntaxError, self.check_encoding, "utf-8", extra)

    def test_file_utf8(self):
        self.check_encoding("utf-8")

    def test_file_iso_8859_1(self):
        self.check_encoding("iso-8859-1")

    def test_file_latin_1(self):
        self.check_encoding("latin-1")

    def test_file_latin9(self):
        self.check_encoding("latin9")
248
249
if __name__ == "__main__":
    # Run the test cases when this file is executed as a script.
    unittest.main()