blob: aba4fc46676245409954fb216b98a6db49ada5b2 [file] [log] [blame]
r"""Test correct treatment of various string literals by the parser.

There are four types of string literals:

    'abc'             -- normal str
    r'abc'            -- raw str
    b'xyz'            -- normal bytes
    br'xyz' | rb'xyz' -- raw bytes

The difference between normal and raw strings is of course that in a
raw string, \ escapes (while still used to determine the end of the
literal) are not interpreted, so that r'\x00' contains four
characters: a backslash, an x, and two zeros; while '\x00' contains a
single character (code point zero).

The tricky thing is what should happen when non-ASCII bytes are used
inside literals.  For bytes literals, this is considered illegal.  But
for str literals, those bytes are supposed to be decoded using the
encoding declared for the file (UTF-8 by default).

We have to test this with various file encodings.  We also test it with
exec()/eval(), which uses a different code path.

This file is really about correct treatment of encodings and
backslashes.  It doesn't concern itself with issues like single
vs. double quotes or singly- vs. triply-quoted strings: that's dealt
with elsewhere (I assume).
"""
29
30import os
31import sys
32import shutil
33import tempfile
Eric V. Smith56466482016-10-31 14:46:26 -040034import warnings
Guido van Rossum29fd7122007-11-12 01:13:56 +000035import unittest
36
37
# Source text of the throw-away modules written by
# TestLiterals.check_encoding().  The single %s is replaced by the name of
# the encoding under test, which becomes the module's coding cookie.  The
# text is a raw string, so the backslash escapes below reach the generated
# file verbatim and are interpreted only when that file is compiled.
# Keep it printable ASCII plus newlines: TestLiterals.test_template checks
# this, so the same text can be written out in every tested encoding.
TEMPLATE = r"""# coding: %s
a = 'x'
assert ord(a) == 120
b = '\x01'
assert ord(b) == 1
c = r'\x01'
assert list(map(ord, c)) == [92, 120, 48, 49]
d = '\x81'
assert ord(d) == 0x81
e = r'\x81'
assert list(map(ord, e)) == [92, 120, 56, 49]
f = '\u1881'
assert ord(f) == 0x1881
g = r'\u1881'
assert list(map(ord, g)) == [92, 117, 49, 56, 56, 49]
h = '\U0001d120'
assert ord(h) == 0x1d120
i = r'\U0001d120'
assert list(map(ord, i)) == [92, 85, 48, 48, 48, 49, 100, 49, 50, 48]
"""
58
59
def byte(i):
    """Return a bytes object of length one whose only element is *i*.

    *i* must be an integer in range(256); bytes() raises ValueError for
    anything outside that range.
    """
    return bytes([i])
62
63
class TestLiterals(unittest.TestCase):
    """Check how str and bytes literals and their escapes are parsed.

    The test_eval_* methods pass literal source text to eval().  The
    test_file_* methods write TEMPLATE into a module in a temporary
    directory, using a given source encoding, and import that module.
    """

    def setUp(self):
        # Put a fresh temporary directory first on sys.path so the modules
        # written by check_encoding() can be imported by name.
        self.save_path = sys.path[:]
        self.tmpdir = tempfile.mkdtemp()
        sys.path.insert(0, self.tmpdir)

    def tearDown(self):
        # Restore sys.path in place and discard the generated modules.
        sys.path[:] = self.save_path
        shutil.rmtree(self.tmpdir, ignore_errors=True)

    def test_template(self):
        # Check that the template doesn't contain any non-printables
        # except for \n.
        for c in TEMPLATE:
            # A unittest assertion rather than a bare assert statement, so
            # the check is not compiled away when running under -O.
            self.assertTrue(c == '\n' or ' ' <= c <= '~', repr(c))

    def test_eval_str_normal(self):
        # Each escape is tried twice: with a raw outer string the parser
        # under test sees the backslash escape; with a normal outer string
        # it sees the already-decoded character.
        self.assertEqual(eval(""" 'x' """), 'x')
        self.assertEqual(eval(r""" '\x01' """), chr(1))
        self.assertEqual(eval(""" '\x01' """), chr(1))
        self.assertEqual(eval(r""" '\x81' """), chr(0x81))
        self.assertEqual(eval(""" '\x81' """), chr(0x81))
        self.assertEqual(eval(r""" '\u1881' """), chr(0x1881))
        self.assertEqual(eval(""" '\u1881' """), chr(0x1881))
        self.assertEqual(eval(r""" '\U0001d120' """), chr(0x1d120))
        self.assertEqual(eval(""" '\U0001d120' """), chr(0x1d120))

    def test_eval_str_incomplete(self):
        # Truncated \x, \u and \U escapes must be rejected at compile time.
        incomplete = (
            r""" '\x' """,
            r""" '\x0' """,
            r""" '\u' """,
            r""" '\u0' """,
            r""" '\u00' """,
            r""" '\u000' """,
            r""" '\U' """,
            r""" '\U0' """,
            r""" '\U00' """,
            r""" '\U000' """,
            r""" '\U0000' """,
            r""" '\U00000' """,
            r""" '\U000000' """,
            r""" '\U0000000' """,
        )
        for source in incomplete:
            with self.subTest(source=source):
                self.assertRaises(SyntaxError, eval, source)

    def test_eval_str_invalid_escape(self):
        for b in range(1, 128):
            # Skip every character that is meaningful after a backslash in
            # a str literal; all remaining ones form an invalid escape.
            if b in b"""\n\r"'01234567NU\\abfnrtuvx""":
                continue
            with self.subTest(char=chr(b)):
                with self.assertWarns(DeprecationWarning):
                    self.assertEqual(eval(r"'\%c'" % b), '\\' + chr(b))

        # The warning must point at the line holding the bad escape.
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('always', category=DeprecationWarning)
            eval("'''\n\\z'''")
        self.assertEqual(len(w), 1)
        self.assertEqual(w[0].filename, '<string>')
        self.assertEqual(w[0].lineno, 2)

        # When the warning is turned into an error it must surface as a
        # SyntaxError carrying the same location, with nothing recorded.
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('error', category=DeprecationWarning)
            with self.assertRaises(SyntaxError) as cm:
                eval("'''\n\\z'''")
            exc = cm.exception
        self.assertEqual(w, [])
        self.assertEqual(exc.filename, '<string>')
        self.assertEqual(exc.lineno, 2)

    def test_eval_str_raw(self):
        # In a raw literal a backslash escape stays as written, while an
        # already-decoded character passes through unchanged.
        self.assertEqual(eval(""" r'x' """), 'x')
        self.assertEqual(eval(r""" r'\x01' """), '\\' + 'x01')
        self.assertEqual(eval(""" r'\x01' """), chr(1))
        self.assertEqual(eval(r""" r'\x81' """), '\\' + 'x81')
        self.assertEqual(eval(""" r'\x81' """), chr(0x81))
        self.assertEqual(eval(r""" r'\u1881' """), '\\' + 'u1881')
        self.assertEqual(eval(""" r'\u1881' """), chr(0x1881))
        self.assertEqual(eval(r""" r'\U0001d120' """), '\\' + 'U0001d120')
        self.assertEqual(eval(""" r'\U0001d120' """), chr(0x1d120))

    def test_eval_bytes_normal(self):
        # Non-ASCII characters written directly inside a bytes literal are
        # a syntax error; the same values spelled as \x escapes are fine.
        self.assertEqual(eval(""" b'x' """), b'x')
        self.assertEqual(eval(r""" b'\x01' """), byte(1))
        self.assertEqual(eval(""" b'\x01' """), byte(1))
        self.assertEqual(eval(r""" b'\x81' """), byte(0x81))
        self.assertRaises(SyntaxError, eval, """ b'\x81' """)
        self.assertEqual(eval(r""" br'\u1881' """), b'\\' + b'u1881')
        self.assertRaises(SyntaxError, eval, """ b'\u1881' """)
        self.assertEqual(eval(r""" br'\U0001d120' """), b'\\' + b'U0001d120')
        self.assertRaises(SyntaxError, eval, """ b'\U0001d120' """)

    def test_eval_bytes_incomplete(self):
        # Truncated \x escapes must be rejected in bytes literals too.
        self.assertRaises(SyntaxError, eval, r""" b'\x' """)
        self.assertRaises(SyntaxError, eval, r""" b'\x0' """)

    def test_eval_bytes_invalid_escape(self):
        for b in range(1, 128):
            # Same idea as for str, but N, U and u are not skipped here:
            # they are not escape introducers in bytes literals.
            if b in b"""\n\r"'01234567\\abfnrtvx""":
                continue
            with self.subTest(char=chr(b)):
                with self.assertWarns(DeprecationWarning):
                    self.assertEqual(eval(r"b'\%c'" % b), b'\\' + bytes([b]))

        # The warning must point at the line holding the bad escape.
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('always', category=DeprecationWarning)
            eval("b'''\n\\z'''")
        self.assertEqual(len(w), 1)
        self.assertEqual(w[0].filename, '<string>')
        self.assertEqual(w[0].lineno, 2)

        # When the warning is turned into an error it must surface as a
        # SyntaxError carrying the same location, with nothing recorded.
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('error', category=DeprecationWarning)
            with self.assertRaises(SyntaxError) as cm:
                eval("b'''\n\\z'''")
            exc = cm.exception
        self.assertEqual(w, [])
        self.assertEqual(exc.filename, '<string>')
        self.assertEqual(exc.lineno, 2)

    def test_eval_bytes_raw(self):
        # Both prefix spellings, br and rb, must behave identically.
        self.assertEqual(eval(""" br'x' """), b'x')
        self.assertEqual(eval(""" rb'x' """), b'x')
        self.assertEqual(eval(r""" br'\x01' """), b'\\' + b'x01')
        self.assertEqual(eval(r""" rb'\x01' """), b'\\' + b'x01')
        self.assertEqual(eval(""" br'\x01' """), byte(1))
        self.assertEqual(eval(""" rb'\x01' """), byte(1))
        self.assertEqual(eval(r""" br'\x81' """), b"\\" + b"x81")
        self.assertEqual(eval(r""" rb'\x81' """), b"\\" + b"x81")
        self.assertRaises(SyntaxError, eval, """ br'\x81' """)
        self.assertRaises(SyntaxError, eval, """ rb'\x81' """)
        self.assertEqual(eval(r""" br'\u1881' """), b"\\" + b"u1881")
        self.assertEqual(eval(r""" rb'\u1881' """), b"\\" + b"u1881")
        self.assertRaises(SyntaxError, eval, """ br'\u1881' """)
        self.assertRaises(SyntaxError, eval, """ rb'\u1881' """)
        self.assertEqual(eval(r""" br'\U0001d120' """), b"\\" + b"U0001d120")
        self.assertEqual(eval(r""" rb'\U0001d120' """), b"\\" + b"U0001d120")
        self.assertRaises(SyntaxError, eval, """ br'\U0001d120' """)
        self.assertRaises(SyntaxError, eval, """ rb'\U0001d120' """)
        # Repeated prefix letters are never a valid literal prefix.
        for source in (""" bb'' """, """ rr'' """, """ brr'' """,
                       """ bbr'' """, """ rrb'' """, """ rbb'' """):
            with self.subTest(source=source):
                self.assertRaises(SyntaxError, eval, source)

    def test_eval_str_u(self):
        # The u prefix is accepted on its own but cannot be combined with
        # r or b.
        self.assertEqual(eval(""" u'x' """), 'x')
        self.assertEqual(eval(""" U'\u00e4' """), 'ä')
        self.assertEqual(eval(""" u'\N{LATIN SMALL LETTER A WITH DIAERESIS}' """), 'ä')
        self.assertRaises(SyntaxError, eval, """ ur'' """)
        self.assertRaises(SyntaxError, eval, """ ru'' """)
        self.assertRaises(SyntaxError, eval, """ bu'' """)
        self.assertRaises(SyntaxError, eval, """ ub'' """)

    def check_encoding(self, encoding, extra=""):
        """Write TEMPLATE as a module encoded in *encoding* and import it.

        The module is named "xx_" plus *encoding* with dashes replaced by
        underscores, and is created in self.tmpdir.  *extra* is appended
        to the module source after the template.  Any exception raised
        while importing the module (for example SyntaxError, or an
        AssertionError from the template's own checks) propagates to the
        caller.
        """
        modname = "xx_" + encoding.replace("-", "_")
        fn = os.path.join(self.tmpdir, modname + ".py")
        with open(fn, "w", encoding=encoding) as f:
            f.write(TEMPLATE % encoding)
            f.write(extra)
        try:
            __import__(modname)
        finally:
            # Never leave the throw-away module registered: several tests
            # reuse the same module name from different directories.
            sys.modules.pop(modname, None)

    def test_file_utf_8(self):
        extra = "z = '\u1234'; assert ord(z) == 0x1234\n"
        self.check_encoding("utf-8", extra)

    def test_file_utf_8_error(self):
        # A non-ASCII character inside a bytes literal must be rejected
        # when the module is compiled.
        extra = "b'\x80'\n"
        self.assertRaises(SyntaxError, self.check_encoding, "utf-8", extra)

    def test_file_utf8(self):
        self.check_encoding("utf-8")

    def test_file_iso_8859_1(self):
        self.check_encoding("iso-8859-1")

    def test_file_latin_1(self):
        self.check_encoding("latin-1")

    def test_file_latin9(self):
        self.check_encoding("latin9")
246
247
# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()