blob: 048f40d90a4bb742a5f97bf4574283b35f309b33 [file] [log] [blame]
r"""Test correct treatment of various string literals by the parser.

There are four types of string literals:

    'abc'             -- normal str
    r'abc'            -- raw str
    b'xyz'            -- normal bytes
    br'xyz' | rb'xyz' -- raw bytes

The difference between normal and raw strings is of course that in a
raw string, \ escapes (while still used to determine the end of the
literal) are not interpreted, so that r'\x00' contains four
characters: a backslash, an x, and two zeros; while '\x00' contains a
single character (code point zero).

The tricky thing is what should happen when non-ASCII bytes are used
inside literals.  For bytes literals, this is considered illegal.  But
for str literals, those bytes are supposed to be decoded using the
encoding declared for the file (UTF-8 by default).

We have to test this with various file encodings.  We also test it with
exec()/eval(), which uses a different code path.

This file is really about correct treatment of encodings and
backslashes.  It doesn't concern itself with issues like single
vs. double quotes or singly- vs. triply-quoted strings: that's dealt
with elsewhere (I assume).
"""
29
30import os
31import sys
32import shutil
33import tempfile
Eric V. Smith56466482016-10-31 14:46:26 -040034import warnings
Guido van Rossum29fd7122007-11-12 01:13:56 +000035import unittest
36
37
# Source text of a throw-away module, written to disk and imported by
# TestLiterals.check_encoding().  The single %s is filled in with the name of
# the encoding declared in the coding cookie.  The literal is raw so that the
# backslash escapes below reach the generated file untouched and are only
# interpreted when that file is parsed.  It must contain nothing but
# printable ASCII and newlines (verified by TestLiterals.test_template), so
# the same text can be written out in any of the tested encodings.
TEMPLATE = r"""# coding: %s
a = 'x'
assert ord(a) == 120
b = '\x01'
assert ord(b) == 1
c = r'\x01'
assert list(map(ord, c)) == [92, 120, 48, 49]
d = '\x81'
assert ord(d) == 0x81
e = r'\x81'
assert list(map(ord, e)) == [92, 120, 56, 49]
f = '\u1881'
assert ord(f) == 0x1881
g = r'\u1881'
assert list(map(ord, g)) == [92, 117, 49, 56, 56, 49]
h = '\U0001d120'
assert ord(h) == 0x1d120
i = r'\U0001d120'
assert list(map(ord, i)) == [92, 85, 48, 48, 48, 49, 100, 49, 50, 48]
"""
58
59
def byte(i):
    """Return a bytes object of length one holding the byte value *i*."""
    return bytes([i])
62
63
class TestLiterals(unittest.TestCase):
    """Check how the parser decodes str and bytes literals.

    Literals are exercised both through eval() and through modules written
    to a temporary directory in several source encodings and then imported.
    """

    # Imported in the class body on purpose: the function becomes a method,
    # so self.check_syntax_warning(statement) passes the test case as the
    # helper's first argument.
    from test.support import check_syntax_warning

    def setUp(self):
        # Make a scratch directory importable for check_encoding(); the
        # original sys.path is remembered so tearDown() can restore it.
        self.save_path = sys.path[:]
        self.tmpdir = tempfile.mkdtemp()
        sys.path.insert(0, self.tmpdir)

    def tearDown(self):
        sys.path[:] = self.save_path
        shutil.rmtree(self.tmpdir, ignore_errors=True)

    def test_template(self):
        # Check that the template doesn't contain any non-printables
        # except for \n.
        for c in TEMPLATE:
            # A real assertion method rather than a bare ``assert``, which
            # would be compiled away (and check nothing) under ``python -O``.
            self.assertTrue(c == '\n' or ' ' <= c <= '~', repr(c))

    # In the eval() tests below the *outer* literal comes in two flavours:
    #   r""" ... """  -- the backslash sequence reaches eval() verbatim, so
    #                    the inner literal's escape handling is what is tested;
    #   """ ... """   -- the escape is already decoded when this file is
    #                    parsed, so eval() sees the actual character inside
    #                    the inner literal.

    def test_eval_str_normal(self):
        self.assertEqual(eval(""" 'x' """), 'x')
        self.assertEqual(eval(r""" '\x01' """), chr(1))
        self.assertEqual(eval(""" '\x01' """), chr(1))
        self.assertEqual(eval(r""" '\x81' """), chr(0x81))
        self.assertEqual(eval(""" '\x81' """), chr(0x81))
        self.assertEqual(eval(r""" '\u1881' """), chr(0x1881))
        self.assertEqual(eval(""" '\u1881' """), chr(0x1881))
        self.assertEqual(eval(r""" '\U0001d120' """), chr(0x1d120))
        self.assertEqual(eval(""" '\U0001d120' """), chr(0x1d120))

    def test_eval_str_incomplete(self):
        # Truncated \x, \u and \U escapes must be rejected.
        self.assertRaises(SyntaxError, eval, r""" '\x' """)
        self.assertRaises(SyntaxError, eval, r""" '\x0' """)
        self.assertRaises(SyntaxError, eval, r""" '\u' """)
        self.assertRaises(SyntaxError, eval, r""" '\u0' """)
        self.assertRaises(SyntaxError, eval, r""" '\u00' """)
        self.assertRaises(SyntaxError, eval, r""" '\u000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U' """)
        self.assertRaises(SyntaxError, eval, r""" '\U0' """)
        self.assertRaises(SyntaxError, eval, r""" '\U00' """)
        self.assertRaises(SyntaxError, eval, r""" '\U000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U0000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U00000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U000000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U0000000' """)

    def test_eval_str_invalid_escape(self):
        for b in range(1, 128):
            # Skip every character that forms a recognised str escape
            # (or a line continuation) when it follows a backslash.
            if b in b"""\n\r"'01234567NU\\abfnrtuvx""":
                continue
            # Anything else warns and keeps the backslash in the result.
            with self.assertWarns(SyntaxWarning):
                self.assertEqual(eval(r"'\%c'" % b), '\\' + chr(b))

        self.check_syntax_warning("'''\n\\z'''")

    def test_eval_str_raw(self):
        self.assertEqual(eval(""" r'x' """), 'x')
        self.assertEqual(eval(r""" r'\x01' """), '\\' + 'x01')
        self.assertEqual(eval(""" r'\x01' """), chr(1))
        self.assertEqual(eval(r""" r'\x81' """), '\\' + 'x81')
        self.assertEqual(eval(""" r'\x81' """), chr(0x81))
        self.assertEqual(eval(r""" r'\u1881' """), '\\' + 'u1881')
        self.assertEqual(eval(""" r'\u1881' """), chr(0x1881))
        self.assertEqual(eval(r""" r'\U0001d120' """), '\\' + 'U0001d120')
        self.assertEqual(eval(""" r'\U0001d120' """), chr(0x1d120))

    def test_eval_bytes_normal(self):
        self.assertEqual(eval(""" b'x' """), b'x')
        self.assertEqual(eval(r""" b'\x01' """), byte(1))
        self.assertEqual(eval(""" b'\x01' """), byte(1))
        self.assertEqual(eval(r""" b'\x81' """), byte(0x81))
        # A literal non-ASCII character inside a bytes literal is an error.
        self.assertRaises(SyntaxError, eval, """ b'\x81' """)
        self.assertEqual(eval(r""" br'\u1881' """), b'\\' + b'u1881')
        self.assertRaises(SyntaxError, eval, """ b'\u1881' """)
        self.assertEqual(eval(r""" br'\U0001d120' """), b'\\' + b'U0001d120')
        self.assertRaises(SyntaxError, eval, """ b'\U0001d120' """)

    def test_eval_bytes_incomplete(self):
        self.assertRaises(SyntaxError, eval, r""" b'\x' """)
        self.assertRaises(SyntaxError, eval, r""" b'\x0' """)

    def test_eval_bytes_invalid_escape(self):
        for b in range(1, 128):
            # Same as the str variant, except that N, U and u are not
            # skipped here: they are not escapes in bytes literals.
            if b in b"""\n\r"'01234567\\abfnrtvx""":
                continue
            with self.assertWarns(SyntaxWarning):
                self.assertEqual(eval(r"b'\%c'" % b), b'\\' + bytes([b]))

        self.check_syntax_warning("b'''\n\\z'''")

    def test_eval_bytes_raw(self):
        self.assertEqual(eval(""" br'x' """), b'x')
        self.assertEqual(eval(""" rb'x' """), b'x')
        self.assertEqual(eval(r""" br'\x01' """), b'\\' + b'x01')
        self.assertEqual(eval(r""" rb'\x01' """), b'\\' + b'x01')
        self.assertEqual(eval(""" br'\x01' """), byte(1))
        self.assertEqual(eval(""" rb'\x01' """), byte(1))
        self.assertEqual(eval(r""" br'\x81' """), b"\\" + b"x81")
        self.assertEqual(eval(r""" rb'\x81' """), b"\\" + b"x81")
        self.assertRaises(SyntaxError, eval, """ br'\x81' """)
        self.assertRaises(SyntaxError, eval, """ rb'\x81' """)
        self.assertEqual(eval(r""" br'\u1881' """), b"\\" + b"u1881")
        self.assertEqual(eval(r""" rb'\u1881' """), b"\\" + b"u1881")
        self.assertRaises(SyntaxError, eval, """ br'\u1881' """)
        self.assertRaises(SyntaxError, eval, """ rb'\u1881' """)
        self.assertEqual(eval(r""" br'\U0001d120' """), b"\\" + b"U0001d120")
        self.assertEqual(eval(r""" rb'\U0001d120' """), b"\\" + b"U0001d120")
        self.assertRaises(SyntaxError, eval, """ br'\U0001d120' """)
        self.assertRaises(SyntaxError, eval, """ rb'\U0001d120' """)
        # Repeated or over-long prefix combinations are not valid prefixes.
        self.assertRaises(SyntaxError, eval, """ bb'' """)
        self.assertRaises(SyntaxError, eval, """ rr'' """)
        self.assertRaises(SyntaxError, eval, """ brr'' """)
        self.assertRaises(SyntaxError, eval, """ bbr'' """)
        self.assertRaises(SyntaxError, eval, """ rrb'' """)
        self.assertRaises(SyntaxError, eval, """ rbb'' """)

    def test_eval_str_u(self):
        self.assertEqual(eval(""" u'x' """), 'x')
        self.assertEqual(eval(""" U'\u00e4' """), 'ä')
        self.assertEqual(eval(""" u'\N{LATIN SMALL LETTER A WITH DIAERESIS}' """), 'ä')
        # The u prefix cannot be combined with r or b.
        self.assertRaises(SyntaxError, eval, """ ur'' """)
        self.assertRaises(SyntaxError, eval, """ ru'' """)
        self.assertRaises(SyntaxError, eval, """ bu'' """)
        self.assertRaises(SyntaxError, eval, """ ub'' """)

    def check_encoding(self, encoding, extra=""):
        """Write TEMPLATE as a module in *encoding* and import it.

        The module is created in self.tmpdir (placed on sys.path by
        setUp()) under a name derived from *encoding*.  *extra* is source
        text appended after the template.  Any exception raised while the
        module is imported -- a SyntaxError from the parser, or a failing
        assert in the generated code -- propagates to the caller.
        """
        modname = "xx_" + encoding.replace("-", "_")
        fn = os.path.join(self.tmpdir, modname + ".py")
        with open(fn, "w", encoding=encoding) as f:
            f.write(TEMPLATE % encoding)
            f.write(extra)
        try:
            __import__(modname)
        finally:
            # Several tests reuse the same module name from different
            # temporary directories, so never leave it cached.
            sys.modules.pop(modname, None)

    def test_file_utf_8(self):
        extra = "z = '\u1234'; assert ord(z) == 0x1234\n"
        self.check_encoding("utf-8", extra)

    def test_file_utf_8_error(self):
        # A non-ASCII character inside a bytes literal must be rejected
        # when the module is compiled.
        extra = "b'\x80'\n"
        self.assertRaises(SyntaxError, self.check_encoding, "utf-8", extra)

    def test_file_utf8(self):
        self.check_encoding("utf-8")

    def test_file_iso_8859_1(self):
        self.check_encoding("iso-8859-1")

    def test_file_latin_1(self):
        self.check_encoding("latin-1")

    def test_file_latin9(self):
        self.check_encoding("latin9")
220
221
# Run the test suite when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()