blob: 87cffe843ab7cc9fef74f27b261dc8da87d672c6 [file] [log] [blame]
r"""Test correct treatment of various string literals by the parser.

There are four types of string literals:

    'abc'             -- normal str
    r'abc'            -- raw str
    b'xyz'            -- normal bytes
    br'xyz' | rb'xyz' -- raw bytes

The difference between normal and raw strings is of course that in a
raw string, \ escapes (while still used to determine the end of the
literal) are not interpreted, so that r'\x00' contains four
characters: a backslash, an x, and two zeros; while '\x00' contains a
single character (code point zero).

The tricky thing is what should happen when non-ASCII bytes are used
inside literals.  For bytes literals, this is considered illegal.  But
for str literals, those bytes are supposed to be decoded using the
encoding declared for the file (UTF-8 by default).

We have to test this with various file encodings.  We also test it with
exec()/eval(), which uses a different code path.

This file is really about correct treatment of encodings and
backslashes.  It doesn't concern itself with issues like single
vs. double quotes or singly- vs. triply-quoted strings: that's dealt
with elsewhere (I assume).
"""
29
30import os
31import sys
32import shutil
33import tempfile
34import unittest
35
36
# Source text of a small self-checking module.  check_encoding() fills in
# the %s with an encoding name (producing the "# coding:" cookie), writes
# the result to a file in that encoding and imports it, so the asserts
# below run under the parser's file-decoding code path.
#
# The template is a raw string and must stay pure printable ASCII (plus
# newlines): every backslash sequence below reaches the generated file
# verbatim and is interpreted -- or, for r'' literals, not interpreted --
# only when that file is compiled.  test_template() enforces this.
TEMPLATE = r"""# coding: %s
a = 'x'
assert ord(a) == 120
b = '\x01'
assert ord(b) == 1
c = r'\x01'
assert list(map(ord, c)) == [92, 120, 48, 49]
d = '\x81'
assert ord(d) == 0x81
e = r'\x81'
assert list(map(ord, e)) == [92, 120, 56, 49]
f = '\u1881'
assert ord(f) == 0x1881
g = r'\u1881'
assert list(map(ord, g)) == [92, 117, 49, 56, 56, 49]
h = '\U0001d120'
assert ord(h) == 0x1d120
i = r'\U0001d120'
assert list(map(ord, i)) == [92, 85, 48, 48, 48, 49, 100, 49, 50, 48]
"""
57
58
def byte(i):
    """Return a bytes object of length one whose single byte has value *i*.

    *i* is passed to bytes() inside a one-element list, so it must be an
    integer in range(256); bytes() raises ValueError otherwise.
    """
    return bytes([i])
61
62
class TestLiterals(unittest.TestCase):
    """Parser tests for str/bytes literals, backslash escapes and encodings.

    The test_eval_* methods feed literal source text to eval().  The
    test_file_* methods write TEMPLATE to a temporary module carrying a
    coding cookie and import it (see check_encoding()).

    Throughout the eval tests, pay attention to whether the *outer*
    triple-quoted string is raw:

    * outer raw (r\"\"\" ... \"\"\"): eval() receives the backslash sequence
      itself, so the parser under test decides how to treat it;
    * outer non-raw: this file's own compilation has already turned the
      escape into the actual character, so eval() receives a literal
      containing that (possibly non-ASCII) character.
    """

    def setUp(self):
        # Put a fresh scratch directory at the front of sys.path so that
        # modules generated by check_encoding() can be imported; keep a
        # copy of the original path for tearDown().
        self.save_path = sys.path[:]
        self.tmpdir = tempfile.mkdtemp()
        sys.path.insert(0, self.tmpdir)

    def tearDown(self):
        # Slice-assign so the very same list object is restored in place.
        sys.path[:] = self.save_path
        shutil.rmtree(self.tmpdir, ignore_errors=True)

    def test_template(self):
        # Check that the template doesn't contain any non-printables
        # except for \n.  assertTrue is used rather than a bare assert
        # statement so the check still runs under "python -O".
        for c in TEMPLATE:
            self.assertTrue(c == '\n' or ' ' <= c <= '~', repr(c))

    def test_eval_str_normal(self):
        self.assertEqual(eval(""" 'x' """), 'x')
        self.assertEqual(eval(r""" '\x01' """), chr(1))
        self.assertEqual(eval(""" '\x01' """), chr(1))
        self.assertEqual(eval(r""" '\x81' """), chr(0x81))
        self.assertEqual(eval(""" '\x81' """), chr(0x81))
        self.assertEqual(eval(r""" '\u1881' """), chr(0x1881))
        self.assertEqual(eval(""" '\u1881' """), chr(0x1881))
        self.assertEqual(eval(r""" '\U0001d120' """), chr(0x1d120))
        self.assertEqual(eval(""" '\U0001d120' """), chr(0x1d120))

    def test_eval_str_incomplete(self):
        # Truncated \x, \u and \U escapes must be rejected at compile time.
        self.assertRaises(SyntaxError, eval, r""" '\x' """)
        self.assertRaises(SyntaxError, eval, r""" '\x0' """)
        self.assertRaises(SyntaxError, eval, r""" '\u' """)
        self.assertRaises(SyntaxError, eval, r""" '\u0' """)
        self.assertRaises(SyntaxError, eval, r""" '\u00' """)
        self.assertRaises(SyntaxError, eval, r""" '\u000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U' """)
        self.assertRaises(SyntaxError, eval, r""" '\U0' """)
        self.assertRaises(SyntaxError, eval, r""" '\U00' """)
        self.assertRaises(SyntaxError, eval, r""" '\U000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U0000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U00000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U000000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U0000000' """)

    def test_eval_str_raw(self):
        # Outer raw: the inner r'' literal keeps backslash + text as-is.
        # Outer non-raw: the inner r'' literal holds the real character.
        self.assertEqual(eval(""" r'x' """), 'x')
        self.assertEqual(eval(r""" r'\x01' """), '\\' + 'x01')
        self.assertEqual(eval(""" r'\x01' """), chr(1))
        self.assertEqual(eval(r""" r'\x81' """), '\\' + 'x81')
        self.assertEqual(eval(""" r'\x81' """), chr(0x81))
        self.assertEqual(eval(r""" r'\u1881' """), '\\' + 'u1881')
        self.assertEqual(eval(""" r'\u1881' """), chr(0x1881))
        self.assertEqual(eval(r""" r'\U0001d120' """), '\\' + 'U0001d120')
        self.assertEqual(eval(""" r'\U0001d120' """), chr(0x1d120))

    def test_eval_bytes_normal(self):
        # \u and \U are not escapes in bytes literals, so they stay as
        # backslash + text; an actual non-ASCII character inside a bytes
        # literal (outer non-raw cases) is a SyntaxError.
        self.assertEqual(eval(""" b'x' """), b'x')
        self.assertEqual(eval(r""" b'\x01' """), byte(1))
        self.assertEqual(eval(""" b'\x01' """), byte(1))
        self.assertEqual(eval(r""" b'\x81' """), byte(0x81))
        self.assertRaises(SyntaxError, eval, """ b'\x81' """)
        self.assertEqual(eval(r""" b'\u1881' """), b'\\' + b'u1881')
        self.assertRaises(SyntaxError, eval, """ b'\u1881' """)
        self.assertEqual(eval(r""" b'\U0001d120' """), b'\\' + b'U0001d120')
        self.assertRaises(SyntaxError, eval, """ b'\U0001d120' """)

    def test_eval_bytes_incomplete(self):
        self.assertRaises(SyntaxError, eval, r""" b'\x' """)
        self.assertRaises(SyntaxError, eval, r""" b'\x0' """)

    def test_eval_bytes_raw(self):
        # Both prefix orders (br / rb) must behave identically.
        self.assertEqual(eval(""" br'x' """), b'x')
        self.assertEqual(eval(""" rb'x' """), b'x')
        self.assertEqual(eval(r""" br'\x01' """), b'\\' + b'x01')
        self.assertEqual(eval(r""" rb'\x01' """), b'\\' + b'x01')
        self.assertEqual(eval(""" br'\x01' """), byte(1))
        self.assertEqual(eval(""" rb'\x01' """), byte(1))
        self.assertEqual(eval(r""" br'\x81' """), b"\\" + b"x81")
        self.assertEqual(eval(r""" rb'\x81' """), b"\\" + b"x81")
        self.assertRaises(SyntaxError, eval, """ br'\x81' """)
        self.assertRaises(SyntaxError, eval, """ rb'\x81' """)
        self.assertEqual(eval(r""" br'\u1881' """), b"\\" + b"u1881")
        self.assertEqual(eval(r""" rb'\u1881' """), b"\\" + b"u1881")
        self.assertRaises(SyntaxError, eval, """ br'\u1881' """)
        self.assertRaises(SyntaxError, eval, """ rb'\u1881' """)
        self.assertEqual(eval(r""" br'\U0001d120' """), b"\\" + b"U0001d120")
        self.assertEqual(eval(r""" rb'\U0001d120' """), b"\\" + b"U0001d120")
        self.assertRaises(SyntaxError, eval, """ br'\U0001d120' """)
        self.assertRaises(SyntaxError, eval, """ rb'\U0001d120' """)
        # Repeated prefix letters are not valid prefixes.
        self.assertRaises(SyntaxError, eval, """ bb'' """)
        self.assertRaises(SyntaxError, eval, """ rr'' """)
        self.assertRaises(SyntaxError, eval, """ brr'' """)
        self.assertRaises(SyntaxError, eval, """ bbr'' """)
        self.assertRaises(SyntaxError, eval, """ rrb'' """)
        self.assertRaises(SyntaxError, eval, """ rbb'' """)

    def test_eval_str_u(self):
        # The u/U prefix is accepted on its own but cannot be combined
        # with r or b.
        self.assertEqual(eval(""" u'x' """), 'x')
        self.assertEqual(eval(""" U'\u00e4' """), 'ä')
        self.assertEqual(eval(""" u'\N{LATIN SMALL LETTER A WITH DIAERESIS}' """), 'ä')
        self.assertRaises(SyntaxError, eval, """ ur'' """)
        self.assertRaises(SyntaxError, eval, """ ru'' """)
        self.assertRaises(SyntaxError, eval, """ bu'' """)
        self.assertRaises(SyntaxError, eval, """ ub'' """)

    def check_encoding(self, encoding, extra=""):
        """Write TEMPLATE (plus *extra*) as a module in *encoding* and import it.

        The module is named "xx_" + *encoding* with dashes replaced by
        underscores, is created in self.tmpdir, declares *encoding* in its
        coding cookie and is encoded with it on disk.  Importing the
        module executes the asserts inside TEMPLATE and *extra*; any
        exception raised while compiling or running it propagates to the
        caller.  After a successful import the module is removed from
        sys.modules again.
        """
        modname = "xx_" + encoding.replace("-", "_")
        fn = os.path.join(self.tmpdir, modname + ".py")
        # The with-block guarantees the file is flushed and closed before
        # the import below reads it, even if a write fails.
        with open(fn, "w", encoding=encoding) as f:
            f.write(TEMPLATE % encoding)
            f.write(extra)
        __import__(modname)
        del sys.modules[modname]

    def test_file_utf_8(self):
        extra = "z = '\u1234'; assert ord(z) == 0x1234\n"
        self.check_encoding("utf-8", extra)

    def test_file_utf_8_error(self):
        # A non-ASCII character inside a bytes literal must be rejected
        # when read from a file, too.
        extra = "b'\x80'\n"
        self.assertRaises(SyntaxError, self.check_encoding, "utf-8", extra)

    def test_file_utf8(self):
        # Use the dash-less alias so this test covers a spelling of the
        # coding cookie that test_file_utf_8 does not.
        self.check_encoding("utf8")

    def test_file_iso_8859_1(self):
        self.check_encoding("iso-8859-1")

    def test_file_latin_1(self):
        self.check_encoding("latin-1")

    def test_file_latin9(self):
        self.check_encoding("latin9")
199
200
# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()