blob: d01322faa68263e664372782bb677f8cf33c31e6 [file] [log] [blame]
r"""Test correct treatment of various string literals by the parser.

There are four types of string literals:

    'abc'             -- normal str
    r'abc'            -- raw str
    b'xyz'            -- normal bytes
    br'xyz' | rb'xyz' -- raw bytes

The difference between normal and raw strings is of course that in a
raw string, \ escapes (while still used to determine the end of the
literal) are not interpreted, so that r'\x00' contains four
characters: a backslash, an x, and two zeros; while '\x00' contains a
single character (code point zero).

The tricky thing is what should happen when non-ASCII bytes are used
inside literals.  For bytes literals, this is considered illegal.  But
for str literals, those bytes are supposed to be decoded using the
encoding declared for the file (UTF-8 by default).

We have to test this with various file encodings.  We also test it with
exec()/eval(), which uses a different code path.

This file is really about correct treatment of encodings and
backslashes.  It doesn't concern itself with issues like single
vs. double quotes or singly- vs. triply-quoted strings: that's dealt
with elsewhere (I assume).
"""
29
30import os
31import sys
32import shutil
33import tempfile
34import unittest
Antoine Pitroubbf53612012-01-12 22:36:48 +010035import test.support
Guido van Rossum29fd7122007-11-12 01:13:56 +000036
37
# Source text of the module that check_encoding() writes to disk and imports.
# The single %s placeholder receives the encoding name for the coding cookie.
# The literal is raw so that the backslash escapes reach the generated file
# unexpanded and are interpreted by the parser when that file is imported.
# It must contain only printable ASCII and newlines (see test_template).
TEMPLATE = r"""# coding: %s
a = 'x'
assert ord(a) == 120
b = '\x01'
assert ord(b) == 1
c = r'\x01'
assert list(map(ord, c)) == [92, 120, 48, 49]
d = '\x81'
assert ord(d) == 0x81
e = r'\x81'
assert list(map(ord, e)) == [92, 120, 56, 49]
f = '\u1881'
assert ord(f) == 0x1881
g = r'\u1881'
assert list(map(ord, g)) == [92, 117, 49, 56, 56, 49]
h = '\U0001d120'
assert ord(h) == 0x1d120
i = r'\U0001d120'
assert list(map(ord, i)) == [92, 85, 48, 48, 48, 49, 100, 49, 50, 48]
"""
58
59
def byte(i):
    """Return a bytes object of length one whose only item is the integer *i*."""
    single_item = (i,)
    return bytes(single_item)
62
63
class TestLiterals(unittest.TestCase):
    """Check how the parser decodes str and bytes literals.

    The test_eval_* methods feed literal source text to eval().  The
    test_file_* methods write TEMPLATE to a module file in a given
    encoding and import it, which exercises the file-based code path.
    """

    def setUp(self):
        # Give every test a private directory at the front of sys.path so
        # the modules written by check_encoding() can be imported by name.
        self.save_path = sys.path[:]
        self.tmpdir = tempfile.mkdtemp()
        sys.path.insert(0, self.tmpdir)

    def tearDown(self):
        # Restore sys.path in place so other references to the list see it.
        sys.path[:] = self.save_path
        shutil.rmtree(self.tmpdir, ignore_errors=True)

    def test_template(self):
        # Check that the template doesn't contain any non-printables
        # except for \n.  assertTrue is used instead of a bare assert
        # statement so the check still runs under "python -O".
        for c in TEMPLATE:
            self.assertTrue(c == '\n' or ' ' <= c <= '~', repr(c))

    def test_eval_str_normal(self):
        # Raw outer literals hand the escape sequence to eval() untouched;
        # non-raw outer literals hand it the already-expanded character.
        # A normal str literal must yield the same character either way.
        self.assertEqual(eval(""" 'x' """), 'x')
        self.assertEqual(eval(r""" '\x01' """), chr(1))
        self.assertEqual(eval(""" '\x01' """), chr(1))
        self.assertEqual(eval(r""" '\x81' """), chr(0x81))
        self.assertEqual(eval(""" '\x81' """), chr(0x81))
        self.assertEqual(eval(r""" '\u1881' """), chr(0x1881))
        self.assertEqual(eval(""" '\u1881' """), chr(0x1881))
        self.assertEqual(eval(r""" '\U0001d120' """), chr(0x1d120))
        self.assertEqual(eval(""" '\U0001d120' """), chr(0x1d120))

    def test_eval_str_incomplete(self):
        # Truncated \x, \u and \U escapes in a str literal are rejected.
        self.assertRaises(SyntaxError, eval, r""" '\x' """)
        self.assertRaises(SyntaxError, eval, r""" '\x0' """)
        self.assertRaises(SyntaxError, eval, r""" '\u' """)
        self.assertRaises(SyntaxError, eval, r""" '\u0' """)
        self.assertRaises(SyntaxError, eval, r""" '\u00' """)
        self.assertRaises(SyntaxError, eval, r""" '\u000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U' """)
        self.assertRaises(SyntaxError, eval, r""" '\U0' """)
        self.assertRaises(SyntaxError, eval, r""" '\U00' """)
        self.assertRaises(SyntaxError, eval, r""" '\U000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U0000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U00000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U000000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U0000000' """)

    def test_eval_str_raw(self):
        # With a raw outer literal the inner raw str keeps the backslash
        # sequence verbatim; with a non-raw outer literal the escape was
        # expanded before eval() ran, so the inner literal holds the
        # character itself.
        self.assertEqual(eval(""" r'x' """), 'x')
        self.assertEqual(eval(r""" r'\x01' """), '\\' + 'x01')
        self.assertEqual(eval(""" r'\x01' """), chr(1))
        self.assertEqual(eval(r""" r'\x81' """), '\\' + 'x81')
        self.assertEqual(eval(""" r'\x81' """), chr(0x81))
        self.assertEqual(eval(r""" r'\u1881' """), '\\' + 'u1881')
        self.assertEqual(eval(""" r'\u1881' """), chr(0x1881))
        self.assertEqual(eval(r""" r'\U0001d120' """), '\\' + 'U0001d120')
        self.assertEqual(eval(""" r'\U0001d120' """), chr(0x1d120))

    def test_eval_bytes_normal(self):
        # \x escapes are interpreted in bytes literals, \u and \U are kept
        # verbatim.  A non-ASCII character placed directly inside a bytes
        # literal (non-raw outer literal) is a SyntaxError.
        self.assertEqual(eval(""" b'x' """), b'x')
        self.assertEqual(eval(r""" b'\x01' """), byte(1))
        self.assertEqual(eval(""" b'\x01' """), byte(1))
        self.assertEqual(eval(r""" b'\x81' """), byte(0x81))
        self.assertRaises(SyntaxError, eval, """ b'\x81' """)
        self.assertEqual(eval(r""" b'\u1881' """), b'\\' + b'u1881')
        self.assertRaises(SyntaxError, eval, """ b'\u1881' """)
        self.assertEqual(eval(r""" b'\U0001d120' """), b'\\' + b'U0001d120')
        self.assertRaises(SyntaxError, eval, """ b'\U0001d120' """)

    def test_eval_bytes_incomplete(self):
        # Truncated \x escapes in a bytes literal are rejected.
        self.assertRaises(SyntaxError, eval, r""" b'\x' """)
        self.assertRaises(SyntaxError, eval, r""" b'\x0' """)

    def test_eval_bytes_raw(self):
        # Both prefix spellings, br and rb, are checked side by side.
        self.assertEqual(eval(""" br'x' """), b'x')
        self.assertEqual(eval(""" rb'x' """), b'x')
        self.assertEqual(eval(r""" br'\x01' """), b'\\' + b'x01')
        self.assertEqual(eval(r""" rb'\x01' """), b'\\' + b'x01')
        self.assertEqual(eval(""" br'\x01' """), byte(1))
        self.assertEqual(eval(""" rb'\x01' """), byte(1))
        self.assertEqual(eval(r""" br'\x81' """), b"\\" + b"x81")
        self.assertEqual(eval(r""" rb'\x81' """), b"\\" + b"x81")
        self.assertRaises(SyntaxError, eval, """ br'\x81' """)
        self.assertRaises(SyntaxError, eval, """ rb'\x81' """)
        self.assertEqual(eval(r""" br'\u1881' """), b"\\" + b"u1881")
        self.assertEqual(eval(r""" rb'\u1881' """), b"\\" + b"u1881")
        self.assertRaises(SyntaxError, eval, """ br'\u1881' """)
        self.assertRaises(SyntaxError, eval, """ rb'\u1881' """)
        self.assertEqual(eval(r""" br'\U0001d120' """), b"\\" + b"U0001d120")
        self.assertEqual(eval(r""" rb'\U0001d120' """), b"\\" + b"U0001d120")
        self.assertRaises(SyntaxError, eval, """ br'\U0001d120' """)
        self.assertRaises(SyntaxError, eval, """ rb'\U0001d120' """)
        # Repeated prefix letters are not valid prefixes.
        self.assertRaises(SyntaxError, eval, """ bb'' """)
        self.assertRaises(SyntaxError, eval, """ rr'' """)
        self.assertRaises(SyntaxError, eval, """ brr'' """)
        self.assertRaises(SyntaxError, eval, """ bbr'' """)
        self.assertRaises(SyntaxError, eval, """ rrb'' """)
        self.assertRaises(SyntaxError, eval, """ rbb'' """)

    def test_eval_str_u(self):
        # The u/U prefix is accepted on its own but cannot be combined
        # with r or b.
        self.assertEqual(eval(""" u'x' """), 'x')
        self.assertEqual(eval(""" U'\u00e4' """), 'ä')
        self.assertEqual(eval(""" u'\N{LATIN SMALL LETTER A WITH DIAERESIS}' """), 'ä')
        self.assertRaises(SyntaxError, eval, """ ur'' """)
        self.assertRaises(SyntaxError, eval, """ ru'' """)
        self.assertRaises(SyntaxError, eval, """ bu'' """)
        self.assertRaises(SyntaxError, eval, """ ub'' """)

    def check_encoding(self, encoding, extra=""):
        # Write TEMPLATE (with *encoding* as its coding cookie) followed by
        # *extra* to a module file encoded in *encoding*, then import it.
        # The module's own assert statements do the checking; any error
        # raised while importing propagates to the caller.
        modname = "xx_" + encoding.replace("-", "_")
        fn = os.path.join(self.tmpdir, modname + ".py")
        with open(fn, "w", encoding=encoding) as f:
            f.write(TEMPLATE % encoding)
            f.write(extra)
        __import__(modname)
        # Drop the module so a later test can import the same name afresh
        # from its own temporary directory.
        del sys.modules[modname]

    def test_file_utf_8(self):
        extra = "z = '\u1234'; assert ord(z) == 0x1234\n"
        self.check_encoding("utf-8", extra)

    def test_file_utf_8_error(self):
        # A non-ASCII character inside a bytes literal must be rejected
        # when the module is imported.
        extra = "b'\x80'\n"
        self.assertRaises(SyntaxError, self.check_encoding, "utf-8", extra)

    def test_file_utf8(self):
        self.check_encoding("utf-8")

    def test_file_iso_8859_1(self):
        self.check_encoding("iso-8859-1")

    def test_file_latin_1(self):
        self.check_encoding("latin-1")

    def test_file_latin9(self):
        self.check_encoding("latin9")
200
201
def test_main():
    """Run this module's tests through test.support.run_unittest()."""
    module_name = __name__
    test.support.run_unittest(module_name)


if __name__ == "__main__":
    test_main()