blob: 07bc48880a98b979f02ef8befba5c8110bed45ca [file] [log] [blame]
Guido van Rossum29fd7122007-11-12 01:13:56 +00001r"""Test correct treatment of various string literals by the parser.
2
3There are four types of string literals:
4
Antoine Pitrou3a5d4cb2012-01-12 22:46:19 +01005 'abc' -- normal str
6 r'abc' -- raw str
7 b'xyz' -- normal bytes
8 br'xyz' | rb'xyz' -- raw bytes
Guido van Rossum29fd7122007-11-12 01:13:56 +00009
10The difference between normal and raw strings is of course that in a
11raw string, \ escapes (while still used to determine the end of the
12literal) are not interpreted, so that r'\x00' contains four
13characters: a backslash, an x, and two zeros; while '\x00' contains a
14single character (code point zero).
15
16The tricky thing is what should happen when non-ASCII bytes are used
17inside literals. For bytes literals, this is considered illegal. But
18for str literals, those bytes are supposed to be decoded using the
19encoding declared for the file (UTF-8 by default).
20
21We have to test this with various file encodings. We also test it with
22exec()/eval(), which uses a different code path.
23
24This file is really about correct treatment of encodings and
Ezio Melotti13925002011-03-16 11:05:33 +020025backslashes. It doesn't concern itself with issues like single
Guido van Rossum29fd7122007-11-12 01:13:56 +000026vs. double quotes or singly- vs. triply-quoted strings: that's dealt
27with elsewhere (I assume).
28"""
29
30import os
31import sys
32import shutil
33import tempfile
34import unittest
Antoine Pitroubbf53612012-01-12 22:36:48 +010035import test.support
Guido van Rossum29fd7122007-11-12 01:13:56 +000036
37
# Source text of a small self-checking module.  The single %s placeholder
# on the first line receives the name of the encoding, producing a
# "# coding:" declaration.  The literal is raw, so every backslash sequence
# below is written to the generated file verbatim and is only interpreted
# when that file is compiled.  Only printable ASCII and newlines may appear
# here (TestLiterals.test_template enforces this).
TEMPLATE = r"""# coding: %s
a = 'x'
assert ord(a) == 120
b = '\x01'
assert ord(b) == 1
c = r'\x01'
assert list(map(ord, c)) == [92, 120, 48, 49]
d = '\x81'
assert ord(d) == 0x81
e = r'\x81'
assert list(map(ord, e)) == [92, 120, 56, 49]
f = '\u1881'
assert ord(f) == 0x1881
g = r'\u1881'
assert list(map(ord, g)) == [92, 117, 49, 56, 56, 49]
"""
54
55
def byte(i):
    """Return a bytes object of length one holding the integer value i."""
    single = (i,)
    return bytes(single)
58
59
class TestLiterals(unittest.TestCase):
    """Check how str and bytes literals are parsed, via eval() and via import.

    The eval-based tests come in pairs: when the outer literal is raw
    (r\"\"\"...\"\"\"), eval() receives the backslash sequence itself and has to
    interpret it; when the outer literal is not raw, the escape is resolved
    first and eval() receives the resulting character inside the quotes.

    The file-based tests write TEMPLATE to a temporary module using a given
    encoding and import it, so the assertions inside TEMPLATE run through
    the file-compilation code path.
    """

    def setUp(self):
        # Remember sys.path so tearDown can restore it exactly, then make a
        # scratch directory importable for the modules check_encoding writes.
        self.save_path = sys.path[:]
        self.tmpdir = tempfile.mkdtemp()
        sys.path.insert(0, self.tmpdir)

    def tearDown(self):
        # Slice-assign so the very same list object is restored in place.
        sys.path[:] = self.save_path
        shutil.rmtree(self.tmpdir, ignore_errors=True)

    def test_template(self):
        # Check that the template doesn't contain any non-printables
        # except for \n.  Uses assertTrue rather than a bare assert so the
        # check still runs when Python is started with -O.
        for c in TEMPLATE:
            self.assertTrue(c == '\n' or ' ' <= c <= '~', repr(c))

    def test_eval_str_normal(self):
        self.assertEqual(eval(""" 'x' """), 'x')
        self.assertEqual(eval(r""" '\x01' """), chr(1))
        self.assertEqual(eval(""" '\x01' """), chr(1))
        self.assertEqual(eval(r""" '\x81' """), chr(0x81))
        self.assertEqual(eval(""" '\x81' """), chr(0x81))
        self.assertEqual(eval(r""" '\u1881' """), chr(0x1881))
        self.assertEqual(eval(""" '\u1881' """), chr(0x1881))

    def test_eval_str_raw(self):
        # In a raw inner literal the backslash sequence must survive as-is;
        # an already-resolved character is simply passed through.
        self.assertEqual(eval(""" r'x' """), 'x')
        self.assertEqual(eval(r""" r'\x01' """), '\\' + 'x01')
        self.assertEqual(eval(""" r'\x01' """), chr(1))
        self.assertEqual(eval(r""" r'\x81' """), '\\' + 'x81')
        self.assertEqual(eval(""" r'\x81' """), chr(0x81))
        self.assertEqual(eval(r""" r'\u1881' """), '\\' + 'u1881')
        self.assertEqual(eval(""" r'\u1881' """), chr(0x1881))

    def test_eval_bytes_normal(self):
        # Non-ASCII characters placed directly inside a bytes literal are a
        # SyntaxError; \u is not an escape in bytes and stays literal.
        self.assertEqual(eval(""" b'x' """), b'x')
        self.assertEqual(eval(r""" b'\x01' """), byte(1))
        self.assertEqual(eval(""" b'\x01' """), byte(1))
        self.assertEqual(eval(r""" b'\x81' """), byte(0x81))
        self.assertRaises(SyntaxError, eval, """ b'\x81' """)
        self.assertEqual(eval(r""" b'\u1881' """), b'\\' + b'u1881')
        self.assertRaises(SyntaxError, eval, """ b'\u1881' """)

    def test_eval_bytes_raw(self):
        # Both prefix orders (br and rb) must behave identically.
        self.assertEqual(eval(""" br'x' """), b'x')
        self.assertEqual(eval(""" rb'x' """), b'x')
        self.assertEqual(eval(r""" br'\x01' """), b'\\' + b'x01')
        self.assertEqual(eval(r""" rb'\x01' """), b'\\' + b'x01')
        self.assertEqual(eval(""" br'\x01' """), byte(1))
        self.assertEqual(eval(""" rb'\x01' """), byte(1))
        self.assertEqual(eval(r""" br'\x81' """), b"\\" + b"x81")
        self.assertEqual(eval(r""" rb'\x81' """), b"\\" + b"x81")
        self.assertRaises(SyntaxError, eval, """ br'\x81' """)
        self.assertRaises(SyntaxError, eval, """ rb'\x81' """)
        self.assertEqual(eval(r""" br'\u1881' """), b"\\" + b"u1881")
        self.assertEqual(eval(r""" rb'\u1881' """), b"\\" + b"u1881")
        self.assertRaises(SyntaxError, eval, """ br'\u1881' """)
        self.assertRaises(SyntaxError, eval, """ rb'\u1881' """)
        # Repeated or over-long prefixes are rejected.
        self.assertRaises(SyntaxError, eval, """ bb'' """)
        self.assertRaises(SyntaxError, eval, """ rr'' """)
        self.assertRaises(SyntaxError, eval, """ brr'' """)
        self.assertRaises(SyntaxError, eval, """ bbr'' """)
        self.assertRaises(SyntaxError, eval, """ rrb'' """)
        self.assertRaises(SyntaxError, eval, """ rbb'' """)

    def test_eval_str_u(self):
        # The u/U prefix is accepted on its own but cannot be combined with
        # r or b in either order.
        self.assertEqual(eval(""" u'x' """), 'x')
        self.assertEqual(eval(""" U'\u00e4' """), 'ä')
        self.assertEqual(eval(""" u'\N{LATIN SMALL LETTER A WITH DIAERESIS}' """), 'ä')
        self.assertRaises(SyntaxError, eval, """ ur'' """)
        self.assertRaises(SyntaxError, eval, """ ru'' """)
        self.assertRaises(SyntaxError, eval, """ bu'' """)
        self.assertRaises(SyntaxError, eval, """ ub'' """)

    def check_encoding(self, encoding, extra=""):
        """Write TEMPLATE (plus extra) as a module in *encoding* and import it.

        encoding -- codec name; used for the coding declaration, for
                    encoding the file, and (with '-' replaced by '_') for
                    the module name.
        extra    -- additional source text appended after the template.

        Any exception raised while compiling or executing the generated
        module (for example SyntaxError or AssertionError) propagates to
        the caller.
        """
        # Function-scope import: only this helper needs importlib.
        import importlib

        modname = "xx_" + encoding.replace("-", "_")
        fn = os.path.join(self.tmpdir, modname + ".py")
        with open(fn, "w", encoding=encoding) as f:
            f.write(TEMPLATE % encoding)
            f.write(extra)
        # The module file was created while the interpreter is running, so
        # make sure the path finders notice it.
        importlib.invalidate_caches()
        try:
            __import__(modname)
        finally:
            # Always forget the module so another test that generates the
            # same module name gets a fresh import.
            sys.modules.pop(modname, None)

    def test_file_utf_8(self):
        extra = "z = '\u1234'; assert ord(z) == 0x1234\n"
        self.check_encoding("utf-8", extra)

    def test_file_utf_8_error(self):
        # A non-ASCII character inside a bytes literal must be rejected.
        extra = "b'\x80'\n"
        self.assertRaises(SyntaxError, self.check_encoding, "utf-8", extra)

    def test_file_utf8(self):
        self.check_encoding("utf-8")

    def test_file_iso_8859_1(self):
        self.check_encoding("iso-8859-1")

    def test_file_latin_1(self):
        self.check_encoding("latin-1")

    def test_file_latin9(self):
        self.check_encoding("latin9")
166
167
def test_main():
    """Pass this module's name to test.support.run_unittest."""
    this_module = __name__
    test.support.run_unittest(this_module)


if __name__ == "__main__":
    test_main()