blob: fb9cdbbffa46f630b4e75b16256b943f99f0b674 [file] [log] [blame]
r"""Test correct treatment of various string literals by the parser.

There are four types of string literals:

    'abc'             -- normal str
    r'abc'            -- raw str
    b'xyz'            -- normal bytes
    br'xyz'           -- raw bytes

The difference between normal and raw strings is of course that in a
raw string, \ escapes (while still used to determine the end of the
literal) are not interpreted, so that r'\x00' contains four
characters: a backslash, an x, and two zeros; while '\x00' contains a
single character (code point zero).

The tricky thing is what should happen when non-ASCII bytes are used
inside literals.  For bytes literals, this is considered illegal.  But
for str literals, those bytes are supposed to be decoded using the
encoding declared for the file (UTF-8 by default).

We have to test this with various file encodings.  We also test it with
exec()/eval(), which uses a different code path.

This file is really about correct treatment of encodings and
backslashes.  It doesn't concern itself with issues like single
vs. double quotes or singly- vs. triply-quoted strings: that's dealt
with elsewhere (I assume).
"""
29
30import os
31import sys
32import shutil
33import tempfile
34import unittest
35
36
# Source text of a small self-checking module.  check_encoding() fills the
# single "%s" placeholder with an encoding name (producing the "# coding:"
# declaration), writes the result to a file in that encoding and imports it,
# so the asserts below run against literals parsed from a real source file.
#
# This is a raw string on purpose: the backslash escapes are stored here as
# plain ASCII characters and are only interpreted (or, for the r'' literals,
# left alone) when the generated module is parsed.  test_template() verifies
# that the text holds nothing but printable ASCII and newlines.
TEMPLATE = r"""# coding: %s
a = 'x'
assert ord(a) == 120
b = '\x01'
assert ord(b) == 1
c = r'\x01'
assert list(map(ord, c)) == [92, 120, 48, 49]
d = '\x81'
assert ord(d) == 0x81
e = r'\x81'
assert list(map(ord, e)) == [92, 120, 56, 49]
f = '\u1881'
assert ord(f) == 0x1881
g = r'\u1881'
assert list(map(ord, g)) == [92, 117, 49, 56, 56, 49]
"""
53
54
def byte(i):
    """Return a bytes object of length one whose only element is *i*."""
    return bytes((i,))
57
58
class TestLiterals(unittest.TestCase):
    """Check how str and bytes literals are parsed, via eval() and import.

    The test_eval_* methods feed source text to eval().  Each escape is
    tried twice: once with a raw outer literal, so the backslash sequence
    itself reaches the parser, and once with a normal outer literal, so the
    already-decoded character reaches the parser.

    The test_file_* methods write TEMPLATE to a module file in a given
    encoding and import it (see check_encoding()).
    """

    def setUp(self):
        # Put a fresh scratch directory at the front of sys.path so the
        # modules written by check_encoding() can be imported.
        self.save_path = sys.path[:]
        self.tmpdir = tempfile.mkdtemp()
        sys.path.insert(0, self.tmpdir)

    def tearDown(self):
        # Restore the contents in place rather than rebinding sys.path, so
        # that any other reference to the same list object is restored too.
        sys.path[:] = self.save_path
        shutil.rmtree(self.tmpdir, ignore_errors=True)

    def test_template(self):
        # Check that the template doesn't contain any non-printables
        # except for \n.
        for c in TEMPLATE:
            # A unittest assertion is used instead of a bare assert
            # statement, which is removed when Python runs with -O.
            self.assertTrue(c == '\n' or ' ' <= c <= '~', repr(c))

    def test_eval_str_normal(self):
        # In a normal str literal the escape and the literal character
        # both yield the same single character.
        self.assertEqual(eval(""" 'x' """), 'x')
        self.assertEqual(eval(r""" '\x01' """), chr(1))
        self.assertEqual(eval(""" '\x01' """), chr(1))
        self.assertEqual(eval(r""" '\x81' """), chr(0x81))
        self.assertEqual(eval(""" '\x81' """), chr(0x81))
        self.assertEqual(eval(r""" '\u1881' """), chr(0x1881))
        self.assertEqual(eval(""" '\u1881' """), chr(0x1881))

    def test_eval_str_raw(self):
        # In a raw str literal an escape sequence stays as backslash plus
        # text, while a literal character is passed through unchanged.
        self.assertEqual(eval(""" r'x' """), 'x')
        self.assertEqual(eval(r""" r'\x01' """), '\\' + 'x01')
        self.assertEqual(eval(""" r'\x01' """), chr(1))
        self.assertEqual(eval(r""" r'\x81' """), '\\' + 'x81')
        self.assertEqual(eval(""" r'\x81' """), chr(0x81))
        self.assertEqual(eval(r""" r'\u1881' """), '\\' + 'u1881')
        self.assertEqual(eval(""" r'\u1881' """), chr(0x1881))

    def test_eval_bytes_normal(self):
        # \x escapes are interpreted in bytes literals, \u is not, and a
        # literal non-ASCII character is a syntax error.
        self.assertEqual(eval(""" b'x' """), b'x')
        self.assertEqual(eval(r""" b'\x01' """), byte(1))
        self.assertEqual(eval(""" b'\x01' """), byte(1))
        self.assertEqual(eval(r""" b'\x81' """), byte(0x81))
        self.assertRaises(SyntaxError, eval, """ b'\x81' """)
        self.assertEqual(eval(r""" b'\u1881' """), b'\\' + b'u1881')
        self.assertRaises(SyntaxError, eval, """ b'\u1881' """)

    def test_eval_bytes_raw(self):
        # Raw bytes literals keep every escape verbatim; a literal
        # non-ASCII character is still a syntax error.
        self.assertEqual(eval(""" br'x' """), b'x')
        self.assertEqual(eval(r""" br'\x01' """), b'\\' + b'x01')
        self.assertEqual(eval(""" br'\x01' """), byte(1))
        self.assertEqual(eval(r""" br'\x81' """), b"\\" + b"x81")
        self.assertRaises(SyntaxError, eval, """ br'\x81' """)
        self.assertEqual(eval(r""" br'\u1881' """), b"\\" + b"u1881")
        self.assertRaises(SyntaxError, eval, """ br'\u1881' """)

    def check_encoding(self, encoding, extra=""):
        """Write TEMPLATE to a module file in *encoding* and import it.

        The module is named "xx_" plus *encoding* with dashes replaced by
        underscores, and is created in the per-test scratch directory.
        *extra* is appended verbatim after the template text.  Any
        exception raised while importing the module (for example an
        AssertionError from the template's own checks, or a SyntaxError)
        propagates to the caller.
        """
        modname = "xx_" + encoding.replace("-", "_")
        fn = os.path.join(self.tmpdir, modname + ".py")
        with open(fn, "w", encoding=encoding) as f:
            f.write(TEMPLATE % encoding)
            f.write(extra)
        try:
            __import__(modname)
        finally:
            # Several tests reuse the same module name, so never leave an
            # entry behind for the next one, whether or not the import
            # succeeded.
            sys.modules.pop(modname, None)

    def test_file_utf_8(self):
        extra = "z = '\u1234'; assert ord(z) == 0x1234\n"
        self.check_encoding("utf-8", extra)

    def test_file_utf_8_error(self):
        # The non-raw "\x80" puts a literal non-ASCII character inside the
        # bytes literal of the generated file, which must be rejected.
        extra = "b'\x80'\n"
        self.assertRaises(SyntaxError, self.check_encoding, "utf-8", extra)

    def test_file_utf8(self):
        # NOTE(review): despite the method name, this uses the same "utf-8"
        # spelling as test_file_utf_8 -- confirm whether the "utf8" alias
        # was meant to be exercised here.
        self.check_encoding("utf-8")

    def test_file_iso_8859_1(self):
        self.check_encoding("iso-8859-1")

    def test_file_latin_1(self):
        self.check_encoding("latin-1")

    def test_file_latin9(self):
        self.check_encoding("latin9")
143
144
if __name__ == "__main__":
    # Make sure error messages containing non-ASCII text can be printed.
    # The public reconfigure() API is used instead of assigning the private
    # "_encoding" attribute, which the C-implemented text streams do not
    # expose as a writable attribute.  Streams without reconfigure()
    # (older Pythons, replaced stdout/stderr objects) are left untouched.
    for _stream in (sys.stdout, sys.stderr):
        _reconfigure = getattr(_stream, "reconfigure", None)
        if _reconfigure is not None:
            _reconfigure(encoding="utf-8", errors="backslashreplace")
    unittest.main()