r"""Test correct treatment of various string literals by the parser.

There are four types of string literals:

    'abc'             -- normal str
    r'abc'            -- raw str
    b'xyz'            -- normal bytes
    br'xyz' | rb'xyz' -- raw bytes

The difference between normal and raw strings is of course that in a
raw string, \ escapes (while still used to determine the end of the
literal) are not interpreted, so that r'\x00' contains four
characters: a backslash, an x, and two zeros; while '\x00' contains a
single character (code point zero).

The tricky thing is what should happen when non-ASCII bytes are used
inside literals.  For bytes literals, this is considered illegal.  But
for str literals, those bytes are supposed to be decoded using the
encoding declared for the file (UTF-8 by default).

We have to test this with various file encodings.  We also test it with
exec()/eval(), which uses a different code path.

This file is really about correct treatment of encodings and
backslashes.  It doesn't concern itself with issues like single
vs. double quotes or singly- vs. triply-quoted strings: that's dealt
with elsewhere (I assume).
"""
| 29 | |
| 30 | import os |
| 31 | import sys |
| 32 | import shutil |
| 33 | import tempfile |
| 34 | import unittest |
Serhiy Storchaka | 4c5b6ba | 2019-08-10 01:34:22 +0300 | [diff] [blame] | 35 | import warnings |
Guido van Rossum | 29fd712 | 2007-11-12 01:13:56 +0000 | [diff] [blame] | 36 | |
| 37 | |
# Source text of the module that check_encoding() writes to disk and then
# imports.  The single %s slot on the first line is filled with the encoding
# name, producing the file's coding declaration.  The literal is raw, so the
# backslash escapes below are written to the generated module unprocessed and
# are interpreted (or, for r'' literals, left alone) when that module is
# compiled.  test_template verifies that this text stays printable ASCII
# plus newlines.
TEMPLATE = r"""# coding: %s
a = 'x'
assert ord(a) == 120
b = '\x01'
assert ord(b) == 1
c = r'\x01'
assert list(map(ord, c)) == [92, 120, 48, 49]
d = '\x81'
assert ord(d) == 0x81
e = r'\x81'
assert list(map(ord, e)) == [92, 120, 56, 49]
f = '\u1881'
assert ord(f) == 0x1881
g = r'\u1881'
assert list(map(ord, g)) == [92, 117, 49, 56, 56, 49]
h = '\U0001d120'
assert ord(h) == 0x1d120
i = r'\U0001d120'
assert list(map(ord, i)) == [92, 85, 48, 48, 48, 49, 100, 49, 50, 48]
"""
| 58 | |
| 59 | |
def byte(i):
    """Return the length-1 bytes object whose only element is *i*."""
    single = (i,)
    return bytes(single)
| 62 | |
| 63 | |
| 64 | class TestLiterals(unittest.TestCase): |
| 65 | |
Serhiy Storchaka | e7a4bb5 | 2019-02-19 08:30:15 +0200 | [diff] [blame] | 66 | from test.support import check_syntax_warning |
| 67 | |
Guido van Rossum | 29fd712 | 2007-11-12 01:13:56 +0000 | [diff] [blame] | 68 | def setUp(self): |
| 69 | self.save_path = sys.path[:] |
| 70 | self.tmpdir = tempfile.mkdtemp() |
| 71 | sys.path.insert(0, self.tmpdir) |
| 72 | |
| 73 | def tearDown(self): |
Georg Brandl | 242631d | 2012-02-20 21:36:28 +0100 | [diff] [blame] | 74 | sys.path[:] = self.save_path |
Guido van Rossum | 29fd712 | 2007-11-12 01:13:56 +0000 | [diff] [blame] | 75 | shutil.rmtree(self.tmpdir, ignore_errors=True) |
| 76 | |
| 77 | def test_template(self): |
| 78 | # Check that the template doesn't contain any non-printables |
| 79 | # except for \n. |
| 80 | for c in TEMPLATE: |
| 81 | assert c == '\n' or ' ' <= c <= '~', repr(c) |
| 82 | |
| 83 | def test_eval_str_normal(self): |
| 84 | self.assertEqual(eval(""" 'x' """), 'x') |
| 85 | self.assertEqual(eval(r""" '\x01' """), chr(1)) |
| 86 | self.assertEqual(eval(""" '\x01' """), chr(1)) |
| 87 | self.assertEqual(eval(r""" '\x81' """), chr(0x81)) |
| 88 | self.assertEqual(eval(""" '\x81' """), chr(0x81)) |
| 89 | self.assertEqual(eval(r""" '\u1881' """), chr(0x1881)) |
| 90 | self.assertEqual(eval(""" '\u1881' """), chr(0x1881)) |
Serhiy Storchaka | 5e61f14 | 2013-02-10 17:36:00 +0200 | [diff] [blame] | 91 | self.assertEqual(eval(r""" '\U0001d120' """), chr(0x1d120)) |
| 92 | self.assertEqual(eval(""" '\U0001d120' """), chr(0x1d120)) |
| 93 | |
| 94 | def test_eval_str_incomplete(self): |
| 95 | self.assertRaises(SyntaxError, eval, r""" '\x' """) |
| 96 | self.assertRaises(SyntaxError, eval, r""" '\x0' """) |
| 97 | self.assertRaises(SyntaxError, eval, r""" '\u' """) |
| 98 | self.assertRaises(SyntaxError, eval, r""" '\u0' """) |
| 99 | self.assertRaises(SyntaxError, eval, r""" '\u00' """) |
| 100 | self.assertRaises(SyntaxError, eval, r""" '\u000' """) |
| 101 | self.assertRaises(SyntaxError, eval, r""" '\U' """) |
| 102 | self.assertRaises(SyntaxError, eval, r""" '\U0' """) |
| 103 | self.assertRaises(SyntaxError, eval, r""" '\U00' """) |
| 104 | self.assertRaises(SyntaxError, eval, r""" '\U000' """) |
| 105 | self.assertRaises(SyntaxError, eval, r""" '\U0000' """) |
| 106 | self.assertRaises(SyntaxError, eval, r""" '\U00000' """) |
| 107 | self.assertRaises(SyntaxError, eval, r""" '\U000000' """) |
| 108 | self.assertRaises(SyntaxError, eval, r""" '\U0000000' """) |
Guido van Rossum | 29fd712 | 2007-11-12 01:13:56 +0000 | [diff] [blame] | 109 | |
Eric V. Smith | 5646648 | 2016-10-31 14:46:26 -0400 | [diff] [blame] | 110 | def test_eval_str_invalid_escape(self): |
| 111 | for b in range(1, 128): |
| 112 | if b in b"""\n\r"'01234567NU\\abfnrtuvx""": |
| 113 | continue |
Serhiy Storchaka | 4c5b6ba | 2019-08-10 01:34:22 +0300 | [diff] [blame] | 114 | with self.assertWarns(DeprecationWarning): |
Eric V. Smith | 5646648 | 2016-10-31 14:46:26 -0400 | [diff] [blame] | 115 | self.assertEqual(eval(r"'\%c'" % b), '\\' + chr(b)) |
Victor Stinner | f9cca36 | 2016-11-15 09:12:10 +0100 | [diff] [blame] | 116 | |
Serhiy Storchaka | 4c5b6ba | 2019-08-10 01:34:22 +0300 | [diff] [blame] | 117 | with warnings.catch_warnings(record=True) as w: |
| 118 | warnings.simplefilter('always', category=DeprecationWarning) |
| 119 | eval("'''\n\\z'''") |
| 120 | self.assertEqual(len(w), 1) |
| 121 | self.assertEqual(w[0].filename, '<string>') |
| 122 | self.assertEqual(w[0].lineno, 1) |
| 123 | |
| 124 | with warnings.catch_warnings(record=True) as w: |
| 125 | warnings.simplefilter('error', category=DeprecationWarning) |
| 126 | with self.assertRaises(SyntaxError) as cm: |
| 127 | eval("'''\n\\z'''") |
| 128 | exc = cm.exception |
| 129 | self.assertEqual(w, []) |
| 130 | self.assertEqual(exc.filename, '<string>') |
| 131 | self.assertEqual(exc.lineno, 1) |
Victor Stinner | f9cca36 | 2016-11-15 09:12:10 +0100 | [diff] [blame] | 132 | |
Guido van Rossum | 29fd712 | 2007-11-12 01:13:56 +0000 | [diff] [blame] | 133 | def test_eval_str_raw(self): |
| 134 | self.assertEqual(eval(""" r'x' """), 'x') |
| 135 | self.assertEqual(eval(r""" r'\x01' """), '\\' + 'x01') |
| 136 | self.assertEqual(eval(""" r'\x01' """), chr(1)) |
| 137 | self.assertEqual(eval(r""" r'\x81' """), '\\' + 'x81') |
| 138 | self.assertEqual(eval(""" r'\x81' """), chr(0x81)) |
| 139 | self.assertEqual(eval(r""" r'\u1881' """), '\\' + 'u1881') |
| 140 | self.assertEqual(eval(""" r'\u1881' """), chr(0x1881)) |
Serhiy Storchaka | 5e61f14 | 2013-02-10 17:36:00 +0200 | [diff] [blame] | 141 | self.assertEqual(eval(r""" r'\U0001d120' """), '\\' + 'U0001d120') |
| 142 | self.assertEqual(eval(""" r'\U0001d120' """), chr(0x1d120)) |
Guido van Rossum | 29fd712 | 2007-11-12 01:13:56 +0000 | [diff] [blame] | 143 | |
| 144 | def test_eval_bytes_normal(self): |
| 145 | self.assertEqual(eval(""" b'x' """), b'x') |
| 146 | self.assertEqual(eval(r""" b'\x01' """), byte(1)) |
| 147 | self.assertEqual(eval(""" b'\x01' """), byte(1)) |
| 148 | self.assertEqual(eval(r""" b'\x81' """), byte(0x81)) |
| 149 | self.assertRaises(SyntaxError, eval, """ b'\x81' """) |
R David Murray | 44b548d | 2016-09-08 13:59:53 -0400 | [diff] [blame] | 150 | self.assertEqual(eval(r""" br'\u1881' """), b'\\' + b'u1881') |
Guido van Rossum | 29fd712 | 2007-11-12 01:13:56 +0000 | [diff] [blame] | 151 | self.assertRaises(SyntaxError, eval, """ b'\u1881' """) |
R David Murray | 44b548d | 2016-09-08 13:59:53 -0400 | [diff] [blame] | 152 | self.assertEqual(eval(r""" br'\U0001d120' """), b'\\' + b'U0001d120') |
Serhiy Storchaka | 5e61f14 | 2013-02-10 17:36:00 +0200 | [diff] [blame] | 153 | self.assertRaises(SyntaxError, eval, """ b'\U0001d120' """) |
| 154 | |
| 155 | def test_eval_bytes_incomplete(self): |
| 156 | self.assertRaises(SyntaxError, eval, r""" b'\x' """) |
| 157 | self.assertRaises(SyntaxError, eval, r""" b'\x0' """) |
Guido van Rossum | 29fd712 | 2007-11-12 01:13:56 +0000 | [diff] [blame] | 158 | |
Eric V. Smith | 5646648 | 2016-10-31 14:46:26 -0400 | [diff] [blame] | 159 | def test_eval_bytes_invalid_escape(self): |
| 160 | for b in range(1, 128): |
| 161 | if b in b"""\n\r"'01234567\\abfnrtvx""": |
| 162 | continue |
Serhiy Storchaka | 4c5b6ba | 2019-08-10 01:34:22 +0300 | [diff] [blame] | 163 | with self.assertWarns(DeprecationWarning): |
Eric V. Smith | 5646648 | 2016-10-31 14:46:26 -0400 | [diff] [blame] | 164 | self.assertEqual(eval(r"b'\%c'" % b), b'\\' + bytes([b])) |
Victor Stinner | f9cca36 | 2016-11-15 09:12:10 +0100 | [diff] [blame] | 165 | |
Serhiy Storchaka | 4c5b6ba | 2019-08-10 01:34:22 +0300 | [diff] [blame] | 166 | with warnings.catch_warnings(record=True) as w: |
| 167 | warnings.simplefilter('always', category=DeprecationWarning) |
| 168 | eval("b'''\n\\z'''") |
| 169 | self.assertEqual(len(w), 1) |
| 170 | self.assertEqual(w[0].filename, '<string>') |
| 171 | self.assertEqual(w[0].lineno, 1) |
| 172 | |
| 173 | with warnings.catch_warnings(record=True) as w: |
| 174 | warnings.simplefilter('error', category=DeprecationWarning) |
| 175 | with self.assertRaises(SyntaxError) as cm: |
| 176 | eval("b'''\n\\z'''") |
| 177 | exc = cm.exception |
| 178 | self.assertEqual(w, []) |
| 179 | self.assertEqual(exc.filename, '<string>') |
| 180 | self.assertEqual(exc.lineno, 1) |
Victor Stinner | f9cca36 | 2016-11-15 09:12:10 +0100 | [diff] [blame] | 181 | |
Guido van Rossum | 29fd712 | 2007-11-12 01:13:56 +0000 | [diff] [blame] | 182 | def test_eval_bytes_raw(self): |
| 183 | self.assertEqual(eval(""" br'x' """), b'x') |
Antoine Pitrou | 3a5d4cb | 2012-01-12 22:46:19 +0100 | [diff] [blame] | 184 | self.assertEqual(eval(""" rb'x' """), b'x') |
Guido van Rossum | 29fd712 | 2007-11-12 01:13:56 +0000 | [diff] [blame] | 185 | self.assertEqual(eval(r""" br'\x01' """), b'\\' + b'x01') |
Antoine Pitrou | 3a5d4cb | 2012-01-12 22:46:19 +0100 | [diff] [blame] | 186 | self.assertEqual(eval(r""" rb'\x01' """), b'\\' + b'x01') |
Guido van Rossum | 29fd712 | 2007-11-12 01:13:56 +0000 | [diff] [blame] | 187 | self.assertEqual(eval(""" br'\x01' """), byte(1)) |
Antoine Pitrou | 3a5d4cb | 2012-01-12 22:46:19 +0100 | [diff] [blame] | 188 | self.assertEqual(eval(""" rb'\x01' """), byte(1)) |
Guido van Rossum | 29fd712 | 2007-11-12 01:13:56 +0000 | [diff] [blame] | 189 | self.assertEqual(eval(r""" br'\x81' """), b"\\" + b"x81") |
Antoine Pitrou | 3a5d4cb | 2012-01-12 22:46:19 +0100 | [diff] [blame] | 190 | self.assertEqual(eval(r""" rb'\x81' """), b"\\" + b"x81") |
Guido van Rossum | 29fd712 | 2007-11-12 01:13:56 +0000 | [diff] [blame] | 191 | self.assertRaises(SyntaxError, eval, """ br'\x81' """) |
Antoine Pitrou | 3a5d4cb | 2012-01-12 22:46:19 +0100 | [diff] [blame] | 192 | self.assertRaises(SyntaxError, eval, """ rb'\x81' """) |
Guido van Rossum | 29fd712 | 2007-11-12 01:13:56 +0000 | [diff] [blame] | 193 | self.assertEqual(eval(r""" br'\u1881' """), b"\\" + b"u1881") |
Antoine Pitrou | 3a5d4cb | 2012-01-12 22:46:19 +0100 | [diff] [blame] | 194 | self.assertEqual(eval(r""" rb'\u1881' """), b"\\" + b"u1881") |
Guido van Rossum | 29fd712 | 2007-11-12 01:13:56 +0000 | [diff] [blame] | 195 | self.assertRaises(SyntaxError, eval, """ br'\u1881' """) |
Antoine Pitrou | 3a5d4cb | 2012-01-12 22:46:19 +0100 | [diff] [blame] | 196 | self.assertRaises(SyntaxError, eval, """ rb'\u1881' """) |
Serhiy Storchaka | 5e61f14 | 2013-02-10 17:36:00 +0200 | [diff] [blame] | 197 | self.assertEqual(eval(r""" br'\U0001d120' """), b"\\" + b"U0001d120") |
Serhiy Storchaka | 801d955 | 2013-02-10 17:42:01 +0200 | [diff] [blame] | 198 | self.assertEqual(eval(r""" rb'\U0001d120' """), b"\\" + b"U0001d120") |
Serhiy Storchaka | 5e61f14 | 2013-02-10 17:36:00 +0200 | [diff] [blame] | 199 | self.assertRaises(SyntaxError, eval, """ br'\U0001d120' """) |
Serhiy Storchaka | 801d955 | 2013-02-10 17:42:01 +0200 | [diff] [blame] | 200 | self.assertRaises(SyntaxError, eval, """ rb'\U0001d120' """) |
Antoine Pitrou | 3a5d4cb | 2012-01-12 22:46:19 +0100 | [diff] [blame] | 201 | self.assertRaises(SyntaxError, eval, """ bb'' """) |
| 202 | self.assertRaises(SyntaxError, eval, """ rr'' """) |
| 203 | self.assertRaises(SyntaxError, eval, """ brr'' """) |
| 204 | self.assertRaises(SyntaxError, eval, """ bbr'' """) |
| 205 | self.assertRaises(SyntaxError, eval, """ rrb'' """) |
| 206 | self.assertRaises(SyntaxError, eval, """ rbb'' """) |
Guido van Rossum | 29fd712 | 2007-11-12 01:13:56 +0000 | [diff] [blame] | 207 | |
Christian Heimes | 0b3847d | 2012-06-20 11:17:58 +0200 | [diff] [blame] | 208 | def test_eval_str_u(self): |
| 209 | self.assertEqual(eval(""" u'x' """), 'x') |
| 210 | self.assertEqual(eval(""" U'\u00e4' """), 'ä') |
| 211 | self.assertEqual(eval(""" u'\N{LATIN SMALL LETTER A WITH DIAERESIS}' """), 'ä') |
| 212 | self.assertRaises(SyntaxError, eval, """ ur'' """) |
| 213 | self.assertRaises(SyntaxError, eval, """ ru'' """) |
| 214 | self.assertRaises(SyntaxError, eval, """ bu'' """) |
| 215 | self.assertRaises(SyntaxError, eval, """ ub'' """) |
| 216 | |
Guido van Rossum | 29fd712 | 2007-11-12 01:13:56 +0000 | [diff] [blame] | 217 | def check_encoding(self, encoding, extra=""): |
| 218 | modname = "xx_" + encoding.replace("-", "_") |
| 219 | fn = os.path.join(self.tmpdir, modname + ".py") |
| 220 | f = open(fn, "w", encoding=encoding) |
| 221 | try: |
| 222 | f.write(TEMPLATE % encoding) |
| 223 | f.write(extra) |
| 224 | finally: |
| 225 | f.close() |
| 226 | __import__(modname) |
| 227 | del sys.modules[modname] |
| 228 | |
| 229 | def test_file_utf_8(self): |
| 230 | extra = "z = '\u1234'; assert ord(z) == 0x1234\n" |
| 231 | self.check_encoding("utf-8", extra) |
| 232 | |
| 233 | def test_file_utf_8_error(self): |
| 234 | extra = "b'\x80'\n" |
| 235 | self.assertRaises(SyntaxError, self.check_encoding, "utf-8", extra) |
| 236 | |
| 237 | def test_file_utf8(self): |
Marc-André Lemburg | 8f36af7 | 2011-02-25 15:42:01 +0000 | [diff] [blame] | 238 | self.check_encoding("utf-8") |
Guido van Rossum | 29fd712 | 2007-11-12 01:13:56 +0000 | [diff] [blame] | 239 | |
| 240 | def test_file_iso_8859_1(self): |
| 241 | self.check_encoding("iso-8859-1") |
| 242 | |
| 243 | def test_file_latin_1(self): |
| 244 | self.check_encoding("latin-1") |
| 245 | |
| 246 | def test_file_latin9(self): |
| 247 | self.check_encoding("latin9") |
| 248 | |
| 249 | |
# Run the tests when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()