Blame - Lib/test/test_strlit.py - platform/external/python/cpython2

blob: 9eb30e9c54b670bb92e5bc5b39f253e687a0a1e6 [file] [log] [blame]

Guido van Rossum	29fd712	2007-11-12 01:13:56 +0000	[diff] [blame]	1	r"""Test correct treatment of various string literals by the parser.
				2
				3	There are four types of string literals:
				4
				5	'abc' -- normal str
				6	r'abc' -- raw str
				7	b'xyz' -- normal bytes
				8	br'xyz' -- raw bytes
				9
				10	The difference between normal and raw strings is of course that in a
				11	raw string, \ escapes (while still used to determine the end of the
				12	literal) are not interpreted, so that r'\x00' contains four
				13	characters: a backslash, an x, and two zeros; while '\x00' contains a
				14	single character (code point zero).
				15
				16	The tricky thing is what should happen when non-ASCII bytes are used
				17	inside literals. For bytes literals, this is considered illegal. But
				18	for str literals, those bytes are supposed to be decoded using the
				19	encoding declared for the file (UTF-8 by default).
				20
				21	We have to test this with various file encodings. We also test it with
				22	exec()/eval(), which uses a different code path.
				23
				24	This file is really about correct treatment of encodings and
Ezio Melotti	1392500	2011-03-16 11:05:33 +0200	[diff] [blame^]	25	backslashes. It doesn't concern itself with issues like single
Guido van Rossum	29fd712	2007-11-12 01:13:56 +0000	[diff] [blame]	26	vs. double quotes or singly- vs. triply-quoted strings: that's dealt
				27	with elsewhere (I assume).
				28	"""
				29
				30	import os
				31	import sys
				32	import shutil
				33	import tempfile
				34	import unittest
				35
				36
				37	TEMPLATE = r"""# coding: %s
				38	a = 'x'
				39	assert ord(a) == 120
				40	b = '\x01'
				41	assert ord(b) == 1
				42	c = r'\x01'
				43	assert list(map(ord, c)) == [92, 120, 48, 49]
				44	d = '\x81'
				45	assert ord(d) == 0x81
				46	e = r'\x81'
				47	assert list(map(ord, e)) == [92, 120, 56, 49]
				48	f = '\u1881'
				49	assert ord(f) == 0x1881
				50	g = r'\u1881'
				51	assert list(map(ord, g)) == [92, 117, 49, 56, 56, 49]
				52	"""
				53
				54
				55	def byte(i):
				56	return bytes([i])
				57
				58
				59	class TestLiterals(unittest.TestCase):
				60
				61	def setUp(self):
				62	self.save_path = sys.path[:]
				63	self.tmpdir = tempfile.mkdtemp()
				64	sys.path.insert(0, self.tmpdir)
				65
				66	def tearDown(self):
				67	sys.path = self.save_path
				68	shutil.rmtree(self.tmpdir, ignore_errors=True)
				69
				70	def test_template(self):
				71	# Check that the template doesn't contain any non-printables
				72	# except for \n.
				73	for c in TEMPLATE:
				74	assert c == '\n' or ' ' <= c <= '~', repr(c)
				75
				76	def test_eval_str_normal(self):
				77	self.assertEqual(eval(""" 'x' """), 'x')
				78	self.assertEqual(eval(r""" '\x01' """), chr(1))
				79	self.assertEqual(eval(""" '\x01' """), chr(1))
				80	self.assertEqual(eval(r""" '\x81' """), chr(0x81))
				81	self.assertEqual(eval(""" '\x81' """), chr(0x81))
				82	self.assertEqual(eval(r""" '\u1881' """), chr(0x1881))
				83	self.assertEqual(eval(""" '\u1881' """), chr(0x1881))
				84
				85	def test_eval_str_raw(self):
				86	self.assertEqual(eval(""" r'x' """), 'x')
				87	self.assertEqual(eval(r""" r'\x01' """), '\\' + 'x01')
				88	self.assertEqual(eval(""" r'\x01' """), chr(1))
				89	self.assertEqual(eval(r""" r'\x81' """), '\\' + 'x81')
				90	self.assertEqual(eval(""" r'\x81' """), chr(0x81))
				91	self.assertEqual(eval(r""" r'\u1881' """), '\\' + 'u1881')
				92	self.assertEqual(eval(""" r'\u1881' """), chr(0x1881))
				93
				94	def test_eval_bytes_normal(self):
				95	self.assertEqual(eval(""" b'x' """), b'x')
				96	self.assertEqual(eval(r""" b'\x01' """), byte(1))
				97	self.assertEqual(eval(""" b'\x01' """), byte(1))
				98	self.assertEqual(eval(r""" b'\x81' """), byte(0x81))
				99	self.assertRaises(SyntaxError, eval, """ b'\x81' """)
				100	self.assertEqual(eval(r""" b'\u1881' """), b'\\' + b'u1881')
				101	self.assertRaises(SyntaxError, eval, """ b'\u1881' """)
				102
				103	def test_eval_bytes_raw(self):
				104	self.assertEqual(eval(""" br'x' """), b'x')
				105	self.assertEqual(eval(r""" br'\x01' """), b'\\' + b'x01')
				106	self.assertEqual(eval(""" br'\x01' """), byte(1))
				107	self.assertEqual(eval(r""" br'\x81' """), b"\\" + b"x81")
				108	self.assertRaises(SyntaxError, eval, """ br'\x81' """)
				109	self.assertEqual(eval(r""" br'\u1881' """), b"\\" + b"u1881")
				110	self.assertRaises(SyntaxError, eval, """ br'\u1881' """)
				111
				112	def check_encoding(self, encoding, extra=""):
				113	modname = "xx_" + encoding.replace("-", "_")
				114	fn = os.path.join(self.tmpdir, modname + ".py")
				115	f = open(fn, "w", encoding=encoding)
				116	try:
				117	f.write(TEMPLATE % encoding)
				118	f.write(extra)
				119	finally:
				120	f.close()
				121	__import__(modname)
				122	del sys.modules[modname]
				123
				124	def test_file_utf_8(self):
				125	extra = "z = '\u1234'; assert ord(z) == 0x1234\n"
				126	self.check_encoding("utf-8", extra)
				127
				128	def test_file_utf_8_error(self):
				129	extra = "b'\x80'\n"
				130	self.assertRaises(SyntaxError, self.check_encoding, "utf-8", extra)
				131
				132	def test_file_utf8(self):
				133	self.check_encoding("utf8")
				134
				135	def test_file_iso_8859_1(self):
				136	self.check_encoding("iso-8859-1")
				137
				138	def test_file_latin_1(self):
				139	self.check_encoding("latin-1")
				140
				141	def test_file_latin9(self):
				142	self.check_encoding("latin9")
				143
				144
				145	if __name__ == "__main__":
				146	# Hack so that error messages containing non-ASCII can be printed
				147	sys.stdout._encoding = sys.stderr._encoding = "utf-8"
				148	unittest.main()