Blame - Lib/test/test_tokenize.py - platform/external/python/cpython2

blob: b064967bfdfbccf028ab52daa045ddb9128adae4 [file] [log] [blame]

Raymond Hettinger	68c0453	2005-06-10 11:05:19 +0000	[diff] [blame]	1	import os, glob, random
Tim Peters	ef57567	2006-03-31 03:17:30 +0000	[diff] [blame^]	2	from cStringIO import StringIO
				3	from test.test_support import (verbose, findfile, is_resource_enabled,
				4	TestFailed)
Raymond Hettinger	68c0453	2005-06-10 11:05:19 +0000	[diff] [blame]	5	from tokenize import (tokenize, generate_tokens, untokenize,
				6	NUMBER, NAME, OP, STRING)
Guido van Rossum	0874f7f	1997-10-27 22:15:06 +0000	[diff] [blame]	7
Tim Peters	ef57567	2006-03-31 03:17:30 +0000	[diff] [blame^]	8	# Test roundtrip for `untokenize`. `f` is a file path. The source code in f
				9	# is tokenized, converted back to source code via tokenize.untokenize(),
				10	# and tokenized again from the latter. The test fails if the second
				11	# tokenization doesn't match the first.
Raymond Hettinger	68c0453	2005-06-10 11:05:19 +0000	[diff] [blame]	12	def test_roundtrip(f):
				13	## print 'Testing:', f
Tim Peters	ef57567	2006-03-31 03:17:30 +0000	[diff] [blame^]	14	fobj = open(f)
Raymond Hettinger	68c0453	2005-06-10 11:05:19 +0000	[diff] [blame]	15	try:
Tim Peters	ef57567	2006-03-31 03:17:30 +0000	[diff] [blame^]	16	fulltok = list(generate_tokens(fobj.readline))
Raymond Hettinger	68c0453	2005-06-10 11:05:19 +0000	[diff] [blame]	17	finally:
Tim Peters	ef57567	2006-03-31 03:17:30 +0000	[diff] [blame^]	18	fobj.close()
Raymond Hettinger	68c0453	2005-06-10 11:05:19 +0000	[diff] [blame]	19
				20	t1 = [tok[:2] for tok in fulltok]
				21	newtext = untokenize(t1)
				22	readline = iter(newtext.splitlines(1)).next
				23	t2 = [tok[:2] for tok in generate_tokens(readline)]
Tim Peters	ef57567	2006-03-31 03:17:30 +0000	[diff] [blame^]	24	if t1 != t2:
				25	raise TestFailed("untokenize() roundtrip failed for %r" % f)
Raymond Hettinger	68c0453	2005-06-10 11:05:19 +0000	[diff] [blame]	26
Tim Peters	ef57567	2006-03-31 03:17:30 +0000	[diff] [blame^]	27	# This is an example from the docs, set up as a doctest.
Raymond Hettinger	68c0453	2005-06-10 11:05:19 +0000	[diff] [blame]	28	def decistmt(s):
				29	"""Substitute Decimals for floats in a string of statements.
				30
				31	>>> from decimal import Decimal
				32	>>> s = 'print +21.3e-5*-.1234/81.7'
				33	>>> decistmt(s)
				34	"print +Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')"
				35
Tim Peters	ef57567	2006-03-31 03:17:30 +0000	[diff] [blame^]	36	The format of the exponent is inherited from the platform C library.
				37	Known cases are "e-007" (Windows) and "e-07" (not Windows). Since
				38	we're only showing 12 digits, and the 13th isn't close to 5, the
				39	rest of the output should be platform-independent.
				40
				41	>>> exec(s) #doctest: +ELLIPSIS
				42	-3.21716034272e-0...7
				43
				44	Output from calculations with Decimal should be identical across all
				45	platforms.
				46
Raymond Hettinger	68c0453	2005-06-10 11:05:19 +0000	[diff] [blame]	47	>>> exec(decistmt(s))
				48	-3.217160342717258261933904529E-7
Raymond Hettinger	68c0453	2005-06-10 11:05:19 +0000	[diff] [blame]	49	"""
Tim Peters	ef57567	2006-03-31 03:17:30 +0000	[diff] [blame^]	50
Raymond Hettinger	68c0453	2005-06-10 11:05:19 +0000	[diff] [blame]	51	result = []
				52	g = generate_tokens(StringIO(s).readline) # tokenize the string
				53	for toknum, tokval, _, _, _ in g:
				54	if toknum == NUMBER and '.' in tokval: # replace NUMBER tokens
				55	result.extend([
				56	(NAME, 'Decimal'),
				57	(OP, '('),
				58	(STRING, repr(tokval)),
				59	(OP, ')')
				60	])
				61	else:
				62	result.append((toknum, tokval))
				63	return untokenize(result)
				64
Tim Peters	ef57567	2006-03-31 03:17:30 +0000	[diff] [blame^]	65	def test_main():
				66	if verbose:
				67	print 'starting...'
Raymond Hettinger	68c0453	2005-06-10 11:05:19 +0000	[diff] [blame]	68
Tim Peters	ef57567	2006-03-31 03:17:30 +0000	[diff] [blame^]	69	# This displays the tokenization of tokenize_tests.py to stdout, and
				70	# regrtest.py checks that this equals the expected output (in the
				71	# test/output/ directory).
				72	f = open(findfile('tokenize_tests' + os.extsep + 'txt'))
				73	tokenize(f.readline)
				74	f.close()
				75
				76	# Now run test_roundtrip() over tokenize_test.py too, and over all
				77	# (if the "compiler" resource is enabled) or a small random sample (if
				78	# "compiler" is not enabled) of the test*.py files.
				79	f = findfile('tokenize_tests' + os.extsep + 'txt')
				80	test_roundtrip(f)
				81
				82	testdir = os.path.dirname(f) or os.curdir
				83	testfiles = glob.glob(testdir + os.sep + 'test*.py')
				84	if not is_resource_enabled('compiler'):
				85	testfiles = random.sample(testfiles, 10)
				86
				87	for f in testfiles:
				88	test_roundtrip(f)
				89
				90	# Test detecton of IndentationError.
				91	sampleBadText = """\
				92	def foo():
				93	bar
				94	baz
				95	"""
				96
				97	try:
				98	for tok in generate_tokens(StringIO(sampleBadText).readline):
				99	pass
				100	except IndentationError:
				101	pass
				102	else:
				103	raise TestFailed("Did not detect IndentationError:")
				104
				105	# Run the doctests in this module.
				106	from test import test_tokenize # i.e., this module
				107	from test.test_support import run_doctest
				108	run_doctest(test_tokenize)
				109
				110	if verbose:
				111	print 'finished'
				112
				113	if __name__ == "__main__":
				114	test_main()