"""Tests for the tokenize module.

The tests were originally written in the old Python style, where the
test output was compared to a golden file. This docstring represents
the first steps towards rewriting the entire test as a doctest.

The tests can be really simple. Given a small fragment of source
code, print out a table with the tokens. The ENDMARKER is omitted for
brevity.

>>> dump_tokens("1 + 1")
NUMBER     '1'        (1, 0) (1, 1)
OP         '+'        (1, 2) (1, 3)
NUMBER     '1'        (1, 4) (1, 5)

There will be a bunch more tests of specific source patterns.

The tokenize module also defines an untokenize function that should
regenerate the original program text from the tokens. (It doesn't
work very well at the moment.)

>>> roundtrip("if x == 1:\\n"
...           "    print x\\n")
if x ==1 :
    print x
"""

import os, glob, random
from cStringIO import StringIO
from test.test_support import (verbose, findfile, is_resource_enabled,
                               TestFailed)
from tokenize import (tokenize, generate_tokens, untokenize, tok_name,
                      ENDMARKER, NUMBER, NAME, OP, STRING)

# Test roundtrip for `untokenize`. `f` is a file path. The source code in f
# is tokenized, converted back to source code via tokenize.untokenize(),
# and tokenized again from the latter. The test fails if the second
# tokenization doesn't match the first.
def test_roundtrip(f):
    ## print 'Testing:', f
    fobj = open(f)
    try:
        fulltok = list(generate_tokens(fobj.readline))
    finally:
        fobj.close()

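    # Compare only the (token type, token string) pairs. untokenize() is
    # not expected to preserve whitespace exactly (see the module
    # docstring), so the start/end positions of the regenerated tokens
    # may legitimately differ from the originals.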
    t1 = [tok[:2] for tok in fulltok]
    newtext = untokenize(t1)
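    # splitlines(1) keeps the trailing newlines, and the bound .next
    # method supplies the readline-style callable generate_tokens() expects.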
    readline = iter(newtext.splitlines(1)).next
    t2 = [tok[:2] for tok in generate_tokens(readline)]
    if t1 != t2:
        raise TestFailed("untokenize() roundtrip failed for %r" % f)

def dump_tokens(s):
    """Print out the tokens in s in a table format.

    The ENDMARKER is omitted.
    """
    f = StringIO(s)
    for type, token, start, end, line in generate_tokens(f.readline):
        if type == ENDMARKER:
            break
        type = tok_name[type]
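        # -10.10 pads each column to ten characters and truncates anything
        # longer, so the table stays aligned for short tokens.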
        print "%(type)-10.10s %(token)-10.10r %(start)s %(end)s" % locals()

def roundtrip(s):
    f = StringIO(s)
    print untokenize(generate_tokens(f.readline)),

# This is an example from the docs, set up as a doctest.
def decistmt(s):
    """Substitute Decimals for floats in a string of statements.

    >>> from decimal import Decimal
    >>> s = 'print +21.3e-5*-.1234/81.7'
    >>> decistmt(s)
    "print +Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')"

    The format of the exponent is inherited from the platform C library.
    Known cases are "e-007" (Windows) and "e-07" (not Windows). Since
    we're only showing 12 digits, and the 13th isn't close to 5, the
    rest of the output should be platform-independent.

    >>> exec(s) #doctest: +ELLIPSIS
    -3.21716034272e-0...7

    Output from calculations with Decimal should be identical across all
    platforms.

    >>> exec(decistmt(s))
    -3.217160342717258261933904529E-7
    """

    result = []
    g = generate_tokens(StringIO(s).readline)   # tokenize the string
    for toknum, tokval, _, _, _ in g:
        if toknum == NUMBER and '.' in tokval:  # replace NUMBER tokens
            result.extend([
                (NAME, 'Decimal'),
                (OP, '('),
                (STRING, repr(tokval)),
                (OP, ')')
            ])
        else:
            result.append((toknum, tokval))
    return untokenize(result)

def test_main():
    if verbose:
        print 'starting...'

    # This displays the tokenization of tokenize_tests.txt to stdout, and
    # regrtest.py checks that this equals the expected output (in the
    # test/output/ directory).
    f = open(findfile('tokenize_tests' + os.extsep + 'txt'))
    tokenize(f.readline)
    f.close()

    # Now run test_roundtrip() over tokenize_tests.txt too, and over all
    # (if the "compiler" resource is enabled) or a small random sample (if
    # "compiler" is not enabled) of the test*.py files.
    f = findfile('tokenize_tests' + os.extsep + 'txt')
    test_roundtrip(f)

    testdir = os.path.dirname(f) or os.curdir
    testfiles = glob.glob(testdir + os.sep + 'test*.py')
    if not is_resource_enabled('compiler'):
        testfiles = random.sample(testfiles, 10)

    for f in testfiles:
        test_roundtrip(f)

    # Test detection of IndentationError.
    sampleBadText = """\
def foo():
    bar
  baz
"""

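    # 'baz' dedents to a column that matches no open indentation level,
    # which the tokenizer must report as an IndentationError.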
    try:
        for tok in generate_tokens(StringIO(sampleBadText).readline):
            pass
    except IndentationError:
        pass
    else:
        raise TestFailed("Did not detect IndentationError")

    # Run the doctests in this module.
    from test import test_tokenize  # i.e., this module
    from test.test_support import run_doctest
    run_doctest(test_tokenize, verbose)

    if verbose:
        print 'finished'

if __name__ == "__main__":
    test_main()