blob: b064967bfdfbccf028ab52daa045ddb9128adae4 [file] [log] [blame]
Raymond Hettinger68c04532005-06-10 11:05:19 +00001import os, glob, random
Tim Petersef575672006-03-31 03:17:30 +00002from cStringIO import StringIO
3from test.test_support import (verbose, findfile, is_resource_enabled,
4 TestFailed)
Raymond Hettinger68c04532005-06-10 11:05:19 +00005from tokenize import (tokenize, generate_tokens, untokenize,
6 NUMBER, NAME, OP, STRING)
Guido van Rossum0874f7f1997-10-27 22:15:06 +00007
Tim Petersef575672006-03-31 03:17:30 +00008# Test roundtrip for `untokenize`. `f` is a file path. The source code in f
9# is tokenized, converted back to source code via tokenize.untokenize(),
10# and tokenized again from the latter. The test fails if the second
11# tokenization doesn't match the first.
Raymond Hettinger68c04532005-06-10 11:05:19 +000012def test_roundtrip(f):
13 ## print 'Testing:', f
Tim Petersef575672006-03-31 03:17:30 +000014 fobj = open(f)
Raymond Hettinger68c04532005-06-10 11:05:19 +000015 try:
Tim Petersef575672006-03-31 03:17:30 +000016 fulltok = list(generate_tokens(fobj.readline))
Raymond Hettinger68c04532005-06-10 11:05:19 +000017 finally:
Tim Petersef575672006-03-31 03:17:30 +000018 fobj.close()
Raymond Hettinger68c04532005-06-10 11:05:19 +000019
20 t1 = [tok[:2] for tok in fulltok]
21 newtext = untokenize(t1)
22 readline = iter(newtext.splitlines(1)).next
23 t2 = [tok[:2] for tok in generate_tokens(readline)]
Tim Petersef575672006-03-31 03:17:30 +000024 if t1 != t2:
25 raise TestFailed("untokenize() roundtrip failed for %r" % f)
Raymond Hettinger68c04532005-06-10 11:05:19 +000026
Tim Petersef575672006-03-31 03:17:30 +000027# This is an example from the docs, set up as a doctest.
Raymond Hettinger68c04532005-06-10 11:05:19 +000028def decistmt(s):
29 """Substitute Decimals for floats in a string of statements.
30
31 >>> from decimal import Decimal
32 >>> s = 'print +21.3e-5*-.1234/81.7'
33 >>> decistmt(s)
34 "print +Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')"
35
Tim Petersef575672006-03-31 03:17:30 +000036 The format of the exponent is inherited from the platform C library.
37 Known cases are "e-007" (Windows) and "e-07" (not Windows). Since
38 we're only showing 12 digits, and the 13th isn't close to 5, the
39 rest of the output should be platform-independent.
40
41 >>> exec(s) #doctest: +ELLIPSIS
42 -3.21716034272e-0...7
43
44 Output from calculations with Decimal should be identical across all
45 platforms.
46
Raymond Hettinger68c04532005-06-10 11:05:19 +000047 >>> exec(decistmt(s))
48 -3.217160342717258261933904529E-7
Raymond Hettinger68c04532005-06-10 11:05:19 +000049 """
Tim Petersef575672006-03-31 03:17:30 +000050
Raymond Hettinger68c04532005-06-10 11:05:19 +000051 result = []
52 g = generate_tokens(StringIO(s).readline) # tokenize the string
53 for toknum, tokval, _, _, _ in g:
54 if toknum == NUMBER and '.' in tokval: # replace NUMBER tokens
55 result.extend([
56 (NAME, 'Decimal'),
57 (OP, '('),
58 (STRING, repr(tokval)),
59 (OP, ')')
60 ])
61 else:
62 result.append((toknum, tokval))
63 return untokenize(result)
64
Tim Petersef575672006-03-31 03:17:30 +000065def test_main():
66 if verbose:
67 print 'starting...'
Raymond Hettinger68c04532005-06-10 11:05:19 +000068
Tim Petersef575672006-03-31 03:17:30 +000069 # This displays the tokenization of tokenize_tests.py to stdout, and
70 # regrtest.py checks that this equals the expected output (in the
71 # test/output/ directory).
72 f = open(findfile('tokenize_tests' + os.extsep + 'txt'))
73 tokenize(f.readline)
74 f.close()
75
76 # Now run test_roundtrip() over tokenize_test.py too, and over all
77 # (if the "compiler" resource is enabled) or a small random sample (if
78 # "compiler" is not enabled) of the test*.py files.
79 f = findfile('tokenize_tests' + os.extsep + 'txt')
80 test_roundtrip(f)
81
82 testdir = os.path.dirname(f) or os.curdir
83 testfiles = glob.glob(testdir + os.sep + 'test*.py')
84 if not is_resource_enabled('compiler'):
85 testfiles = random.sample(testfiles, 10)
86
87 for f in testfiles:
88 test_roundtrip(f)
89
90 # Test detecton of IndentationError.
91 sampleBadText = """\
92def foo():
93 bar
94 baz
95"""
96
97 try:
98 for tok in generate_tokens(StringIO(sampleBadText).readline):
99 pass
100 except IndentationError:
101 pass
102 else:
103 raise TestFailed("Did not detect IndentationError:")
104
105 # Run the doctests in this module.
106 from test import test_tokenize # i.e., this module
107 from test.test_support import run_doctest
108 run_doctest(test_tokenize)
109
110 if verbose:
111 print 'finished'
112
113if __name__ == "__main__":
114 test_main()