doctests = """
Tests for the tokenize module.

The tests can be really simple. Given a small fragment of source
code, print out a table with tokens. The ENDMARKER is omitted for
brevity.

    >>> import glob

    >>> dump_tokens("1 + 1")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NUMBER     '1'           (1, 0) (1, 1)
    OP         '+'           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)

    >>> dump_tokens("if False:\\n"
    ...             "    # NL\\n"
    ...             "    True = False # NEWLINE\\n")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'if'          (1, 0) (1, 2)
    NAME       'False'       (1, 3) (1, 8)
    OP         ':'           (1, 8) (1, 9)
    NEWLINE    '\\n'          (1, 9) (1, 10)
    COMMENT    '# NL'        (2, 4) (2, 8)
    NL         '\\n'          (2, 8) (2, 9)
    INDENT     '    '        (3, 0) (3, 4)
    NAME       'True'        (3, 4) (3, 8)
    OP         '='           (3, 9) (3, 10)
    NAME       'False'       (3, 11) (3, 16)
    COMMENT    '# NEWLINE'   (3, 17) (3, 26)
    NEWLINE    '\\n'          (3, 26) (3, 27)
    DEDENT     ''            (4, 0) (4, 0)

    >>> indent_error_file = \"""
    ... def k(x):
    ...     x += 2
    ...   x += 5
    ... \"""
    >>> readline = BytesIO(indent_error_file.encode('utf-8')).readline
    >>> for tok in tokenize(readline): pass
    Traceback (most recent call last):
        ...
    IndentationError: unindent does not match any outer indentation level

There are some standard formatting practices that are easy to get right.

    >>> roundtrip("if x == 1:\\n"
    ...           "    print(x)\\n")
    True

    >>> roundtrip("# This is a comment\\n# This also")
    True

Some people use different formatting conventions, which makes
untokenize a little trickier. Note that this test involves trailing
whitespace after the colon. Note that we use hex escapes to make the
two trailing blanks apparent in the expected output.

    >>> roundtrip("if x == 1 : \\n"
    ...           "  print(x)\\n")
    True

    >>> f = support.findfile("tokenize_tests.txt")
    >>> roundtrip(open(f, 'rb'))
    True

    >>> roundtrip("if x == 1:\\n"
    ...           "    # A comment by itself.\\n"
    ...           "    print(x) # Comment here, too.\\n"
    ...           "    # Another comment.\\n"
    ...           "after_if = True\\n")
    True

    >>> roundtrip("if (x # The comments need to go in the right place\\n"
    ...           "    == 1):\\n"
    ...           "    print('x==1')\\n")
    True

    >>> roundtrip("class Test: # A comment here\\n"
    ...           "  # A comment with weird indent\\n"
    ...           "  after_com = 5\\n"
    ...           "  def x(m): return m*5 # a one liner\\n"
    ...           "  def y(m): # A whitespace after the colon\\n"
    ...           "     return y*4 # 3-space indent\\n")
    True

Some error-handling code

    >>> roundtrip("try: import somemodule\\n"
    ...           "except ImportError: # comment\\n"
    ...           "    print('Can not import' # comment2\\n)"
    ...           "else: print('Loaded')\\n")
    True

Balancing continuation

    >>> roundtrip("a = (3,4, \\n"
    ...           "5,6)\\n"
    ...           "y = [3, 4,\\n"
    ...           "5]\\n"
    ...           "z = {'a': 5,\\n"
    ...           "'b':15, 'c':True}\\n"
    ...           "x = len(y) + 5 - a[\\n"
    ...           "3] - a[2]\\n"
    ...           "+ len(z) - z[\\n"
    ...           "'b']\\n")
    True

Ordinary integers and binary operators

    >>> dump_tokens("0xff <= 255")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NUMBER     '0xff'        (1, 0) (1, 4)
    OP         '<='          (1, 5) (1, 7)
    NUMBER     '255'         (1, 8) (1, 11)
    >>> dump_tokens("0b10 <= 255")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NUMBER     '0b10'        (1, 0) (1, 4)
    OP         '<='          (1, 5) (1, 7)
    NUMBER     '255'         (1, 8) (1, 11)
    >>> dump_tokens("0o123 <= 0O123")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NUMBER     '0o123'       (1, 0) (1, 5)
    OP         '<='          (1, 6) (1, 8)
    NUMBER     '0O123'       (1, 9) (1, 14)
    >>> dump_tokens("1234567 > ~0x15")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NUMBER     '1234567'     (1, 0) (1, 7)
    OP         '>'           (1, 8) (1, 9)
    OP         '~'           (1, 10) (1, 11)
    NUMBER     '0x15'        (1, 11) (1, 15)
    >>> dump_tokens("2134568 != 1231515")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NUMBER     '2134568'     (1, 0) (1, 7)
    OP         '!='          (1, 8) (1, 10)
    NUMBER     '1231515'     (1, 11) (1, 18)
    >>> dump_tokens("(-124561-1) & 200000000")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    OP         '('           (1, 0) (1, 1)
    OP         '-'           (1, 1) (1, 2)
    NUMBER     '124561'      (1, 2) (1, 8)
    OP         '-'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         ')'           (1, 10) (1, 11)
    OP         '&'           (1, 12) (1, 13)
    NUMBER     '200000000'   (1, 14) (1, 23)
    >>> dump_tokens("0xdeadbeef != -1")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NUMBER     '0xdeadbeef'  (1, 0) (1, 10)
    OP         '!='          (1, 11) (1, 13)
    OP         '-'           (1, 14) (1, 15)
    NUMBER     '1'           (1, 15) (1, 16)
    >>> dump_tokens("0xdeadc0de & 12345")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NUMBER     '0xdeadc0de'  (1, 0) (1, 10)
    OP         '&'           (1, 11) (1, 12)
    NUMBER     '12345'       (1, 13) (1, 18)
    >>> dump_tokens("0xFF & 0x15 | 1234")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NUMBER     '0xFF'        (1, 0) (1, 4)
    OP         '&'           (1, 5) (1, 6)
    NUMBER     '0x15'        (1, 7) (1, 11)
    OP         '|'           (1, 12) (1, 13)
    NUMBER     '1234'        (1, 14) (1, 18)

Long integers

    >>> dump_tokens("x = 0")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '0'           (1, 4) (1, 5)
    >>> dump_tokens("x = 0xfffffffffff")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '0xffffffffff (1, 4) (1, 17)
    >>> dump_tokens("x = 123141242151251616110")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '123141242151 (1, 4) (1, 25)
    >>> dump_tokens("x = -15921590215012591")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    OP         '-'           (1, 4) (1, 5)
    NUMBER     '159215902150 (1, 5) (1, 22)

Floating point numbers

    >>> dump_tokens("x = 3.14159")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3.14159'     (1, 4) (1, 11)
    >>> dump_tokens("x = 314159.")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '314159.'     (1, 4) (1, 11)
    >>> dump_tokens("x = .314159")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '.314159'     (1, 4) (1, 11)
    >>> dump_tokens("x = 3e14159")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3e14159'     (1, 4) (1, 11)
    >>> dump_tokens("x = 3E123")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3E123'       (1, 4) (1, 9)
    >>> dump_tokens("x+y = 3e-1230")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '+'           (1, 1) (1, 2)
    NAME       'y'           (1, 2) (1, 3)
    OP         '='           (1, 4) (1, 5)
    NUMBER     '3e-1230'     (1, 6) (1, 13)
    >>> dump_tokens("x = 3.14e159")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3.14e159'    (1, 4) (1, 12)

String literals

    >>> dump_tokens("x = ''; y = \\\"\\\"")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "''"          (1, 4) (1, 6)
    OP         ';'           (1, 6) (1, 7)
    NAME       'y'           (1, 8) (1, 9)
    OP         '='           (1, 10) (1, 11)
    STRING     '""'          (1, 12) (1, 14)
    >>> dump_tokens("x = '\\\"'; y = \\\"'\\\"")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '\\'"\\''       (1, 4) (1, 7)
    OP         ';'           (1, 7) (1, 8)
    NAME       'y'           (1, 9) (1, 10)
    OP         '='           (1, 11) (1, 12)
    STRING     '"\\'"'        (1, 13) (1, 16)
    >>> dump_tokens("x = \\\"doesn't \\\"shrink\\\", does it\\\"")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '"doesn\\'t "' (1, 4) (1, 14)
    NAME       'shrink'      (1, 14) (1, 20)
    STRING     '", does it"' (1, 20) (1, 31)
    >>> dump_tokens("x = 'abc' + 'ABC'")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "'abc'"       (1, 4) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    STRING     "'ABC'"       (1, 12) (1, 17)
    >>> dump_tokens('y = "ABC" + "ABC"')
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'y'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '"ABC"'       (1, 4) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    STRING     '"ABC"'       (1, 12) (1, 17)
    >>> dump_tokens("x = r'abc' + r'ABC' + R'ABC' + R'ABC'")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "r'abc'"      (1, 4) (1, 10)
    OP         '+'           (1, 11) (1, 12)
    STRING     "r'ABC'"      (1, 13) (1, 19)
    OP         '+'           (1, 20) (1, 21)
    STRING     "R'ABC'"      (1, 22) (1, 28)
    OP         '+'           (1, 29) (1, 30)
    STRING     "R'ABC'"      (1, 31) (1, 37)
    >>> dump_tokens('y = r"abc" + r"ABC" + R"ABC" + R"ABC"')
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'y'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     'r"abc"'      (1, 4) (1, 10)
    OP         '+'           (1, 11) (1, 12)
    STRING     'r"ABC"'      (1, 13) (1, 19)
    OP         '+'           (1, 20) (1, 21)
    STRING     'R"ABC"'      (1, 22) (1, 28)
    OP         '+'           (1, 29) (1, 30)
    STRING     'R"ABC"'      (1, 31) (1, 37)

    >>> dump_tokens("u'abc' + U'abc'")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    STRING     "u'abc'"      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     "U'abc'"      (1, 9) (1, 15)
    >>> dump_tokens('u"abc" + U"abc"')
    ENCODING   'utf-8'       (0, 0) (0, 0)
    STRING     'u"abc"'      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     'U"abc"'      (1, 9) (1, 15)

    >>> dump_tokens("b'abc' + B'abc'")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    STRING     "b'abc'"      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     "B'abc'"      (1, 9) (1, 15)
    >>> dump_tokens('b"abc" + B"abc"')
    ENCODING   'utf-8'       (0, 0) (0, 0)
    STRING     'b"abc"'      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     'B"abc"'      (1, 9) (1, 15)
    >>> dump_tokens("br'abc' + bR'abc' + Br'abc' + BR'abc'")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    STRING     "br'abc'"     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     "bR'abc'"     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     "Br'abc'"     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     "BR'abc'"     (1, 30) (1, 37)
    >>> dump_tokens('br"abc" + bR"abc" + Br"abc" + BR"abc"')
    ENCODING   'utf-8'       (0, 0) (0, 0)
    STRING     'br"abc"'     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     'bR"abc"'     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     'Br"abc"'     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     'BR"abc"'     (1, 30) (1, 37)
    >>> dump_tokens("rb'abc' + rB'abc' + Rb'abc' + RB'abc'")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    STRING     "rb'abc'"     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     "rB'abc'"     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     "Rb'abc'"     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     "RB'abc'"     (1, 30) (1, 37)
    >>> dump_tokens('rb"abc" + rB"abc" + Rb"abc" + RB"abc"')
    ENCODING   'utf-8'       (0, 0) (0, 0)
    STRING     'rb"abc"'     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     'rB"abc"'     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     'Rb"abc"'     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     'RB"abc"'     (1, 30) (1, 37)

Operators

    >>> dump_tokens("def d22(a, b, c=2, d=2, *k): pass")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'def'         (1, 0) (1, 3)
    NAME       'd22'         (1, 4) (1, 7)
    OP         '('           (1, 7) (1, 8)
    NAME       'a'           (1, 8) (1, 9)
    OP         ','           (1, 9) (1, 10)
    NAME       'b'           (1, 11) (1, 12)
    OP         ','           (1, 12) (1, 13)
    NAME       'c'           (1, 14) (1, 15)
    OP         '='           (1, 15) (1, 16)
    NUMBER     '2'           (1, 16) (1, 17)
    OP         ','           (1, 17) (1, 18)
    NAME       'd'           (1, 19) (1, 20)
    OP         '='           (1, 20) (1, 21)
    NUMBER     '2'           (1, 21) (1, 22)
    OP         ','           (1, 22) (1, 23)
    OP         '*'           (1, 24) (1, 25)
    NAME       'k'           (1, 25) (1, 26)
    OP         ')'           (1, 26) (1, 27)
    OP         ':'           (1, 27) (1, 28)
    NAME       'pass'        (1, 29) (1, 33)
    >>> dump_tokens("def d01v_(a=1, *k, **w): pass")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'def'         (1, 0) (1, 3)
    NAME       'd01v_'       (1, 4) (1, 9)
    OP         '('           (1, 9) (1, 10)
    NAME       'a'           (1, 10) (1, 11)
    OP         '='           (1, 11) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         ','           (1, 13) (1, 14)
    OP         '*'           (1, 15) (1, 16)
    NAME       'k'           (1, 16) (1, 17)
    OP         ','           (1, 17) (1, 18)
    OP         '**'          (1, 19) (1, 21)
    NAME       'w'           (1, 21) (1, 22)
    OP         ')'           (1, 22) (1, 23)
    OP         ':'           (1, 23) (1, 24)
    NAME       'pass'        (1, 25) (1, 29)

Comparison

    >>> dump_tokens("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != " +
    ...             "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'if'          (1, 0) (1, 2)
    NUMBER     '1'           (1, 3) (1, 4)
    OP         '<'           (1, 5) (1, 6)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '>'           (1, 9) (1, 10)
    NUMBER     '1'           (1, 11) (1, 12)
    OP         '=='          (1, 13) (1, 15)
    NUMBER     '1'           (1, 16) (1, 17)
    OP         '>='          (1, 18) (1, 20)
    NUMBER     '5'           (1, 21) (1, 22)
    OP         '<='          (1, 23) (1, 25)
    NUMBER     '0x15'        (1, 26) (1, 30)
    OP         '<='          (1, 31) (1, 33)
    NUMBER     '0x12'        (1, 34) (1, 38)
    OP         '!='          (1, 39) (1, 41)
    NUMBER     '1'           (1, 42) (1, 43)
    NAME       'and'         (1, 44) (1, 47)
    NUMBER     '5'           (1, 48) (1, 49)
    NAME       'in'          (1, 50) (1, 52)
    NUMBER     '1'           (1, 53) (1, 54)
    NAME       'not'         (1, 55) (1, 58)
    NAME       'in'          (1, 59) (1, 61)
    NUMBER     '1'           (1, 62) (1, 63)
    NAME       'is'          (1, 64) (1, 66)
    NUMBER     '1'           (1, 67) (1, 68)
    NAME       'or'          (1, 69) (1, 71)
    NUMBER     '5'           (1, 72) (1, 73)
    NAME       'is'          (1, 74) (1, 76)
    NAME       'not'         (1, 77) (1, 80)
    NUMBER     '1'           (1, 81) (1, 82)
    OP         ':'           (1, 82) (1, 83)
    NAME       'pass'        (1, 84) (1, 88)

Shift

    >>> dump_tokens("x = 1 << 1 >> 5")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '<<'          (1, 6) (1, 8)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '>>'          (1, 11) (1, 13)
    NUMBER     '5'           (1, 14) (1, 15)

Additive

    >>> dump_tokens("x = 1 - y + 15 - 1 + 0x124 + z + a[5]")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '-'           (1, 6) (1, 7)
    NAME       'y'           (1, 8) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    NUMBER     '15'          (1, 12) (1, 14)
    OP         '-'           (1, 15) (1, 16)
    NUMBER     '1'           (1, 17) (1, 18)
    OP         '+'           (1, 19) (1, 20)
    NUMBER     '0x124'       (1, 21) (1, 26)
    OP         '+'           (1, 27) (1, 28)
    NAME       'z'           (1, 29) (1, 30)
    OP         '+'           (1, 31) (1, 32)
    NAME       'a'           (1, 33) (1, 34)
    OP         '['           (1, 34) (1, 35)
    NUMBER     '5'           (1, 35) (1, 36)
    OP         ']'           (1, 36) (1, 37)

Multiplicative

    >>> dump_tokens("x = 1//1*1/5*12%0x12@42")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '//'          (1, 5) (1, 7)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '*'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '/'           (1, 10) (1, 11)
    NUMBER     '5'           (1, 11) (1, 12)
    OP         '*'           (1, 12) (1, 13)
    NUMBER     '12'          (1, 13) (1, 15)
    OP         '%'           (1, 15) (1, 16)
    NUMBER     '0x12'        (1, 16) (1, 20)
    OP         '@'           (1, 20) (1, 21)
    NUMBER     '42'          (1, 21) (1, 23)

Unary

    >>> dump_tokens("~1 ^ 1 & 1 |1 ^ -1")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    OP         '~'           (1, 0) (1, 1)
    NUMBER     '1'           (1, 1) (1, 2)
    OP         '^'           (1, 3) (1, 4)
    NUMBER     '1'           (1, 5) (1, 6)
    OP         '&'           (1, 7) (1, 8)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '|'           (1, 11) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         '^'           (1, 14) (1, 15)
    OP         '-'           (1, 16) (1, 17)
    NUMBER     '1'           (1, 17) (1, 18)
    >>> dump_tokens("-1*1/1+1*1//1 - ---1**1")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    OP         '-'           (1, 0) (1, 1)
    NUMBER     '1'           (1, 1) (1, 2)
    OP         '*'           (1, 2) (1, 3)
    NUMBER     '1'           (1, 3) (1, 4)
    OP         '/'           (1, 4) (1, 5)
    NUMBER     '1'           (1, 5) (1, 6)
    OP         '+'           (1, 6) (1, 7)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '*'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '//'          (1, 10) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         '-'           (1, 14) (1, 15)
    OP         '-'           (1, 16) (1, 17)
    OP         '-'           (1, 17) (1, 18)
    OP         '-'           (1, 18) (1, 19)
    NUMBER     '1'           (1, 19) (1, 20)
    OP         '**'          (1, 20) (1, 22)
    NUMBER     '1'           (1, 22) (1, 23)

Selector

    >>> dump_tokens("import sys, time\\nx = sys.modules['time'].time()")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'import'      (1, 0) (1, 6)
    NAME       'sys'         (1, 7) (1, 10)
    OP         ','           (1, 10) (1, 11)
    NAME       'time'        (1, 12) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    NAME       'x'           (2, 0) (2, 1)
    OP         '='           (2, 2) (2, 3)
    NAME       'sys'         (2, 4) (2, 7)
    OP         '.'           (2, 7) (2, 8)
    NAME       'modules'     (2, 8) (2, 15)
    OP         '['           (2, 15) (2, 16)
    STRING     "'time'"      (2, 16) (2, 22)
    OP         ']'           (2, 22) (2, 23)
    OP         '.'           (2, 23) (2, 24)
    NAME       'time'        (2, 24) (2, 28)
    OP         '('           (2, 28) (2, 29)
    OP         ')'           (2, 29) (2, 30)

Methods

    >>> dump_tokens("@staticmethod\\ndef foo(x,y): pass")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    OP         '@'           (1, 0) (1, 1)
    NAME       'staticmethod (1, 1) (1, 13)
    NEWLINE    '\\n'          (1, 13) (1, 14)
    NAME       'def'         (2, 0) (2, 3)
    NAME       'foo'         (2, 4) (2, 7)
    OP         '('           (2, 7) (2, 8)
    NAME       'x'           (2, 8) (2, 9)
    OP         ','           (2, 9) (2, 10)
    NAME       'y'           (2, 10) (2, 11)
    OP         ')'           (2, 11) (2, 12)
    OP         ':'           (2, 12) (2, 13)
    NAME       'pass'        (2, 14) (2, 18)

Backslash means line continuation, except for comments

    >>> roundtrip("x=1+\\\\n"
    ...           "1\\n"
    ...           "# This is a comment\\\\n"
    ...           "# This also\\n")
    True
    >>> roundtrip("# Comment \\\\nx = 0")
    True

Two string literals on the same line

    >>> roundtrip("'' ''")
    True

Test roundtrip on random Python modules.
Pass the '-ucpu' option to process the full directory.

    >>> import random
    >>> tempdir = os.path.dirname(f) or os.curdir
    >>> testfiles = glob.glob(os.path.join(tempdir, "test*.py"))

Tokenize is broken on test_pep3131.py because regular expressions are
broken on the obscure unicode identifiers in it. *sigh*
With roundtrip extended to test the 5-tuple mode of untokenize,
7 more testfiles fail. Remove them also until the failure is diagnosed.

    >>> testfiles.remove(os.path.join(tempdir, "test_pep3131.py"))
    >>> for f in ('buffer', 'builtin', 'fileio', 'inspect', 'os', 'platform', 'sys'):
    ...     testfiles.remove(os.path.join(tempdir, "test_%s.py") % f)
    ...
    >>> if not support.is_resource_enabled("cpu"):
    ...     testfiles = random.sample(testfiles, 10)
    ...
    >>> for testfile in testfiles:
    ...     if not roundtrip(open(testfile, 'rb')):
    ...         print("Roundtrip failed for file %s" % testfile)
    ...         break
    ... else: True
    True

Evil tabs

    >>> dump_tokens("def f():\\n\\tif x\\n        \\tpass")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'def'         (1, 0) (1, 3)
    NAME       'f'           (1, 4) (1, 5)
    OP         '('           (1, 5) (1, 6)
    OP         ')'           (1, 6) (1, 7)
    OP         ':'           (1, 7) (1, 8)
    NEWLINE    '\\n'          (1, 8) (1, 9)
    INDENT     '\\t'          (2, 0) (2, 1)
    NAME       'if'          (2, 1) (2, 3)
    NAME       'x'           (2, 4) (2, 5)
    NEWLINE    '\\n'          (2, 5) (2, 6)
    INDENT     '        \\t'  (3, 0) (3, 9)
    NAME       'pass'        (3, 9) (3, 13)
    DEDENT     ''            (4, 0) (4, 0)
    DEDENT     ''            (4, 0) (4, 0)

Non-ascii identifiers

    >>> dump_tokens("Örter = 'places'\\ngrün = 'green'")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'Örter'       (1, 0) (1, 5)
    OP         '='           (1, 6) (1, 7)
    STRING     "'places'"    (1, 8) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    NAME       'grün'        (2, 0) (2, 4)
    OP         '='           (2, 5) (2, 6)
    STRING     "'green'"     (2, 7) (2, 14)

Legacy unicode literals:

    >>> dump_tokens("Örter = u'places'\\ngrün = U'green'")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'Örter'       (1, 0) (1, 5)
    OP         '='           (1, 6) (1, 7)
    STRING     "u'places'"   (1, 8) (1, 17)
    NEWLINE    '\\n'          (1, 17) (1, 18)
    NAME       'grün'        (2, 0) (2, 4)
    OP         '='           (2, 5) (2, 6)
    STRING     "U'green'"    (2, 7) (2, 15)

Async/await extension:

    >>> dump_tokens("async = 1")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'async'       (1, 0) (1, 5)
    OP         '='           (1, 6) (1, 7)
    NUMBER     '1'           (1, 8) (1, 9)

    >>> dump_tokens("a = (async = 1)")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'a'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    OP         '('           (1, 4) (1, 5)
    NAME       'async'       (1, 5) (1, 10)
    OP         '='           (1, 11) (1, 12)
    NUMBER     '1'           (1, 13) (1, 14)
    OP         ')'           (1, 14) (1, 15)

    >>> dump_tokens("async()")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'async'       (1, 0) (1, 5)
    OP         '('           (1, 5) (1, 6)
    OP         ')'           (1, 6) (1, 7)

    >>> dump_tokens("class async(Bar):pass")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'class'       (1, 0) (1, 5)
    NAME       'async'       (1, 6) (1, 11)
    OP         '('           (1, 11) (1, 12)
    NAME       'Bar'         (1, 12) (1, 15)
    OP         ')'           (1, 15) (1, 16)
    OP         ':'           (1, 16) (1, 17)
    NAME       'pass'        (1, 17) (1, 21)

    >>> dump_tokens("class async:pass")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'class'       (1, 0) (1, 5)
    NAME       'async'       (1, 6) (1, 11)
    OP         ':'           (1, 11) (1, 12)
    NAME       'pass'        (1, 12) (1, 16)

    >>> dump_tokens("await = 1")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'await'       (1, 0) (1, 5)
    OP         '='           (1, 6) (1, 7)
    NUMBER     '1'           (1, 8) (1, 9)

    >>> dump_tokens("foo.async")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'foo'         (1, 0) (1, 3)
    OP         '.'           (1, 3) (1, 4)
    NAME       'async'       (1, 4) (1, 9)

    >>> dump_tokens("async for a in b: pass")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'async'       (1, 0) (1, 5)
    NAME       'for'         (1, 6) (1, 9)
    NAME       'a'           (1, 10) (1, 11)
    NAME       'in'          (1, 12) (1, 14)
    NAME       'b'           (1, 15) (1, 16)
    OP         ':'           (1, 16) (1, 17)
    NAME       'pass'        (1, 18) (1, 22)

    >>> dump_tokens("async with a as b: pass")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'async'       (1, 0) (1, 5)
    NAME       'with'        (1, 6) (1, 10)
    NAME       'a'           (1, 11) (1, 12)
    NAME       'as'          (1, 13) (1, 15)
    NAME       'b'           (1, 16) (1, 17)
    OP         ':'           (1, 17) (1, 18)
    NAME       'pass'        (1, 19) (1, 23)

    >>> dump_tokens("async.foo")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'async'       (1, 0) (1, 5)
    OP         '.'           (1, 5) (1, 6)
    NAME       'foo'         (1, 6) (1, 9)

    >>> dump_tokens("async")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'async'       (1, 0) (1, 5)

    >>> dump_tokens("async\\n#comment\\nawait")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'async'       (1, 0) (1, 5)
    NEWLINE    '\\n'          (1, 5) (1, 6)
    COMMENT    '#comment'    (2, 0) (2, 8)
    NL         '\\n'          (2, 8) (2, 9)
    NAME       'await'       (3, 0) (3, 5)

    >>> dump_tokens("async\\n...\\nawait")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'async'       (1, 0) (1, 5)
    NEWLINE    '\\n'          (1, 5) (1, 6)
    OP         '...'         (2, 0) (2, 3)
    NEWLINE    '\\n'          (2, 3) (2, 4)
    NAME       'await'       (3, 0) (3, 5)

    >>> dump_tokens("async\\nawait")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'async'       (1, 0) (1, 5)
    NEWLINE    '\\n'          (1, 5) (1, 6)
    NAME       'await'       (2, 0) (2, 5)

    >>> dump_tokens("foo.async + 1")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'foo'         (1, 0) (1, 3)
    OP         '.'           (1, 3) (1, 4)
    NAME       'async'       (1, 4) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    NUMBER     '1'           (1, 12) (1, 13)

    >>> dump_tokens("async def foo(): pass")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    ASYNC      'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'foo'         (1, 10) (1, 13)
    OP         '('           (1, 13) (1, 14)
    OP         ')'           (1, 14) (1, 15)
    OP         ':'           (1, 15) (1, 16)
    NAME       'pass'        (1, 17) (1, 21)

    >>> dump_tokens('''async def foo():
    ...   def foo(await):
    ...     await = 1
    ...   if 1:
    ...     await
    ... async += 1
    ... ''')
    ENCODING   'utf-8'       (0, 0) (0, 0)
    ASYNC      'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'foo'         (1, 10) (1, 13)
    OP         '('           (1, 13) (1, 14)
    OP         ')'           (1, 14) (1, 15)
    OP         ':'           (1, 15) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    INDENT     '  '          (2, 0) (2, 2)
    NAME       'def'         (2, 2) (2, 5)
    NAME       'foo'         (2, 6) (2, 9)
    OP         '('           (2, 9) (2, 10)
    AWAIT      'await'       (2, 10) (2, 15)
    OP         ')'           (2, 15) (2, 16)
    OP         ':'           (2, 16) (2, 17)
    NEWLINE    '\\n'          (2, 17) (2, 18)
    INDENT     '    '        (3, 0) (3, 4)
    AWAIT      'await'       (3, 4) (3, 9)
    OP         '='           (3, 10) (3, 11)
    NUMBER     '1'           (3, 12) (3, 13)
    NEWLINE    '\\n'          (3, 13) (3, 14)
    DEDENT     ''            (4, 2) (4, 2)
    NAME       'if'          (4, 2) (4, 4)
    NUMBER     '1'           (4, 5) (4, 6)
    OP         ':'           (4, 6) (4, 7)
    NEWLINE    '\\n'          (4, 7) (4, 8)
    INDENT     '    '        (5, 0) (5, 4)
    AWAIT      'await'       (5, 4) (5, 9)
    NEWLINE    '\\n'          (5, 9) (5, 10)
    DEDENT     ''            (6, 0) (6, 0)
    DEDENT     ''            (6, 0) (6, 0)
    NAME       'async'       (6, 0) (6, 5)
    OP         '+='          (6, 6) (6, 8)
    NUMBER     '1'           (6, 9) (6, 10)
    NEWLINE    '\\n'          (6, 10) (6, 11)

    >>> dump_tokens('''async def foo():
    ...   async for i in 1: pass''')
    ENCODING   'utf-8'       (0, 0) (0, 0)
    ASYNC      'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'foo'         (1, 10) (1, 13)
    OP         '('           (1, 13) (1, 14)
    OP         ')'           (1, 14) (1, 15)
    OP         ':'           (1, 15) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    INDENT     '  '          (2, 0) (2, 2)
    ASYNC      'async'       (2, 2) (2, 7)
    NAME       'for'         (2, 8) (2, 11)
    NAME       'i'           (2, 12) (2, 13)
    NAME       'in'          (2, 14) (2, 16)
    NUMBER     '1'           (2, 17) (2, 18)
    OP         ':'           (2, 18) (2, 19)
    NAME       'pass'        (2, 20) (2, 24)
    DEDENT     ''            (3, 0) (3, 0)

    >>> dump_tokens('''async def foo(async): await''')
    ENCODING   'utf-8'       (0, 0) (0, 0)
    ASYNC      'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'foo'         (1, 10) (1, 13)
    OP         '('           (1, 13) (1, 14)
    ASYNC      'async'       (1, 14) (1, 19)
    OP         ')'           (1, 19) (1, 20)
    OP         ':'           (1, 20) (1, 21)
    AWAIT      'await'       (1, 22) (1, 27)

    >>> dump_tokens('''def f():
    ...
    ...   def baz(): pass
    ...   async def bar(): pass
    ...
    ...   await = 2''')
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'def'         (1, 0) (1, 3)
    NAME       'f'           (1, 4) (1, 5)
    OP         '('           (1, 5) (1, 6)
    OP         ')'           (1, 6) (1, 7)
    OP         ':'           (1, 7) (1, 8)
    NEWLINE    '\\n'          (1, 8) (1, 9)
    NL         '\\n'          (2, 0) (2, 1)
    INDENT     '  '          (3, 0) (3, 2)
    NAME       'def'         (3, 2) (3, 5)
    NAME       'baz'         (3, 6) (3, 9)
    OP         '('           (3, 9) (3, 10)
    OP         ')'           (3, 10) (3, 11)
    OP         ':'           (3, 11) (3, 12)
    NAME       'pass'        (3, 13) (3, 17)
    NEWLINE    '\\n'          (3, 17) (3, 18)
    ASYNC      'async'       (4, 2) (4, 7)
    NAME       'def'         (4, 8) (4, 11)
    NAME       'bar'         (4, 12) (4, 15)
    OP         '('           (4, 15) (4, 16)
    OP         ')'           (4, 16) (4, 17)
    OP         ':'           (4, 17) (4, 18)
    NAME       'pass'        (4, 19) (4, 23)
    NEWLINE    '\\n'          (4, 23) (4, 24)
    NL         '\\n'          (5, 0) (5, 1)
    NAME       'await'       (6, 2) (6, 7)
    OP         '='           (6, 8) (6, 9)
    NUMBER     '2'           (6, 10) (6, 11)
    DEDENT     ''            (7, 0) (7, 0)

    >>> dump_tokens('''async def f():
    ...
    ...   def baz(): pass
    ...   async def bar(): pass
    ...
    ...   await = 2''')
    ENCODING   'utf-8'       (0, 0) (0, 0)
    ASYNC      'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'f'           (1, 10) (1, 11)
    OP         '('           (1, 11) (1, 12)
    OP         ')'           (1, 12) (1, 13)
    OP         ':'           (1, 13) (1, 14)
    NEWLINE    '\\n'          (1, 14) (1, 15)
    NL         '\\n'          (2, 0) (2, 1)
    INDENT     '  '          (3, 0) (3, 2)
    NAME       'def'         (3, 2) (3, 5)
    NAME       'baz'         (3, 6) (3, 9)
    OP         '('           (3, 9) (3, 10)
    OP         ')'           (3, 10) (3, 11)
    OP         ':'           (3, 11) (3, 12)
    NAME       'pass'        (3, 13) (3, 17)
    NEWLINE    '\\n'          (3, 17) (3, 18)
    ASYNC      'async'       (4, 2) (4, 7)
    NAME       'def'         (4, 8) (4, 11)
    NAME       'bar'         (4, 12) (4, 15)
    OP         '('           (4, 15) (4, 16)
    OP         ')'           (4, 16) (4, 17)
    OP         ':'           (4, 17) (4, 18)
    NAME       'pass'        (4, 19) (4, 23)
    NEWLINE    '\\n'          (4, 23) (4, 24)
    NL         '\\n'          (5, 0) (5, 1)
    AWAIT      'await'       (6, 2) (6, 7)
    OP         '='           (6, 8) (6, 9)
    NUMBER     '2'           (6, 10) (6, 11)
    DEDENT     ''            (7, 0) (7, 0)
"""

from test import support
from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
                      STRING, ENDMARKER, ENCODING, tok_name, detect_encoding,
                      open as tokenize_open, Untokenizer)
from io import BytesIO
from unittest import TestCase, mock
import os
import token

def dump_tokens(s):
    """Print out the tokens in s in a table format.

    The ENDMARKER is omitted.
    """
    f = BytesIO(s.encode('utf-8'))
    for type, token, start, end, line in tokenize(f.readline):
        if type == ENDMARKER:
            break
        type = tok_name[type]
        print("%(type)-10.10s %(token)-13.13r %(start)s %(end)s" % locals())

def roundtrip(f):
    """
    Test roundtrip for `untokenize`. `f` is an open file or a string.
    The source code in f is tokenized to both 5- and 2-tuples.
    Both sequences are converted back to source code via
    tokenize.untokenize(), and the latter tokenized again to 2-tuples.
    The test fails if the 3 pair tokenizations do not match.

    When untokenize bugs are fixed, untokenize with 5-tuples should
    reproduce code that does not contain a backslash continuation
    following spaces. A proper test should test this.

    This function would be more useful for correcting bugs if it reported
    the first point of failure, like assertEqual, rather than just
    returning False -- or if it were only used in unittests and not
    doctest and actually used assertEqual.
    """
    # Get source code and original tokenizations
    if isinstance(f, str):
        code = f.encode('utf-8')
    else:
        code = f.read()
        f.close()
    readline = iter(code.splitlines(keepends=True)).__next__
    tokens5 = list(tokenize(readline))
    tokens2 = [tok[:2] for tok in tokens5]
    # Reproduce tokens2 from pairs
    bytes_from2 = untokenize(tokens2)
    readline2 = iter(bytes_from2.splitlines(keepends=True)).__next__
    tokens2_from2 = [tok[:2] for tok in tokenize(readline2)]
    # Reproduce tokens2 from 5-tuples
    bytes_from5 = untokenize(tokens5)
    readline5 = iter(bytes_from5.splitlines(keepends=True)).__next__
    tokens2_from5 = [tok[:2] for tok in tokenize(readline5)]
    # Compare 3 versions
    return tokens2 == tokens2_from2 == tokens2_from5

# This is an example from the docs, set up as a doctest.
def decistmt(s):
    """Substitute Decimals for floats in a string of statements.

    >>> from decimal import Decimal
    >>> s = 'print(+21.3e-5*-.1234/81.7)'
    >>> decistmt(s)
    "print (+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7'))"

    The format of the exponent is inherited from the platform C library.
    Known cases are "e-007" (Windows) and "e-07" (not Windows). Since
    we're only showing 11 digits, and the 12th isn't close to 5, the
    rest of the output should be platform-independent.

    >>> exec(s) #doctest: +ELLIPSIS
    -3.2171603427...e-0...7

    Output from calculations with Decimal should be identical across all
    platforms.

    >>> exec(decistmt(s))
    -3.217160342717258261933904529E-7
    """
    result = []
    g = tokenize(BytesIO(s.encode('utf-8')).readline)  # tokenize the string
    for toknum, tokval, _, _, _ in g:
        if toknum == NUMBER and '.' in tokval:  # replace NUMBER tokens
            result.extend([
                (NAME, 'Decimal'),
                (OP, '('),
                (STRING, repr(tokval)),
                (OP, ')')
            ])
        else:
            result.append((toknum, tokval))
    return untokenize(result).decode('utf-8')


class TestTokenizerAdheresToPep0263(TestCase):
    """
    Test that tokenizer adheres to the coding behaviour stipulated in PEP 0263.
    """

    def _testFile(self, filename):
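        # Round-trip a test data file that lives alongside this module.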
        path = os.path.join(os.path.dirname(__file__), filename)
        return roundtrip(open(path, 'rb'))

    def test_utf8_coding_cookie_and_no_utf8_bom(self):
        f = 'tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt'
        self.assertTrue(self._testFile(f))

    def test_latin1_coding_cookie_and_utf8_bom(self):
        """
        As per PEP 0263, if a file starts with a utf-8 BOM signature, the only
        allowed encoding for the comment is 'utf-8'. The text file used in
        this test starts with a BOM signature, but specifies latin1 as the
        coding, so verify that a SyntaxError is raised, which matches the
        behaviour of the interpreter when it encounters a similar condition.
        """
        f = 'tokenize_tests-latin1-coding-cookie-and-utf8-bom-sig.txt'
        self.assertRaises(SyntaxError, self._testFile, f)

    def test_no_coding_cookie_and_utf8_bom(self):
        f = 'tokenize_tests-no-coding-cookie-and-utf8-bom-sig-only.txt'
        self.assertTrue(self._testFile(f))

    def test_utf8_coding_cookie_and_utf8_bom(self):
        f = 'tokenize_tests-utf8-coding-cookie-and-utf8-bom-sig.txt'
        self.assertTrue(self._testFile(f))

    def test_bad_coding_cookie(self):
        self.assertRaises(SyntaxError, self._testFile, 'bad_coding.py')
        self.assertRaises(SyntaxError, self._testFile, 'bad_coding2.py')


class Test_Tokenize(TestCase):

    def test__tokenize_decodes_with_specified_encoding(self):
        literal = '"ЉЊЈЁЂ"'
        line = literal.encode('utf-8')
        first = False
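        # readline() serves the encoded line once, then b'' to mark EOF.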
        def readline():
            nonlocal first
            if not first:
                first = True
                return line
            else:
                return b''

        # skip the initial encoding token and the end token
        tokens = list(_tokenize(readline, encoding='utf-8'))[1:-1]
        expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
        self.assertEqual(tokens, expected_tokens,
                         "bytes not decoded with encoding")

    def test__tokenize_does_not_decode_with_encoding_none(self):
        literal = '"ЉЊЈЁЂ"'
        first = False
        def readline():
            nonlocal first
            if not first:
                first = True
                return literal
            else:
                return b''

        # skip the end token
        tokens = list(_tokenize(readline, encoding=None))[:-1]
        expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
        self.assertEqual(tokens, expected_tokens,
                         "string not tokenized when encoding is None")


class TestDetectEncoding(TestCase):

    def get_readline(self, lines):
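        # Simulate a binary file's readline(): serve each entry of `lines`
        # in turn, then raise StopIteration when exhausted.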
        index = 0
        def readline():
            nonlocal index
            if index == len(lines):
                raise StopIteration
            line = lines[index]
            index += 1
            return line
        return readline

    def test_no_bom_no_encoding_cookie(self):
        lines = (
            b'# something\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, list(lines[:2]))

    def test_bom_no_cookie(self):
        lines = (
            b'\xef\xbb\xbf# something\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines,
                         [b'# something\n', b'print(something)\n'])

    def test_cookie_first_line_no_bom(self):
        lines = (
            b'# -*- coding: latin-1 -*-\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'iso-8859-1')
        self.assertEqual(consumed_lines, [b'# -*- coding: latin-1 -*-\n'])

    def test_matched_bom_and_cookie_first_line(self):
        lines = (
            b'\xef\xbb\xbf# coding=utf-8\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines, [b'# coding=utf-8\n'])

    def test_mismatched_bom_and_cookie_first_line_raises_syntaxerror(self):
        lines = (
            b'\xef\xbb\xbf# vim: set fileencoding=ascii :\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        readline = self.get_readline(lines)
        self.assertRaises(SyntaxError, detect_encoding, readline)

    def test_cookie_second_line_no_bom(self):
        lines = (
            b'#! something\n',
            b'# vim: set fileencoding=ascii :\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'ascii')
        expected = [b'#! something\n', b'# vim: set fileencoding=ascii :\n']
        self.assertEqual(consumed_lines, expected)

    def test_matched_bom_and_cookie_second_line(self):
        lines = (
            b'\xef\xbb\xbf#! something\n',
            b'f# coding=utf-8\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines,
                         [b'#! something\n', b'f# coding=utf-8\n'])

    def test_mismatched_bom_and_cookie_second_line_raises_syntaxerror(self):
        lines = (
            b'\xef\xbb\xbf#! something\n',
            b'# vim: set fileencoding=ascii :\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        readline = self.get_readline(lines)
        self.assertRaises(SyntaxError, detect_encoding, readline)

    def test_cookie_second_line_noncommented_first_line(self):
        lines = (
            b"print('\xc2\xa3')\n",
            b'# vim: set fileencoding=iso8859-15 :\n',
            b"print('\xe2\x82\xac')\n"
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8')
        expected = [b"print('\xc2\xa3')\n"]
        self.assertEqual(consumed_lines, expected)

    def test_cookie_second_line_commented_first_line(self):
        lines = (
            b"#print('\xc2\xa3')\n",
            b'# vim: set fileencoding=iso8859-15 :\n',
            b"print('\xe2\x82\xac')\n"
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'iso8859-15')
        expected = [b"#print('\xc2\xa3')\n", b'# vim: set fileencoding=iso8859-15 :\n']
        self.assertEqual(consumed_lines, expected)

    def test_cookie_second_line_empty_first_line(self):
        lines = (
            b'\n',
            b'# vim: set fileencoding=iso8859-15 :\n',
            b"print('\xe2\x82\xac')\n"
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'iso8859-15')
        expected = [b'\n', b'# vim: set fileencoding=iso8859-15 :\n']
        self.assertEqual(consumed_lines, expected)

    def test_latin1_normalization(self):
        # See get_normal_name() in tokenizer.c.
        encodings = ("latin-1", "iso-8859-1", "iso-latin-1", "latin-1-unix",
                     "iso-8859-1-unix", "iso-latin-1-mac")
        for encoding in encodings:
            for rep in ("-", "_"):
                enc = encoding.replace("-", rep)
                lines = (b"#!/usr/bin/python\n",
                         b"# coding: " + enc.encode("ascii") + b"\n",
                         b"print(things)\n",
                         b"do_something += 4\n")
                rl = self.get_readline(lines)
                found, consumed_lines = detect_encoding(rl)
                self.assertEqual(found, "iso-8859-1")

    def test_syntaxerror_latin1(self):
        # Issue 14629: need to raise SyntaxError if the first
        # line(s) have non-UTF-8 characters
        lines = (
            b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S
        )
        readline = self.get_readline(lines)
        self.assertRaises(SyntaxError, detect_encoding, readline)

    def test_utf8_normalization(self):
        # See get_normal_name() in tokenizer.c.
        encodings = ("utf-8", "utf-8-mac", "utf-8-unix")
        for encoding in encodings:
            for rep in ("-", "_"):
                enc = encoding.replace("-", rep)
                lines = (b"#!/usr/bin/python\n",
                         b"# coding: " + enc.encode("ascii") + b"\n",
                         b"1 + 3\n")
                rl = self.get_readline(lines)
                found, consumed_lines = detect_encoding(rl)
                self.assertEqual(found, "utf-8")

    def test_short_files(self):
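        # Inputs of zero or one line must still yield a sane encoding.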
        readline = self.get_readline((b'print(something)\n',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, [b'print(something)\n'])

        encoding, consumed_lines = detect_encoding(self.get_readline(()))
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, [])

        readline = self.get_readline((b'\xef\xbb\xbfprint(something)\n',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines, [b'print(something)\n'])

        readline = self.get_readline((b'\xef\xbb\xbf',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines, [])

        readline = self.get_readline((b'# coding: bad\n',))
        self.assertRaises(SyntaxError, detect_encoding, readline)

    def test_false_encoding(self):
        # Issue 18873: "Encoding" detected in non-comment lines
        readline = self.get_readline((b'print("#coding=fake")',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, [b'print("#coding=fake")'])

    def test_open(self):
        filename = support.TESTFN + '.py'
        self.addCleanup(support.unlink, filename)

        # test coding cookie
        for encoding in ('iso-8859-15', 'utf-8'):
            with open(filename, 'w', encoding=encoding) as fp:
                print("# coding: %s" % encoding, file=fp)
                print("print('euro:\u20ac')", file=fp)
            with tokenize_open(filename) as fp:
                self.assertEqual(fp.encoding, encoding)
                self.assertEqual(fp.mode, 'r')

        # test BOM (no coding cookie)
        with open(filename, 'w', encoding='utf-8-sig') as fp:
            print("print('euro:\u20ac')", file=fp)
        with tokenize_open(filename) as fp:
            self.assertEqual(fp.encoding, 'utf-8-sig')
            self.assertEqual(fp.mode, 'r')

    def test_filename_in_exception(self):
        # When possible, include the file name in the exception.
        path = 'some_file_path'
        lines = (
            b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S
        )
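        # A minimal file-like stand-in: just a name attribute and readline().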
        class Bunk:
            def __init__(self, lines, path):
                self.name = path
                self._lines = lines
                self._index = 0

            def readline(self):
                if self._index == len(lines):
                    raise StopIteration
                line = lines[self._index]
                self._index += 1
                return line

        with self.assertRaises(SyntaxError):
            ins = Bunk(lines, path)
            # Make sure lacking a name isn't an issue.
            del ins.name
            detect_encoding(ins.readline)
        with self.assertRaisesRegex(SyntaxError, '.*{}'.format(path)):
            ins = Bunk(lines, path)
            detect_encoding(ins.readline)

    def test_open_error(self):
        # Issue #23840: open() must close the binary file on error
        m = BytesIO(b'#coding:xxx')
        with mock.patch('tokenize._builtin_open', return_value=m):
            self.assertRaises(SyntaxError, tokenize_open, 'foobar')
        self.assertTrue(m.closed)


class TestTokenize(TestCase):

    def test_tokenize(self):
        import tokenize as tokenize_module
        encoding = object()
        encoding_used = None
        def mock_detect_encoding(readline):
            return encoding, [b'first', b'second']

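        # Fake _tokenize: simply collect every line readline() yields
        # until it returns an empty (falsy) line.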
        def mock__tokenize(readline, encoding):
            nonlocal encoding_used
            encoding_used = encoding
            out = []
            while True:
                next_line = readline()
                if next_line:
                    out.append(next_line)
                    continue
                return out

        counter = 0
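        # Fake readline: serve b'1' through b'4', then b'' to signal EOF.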
        def mock_readline():
            nonlocal counter
            counter += 1
            if counter == 5:
                return b''
            return str(counter).encode()

        orig_detect_encoding = tokenize_module.detect_encoding
        orig__tokenize = tokenize_module._tokenize
        tokenize_module.detect_encoding = mock_detect_encoding
        tokenize_module._tokenize = mock__tokenize
        try:
            results = tokenize(mock_readline)
            self.assertEqual(list(results),
                             [b'first', b'second', b'1', b'2', b'3', b'4'])
        finally:
            tokenize_module.detect_encoding = orig_detect_encoding
            tokenize_module._tokenize = orig__tokenize

        self.assertTrue(encoding_used, encoding)

    def test_oneline_defs(self):
        buf = []
        for i in range(500):
            buf.append('def i{i}(): return {i}'.format(i=i))
        buf.append('OK')
        buf = '\n'.join(buf)

        # Test that 500 consecutive, one-line defs is OK
        toks = list(tokenize(BytesIO(buf.encode('utf-8')).readline))
        self.assertEqual(toks[-2].string, 'OK') # [-1] is always ENDMARKER

    def assertExactTypeEqual(self, opstr, *optypes):
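        # Tokenize opstr and check each token's exact_type, allowing for
        # the ENCODING token at the start and the ENDMARKER at the end.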
        tokens = list(tokenize(BytesIO(opstr.encode('utf-8')).readline))
        num_optypes = len(optypes)
        self.assertEqual(len(tokens), 2 + num_optypes)
        self.assertEqual(token.tok_name[tokens[0].exact_type],
                         token.tok_name[ENCODING])
        for i in range(num_optypes):
            self.assertEqual(token.tok_name[tokens[i + 1].exact_type],
                             token.tok_name[optypes[i]])
        self.assertEqual(token.tok_name[tokens[1 + num_optypes].exact_type],
                         token.tok_name[token.ENDMARKER])

    def test_exact_type(self):
        self.assertExactTypeEqual('()', token.LPAR, token.RPAR)
        self.assertExactTypeEqual('[]', token.LSQB, token.RSQB)
        self.assertExactTypeEqual(':', token.COLON)
        self.assertExactTypeEqual(',', token.COMMA)
        self.assertExactTypeEqual(';', token.SEMI)
        self.assertExactTypeEqual('+', token.PLUS)
        self.assertExactTypeEqual('-', token.MINUS)
        self.assertExactTypeEqual('*', token.STAR)
        self.assertExactTypeEqual('/', token.SLASH)
        self.assertExactTypeEqual('|', token.VBAR)
        self.assertExactTypeEqual('&', token.AMPER)
        self.assertExactTypeEqual('<', token.LESS)
        self.assertExactTypeEqual('>', token.GREATER)
        self.assertExactTypeEqual('=', token.EQUAL)
        self.assertExactTypeEqual('.', token.DOT)
        self.assertExactTypeEqual('%', token.PERCENT)
        self.assertExactTypeEqual('{}', token.LBRACE, token.RBRACE)
        self.assertExactTypeEqual('==', token.EQEQUAL)
        self.assertExactTypeEqual('!=', token.NOTEQUAL)
        self.assertExactTypeEqual('<=', token.LESSEQUAL)
        self.assertExactTypeEqual('>=', token.GREATEREQUAL)
        self.assertExactTypeEqual('~', token.TILDE)
        self.assertExactTypeEqual('^', token.CIRCUMFLEX)
        self.assertExactTypeEqual('<<', token.LEFTSHIFT)
        self.assertExactTypeEqual('>>', token.RIGHTSHIFT)
        self.assertExactTypeEqual('**', token.DOUBLESTAR)
        self.assertExactTypeEqual('+=', token.PLUSEQUAL)
        self.assertExactTypeEqual('-=', token.MINEQUAL)
        self.assertExactTypeEqual('*=', token.STAREQUAL)
        self.assertExactTypeEqual('/=', token.SLASHEQUAL)
        self.assertExactTypeEqual('%=', token.PERCENTEQUAL)
        self.assertExactTypeEqual('&=', token.AMPEREQUAL)
        self.assertExactTypeEqual('|=', token.VBAREQUAL)
        self.assertExactTypeEqual('^=', token.CIRCUMFLEXEQUAL)
        self.assertExactTypeEqual('<<=', token.LEFTSHIFTEQUAL)
        self.assertExactTypeEqual('>>=', token.RIGHTSHIFTEQUAL)
        self.assertExactTypeEqual('**=', token.DOUBLESTAREQUAL)
        self.assertExactTypeEqual('//', token.DOUBLESLASH)
        self.assertExactTypeEqual('//=', token.DOUBLESLASHEQUAL)
        self.assertExactTypeEqual('@', token.AT)
        self.assertExactTypeEqual('@=', token.ATEQUAL)

        self.assertExactTypeEqual('a**2+b**2==c**2',
                                  NAME, token.DOUBLESTAR, NUMBER,
                                  token.PLUS,
                                  NAME, token.DOUBLESTAR, NUMBER,
                                  token.EQEQUAL,
                                  NAME, token.DOUBLESTAR, NUMBER)
        self.assertExactTypeEqual('{1, 2, 3}',
                                  token.LBRACE,
                                  token.NUMBER, token.COMMA,
                                  token.NUMBER, token.COMMA,
                                  token.NUMBER,
                                  token.RBRACE)
        self.assertExactTypeEqual('^(x & 0x1)',
                                  token.CIRCUMFLEX,
                                  token.LPAR,
                                  token.NAME, token.AMPER, token.NUMBER,
                                  token.RPAR)

    def test_pathological_trailing_whitespace(self):
        # See http://bugs.python.org/issue16152
        self.assertExactTypeEqual('@ ', token.AT)

class UntokenizeTest(TestCase):

    def test_bad_input_order(self):
        # raise if previous row
        u = Untokenizer()
        u.prev_row = 2
        u.prev_col = 2
        with self.assertRaises(ValueError) as cm:
            u.add_whitespace((1,3))
        self.assertEqual(cm.exception.args[0],
            'start (1,3) precedes previous end (2,2)')
        # raise if previous column in row
        self.assertRaises(ValueError, u.add_whitespace, (2,1))

    def test_backslash_continuation(self):
        # The problem is that <whitespace>\<newline> leaves no token
        u = Untokenizer()
        u.prev_row = 1
        u.prev_col = 1
        u.tokens = []
        u.add_whitespace((2, 0))
        self.assertEqual(u.tokens, ['\\\n'])
        u.prev_row = 2
        u.add_whitespace((4, 4))
        self.assertEqual(u.tokens, ['\\\n', '\\\n\\\n', '    '])
        self.assertTrue(roundtrip('a\n  b\n    c\n  \\\n  c\n'))

    def test_iter_compat(self):
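        # compat() and untokenize() must accept any iterable of tokens,
        # not just a list.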
        u = Untokenizer()
        token = (NAME, 'Hello')
        tokens = [(ENCODING, 'utf-8'), token]
        u.compat(token, iter([]))
        self.assertEqual(u.tokens, ["Hello "])
        u = Untokenizer()
        self.assertEqual(u.untokenize(iter([token])), 'Hello ')
        u = Untokenizer()
        self.assertEqual(u.untokenize(iter(tokens)), 'Hello ')
        self.assertEqual(u.encoding, 'utf-8')
        self.assertEqual(untokenize(iter(tokens)), b'Hello ')


class TestRoundtrip(TestCase):
    def roundtrip(self, code):
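        # Tokenize `code` and regenerate the source text with untokenize().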
        if isinstance(code, str):
            code = code.encode('utf-8')
        return untokenize(tokenize(BytesIO(code).readline)).decode('utf-8')

    def test_indentation_semantics_retained(self):
        """
        Ensure that although whitespace might be mutated in a roundtrip,
        the semantic meaning of the indentation remains consistent.
        """
        code = "if False:\n\tx=3\n\tx=3\n"
        codelines = self.roundtrip(code).split('\n')
        self.assertEqual(codelines[1], codelines[2])


__test__ = {"doctests" : doctests, 'decistmt': decistmt}

def test_main():
    from test import test_tokenize
    support.run_doctest(test_tokenize, True)
    support.run_unittest(TestTokenizerAdheresToPep0263)
    support.run_unittest(Test_Tokenize)
    support.run_unittest(TestDetectEncoding)
    support.run_unittest(TestTokenize)
    support.run_unittest(UntokenizeTest)
    support.run_unittest(TestRoundtrip)

if __name__ == "__main__":
    test_main()