doctests = """
Tests for the tokenize module.

The tests can be really simple. Given a small fragment of source
code, print out a table with tokens. The ENDMARKER is omitted for
brevity.

    >>> dump_tokens("1 + 1")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NUMBER     '1'           (1, 0) (1, 1)
    OP         '+'           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)

    >>> dump_tokens("if False:\\n"
    ...             "    # NL\\n"
    ...             "    True = False # NEWLINE\\n")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'if'          (1, 0) (1, 2)
    NAME       'False'       (1, 3) (1, 8)
    OP         ':'           (1, 8) (1, 9)
    NEWLINE    '\\n'          (1, 9) (1, 10)
    COMMENT    '# NL'        (2, 4) (2, 8)
    NL         '\\n'          (2, 8) (2, 9)
    INDENT     '    '        (3, 0) (3, 4)
    NAME       'True'        (3, 4) (3, 8)
    OP         '='           (3, 9) (3, 10)
    NAME       'False'       (3, 11) (3, 16)
    COMMENT    '# NEWLINE'   (3, 17) (3, 26)
    NEWLINE    '\\n'          (3, 26) (3, 27)
    DEDENT     ''            (4, 0) (4, 0)

    >>> indent_error_file = \"""
    ... def k(x):
    ...     x += 2
    ...   x += 5
    ... \"""
    >>> readline = BytesIO(indent_error_file.encode('utf-8')).readline
    >>> for tok in tokenize(readline): pass
    Traceback (most recent call last):
        ...
    IndentationError: unindent does not match any outer indentation level

There are some standard formatting practices that are easy to get right.

    >>> roundtrip("if x == 1:\\n"
    ...           "    print(x)\\n")
    True

    >>> roundtrip("# This is a comment\\n# This also")
    True

Some people use different formatting conventions, which makes
untokenize a little trickier. Note that this test involves trailing
whitespace after the colon. Note that we use hex escapes to make the
two trailing blanks apparent in the expected output.

    >>> roundtrip("if x == 1 : \\n"
    ...           "    print(x)\\n")
    True

    >>> f = support.findfile("tokenize_tests.txt")
    >>> roundtrip(open(f, 'rb'))
    True

    >>> roundtrip("if x == 1:\\n"
    ...           "    # A comment by itself.\\n"
    ...           "    print(x) # Comment here, too.\\n"
    ...           "    # Another comment.\\n"
    ...           "after_if = True\\n")
    True

    >>> roundtrip("if (x # The comments need to go in the right place\\n"
    ...           "    == 1):\\n"
    ...           "    print('x==1')\\n")
    True

    >>> roundtrip("class Test: # A comment here\\n"
    ...           "  # A comment with weird indent\\n"
    ...           "  after_com = 5\\n"
    ...           "  def x(m): return m*5 # a one liner\\n"
    ...           "  def y(m): # A whitespace after the colon\\n"
    ...           "     return y*4 # 3-space indent\\n")
    True

Some error-handling code

    >>> roundtrip("try: import somemodule\\n"
    ...           "except ImportError: # comment\\n"
    ...           "    print('Can not import' # comment2\\n)"
    ...           "else:   print('Loaded')\\n")
    True

Balancing continuation

    >>> roundtrip("a = (3,4, \\n"
    ...           "5,6)\\n"
    ...           "y = [3, 4,\\n"
    ...           "5]\\n"
    ...           "z = {'a': 5,\\n"
    ...           "'b':15, 'c':True}\\n"
    ...           "x = len(y) + 5 - a[\\n"
    ...           "3] - a[2]\\n"
    ...           "+ len(z) - z[\\n"
    ...           "'b']\\n")
    True

Ordinary integers and binary operators

    >>> dump_tokens("0xff <= 255")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NUMBER     '0xff'        (1, 0) (1, 4)
    OP         '<='          (1, 5) (1, 7)
    NUMBER     '255'         (1, 8) (1, 11)
    >>> dump_tokens("0b10 <= 255")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NUMBER     '0b10'        (1, 0) (1, 4)
    OP         '<='          (1, 5) (1, 7)
    NUMBER     '255'         (1, 8) (1, 11)
    >>> dump_tokens("0o123 <= 0O123")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NUMBER     '0o123'       (1, 0) (1, 5)
    OP         '<='          (1, 6) (1, 8)
    NUMBER     '0O123'       (1, 9) (1, 14)
    >>> dump_tokens("1234567 > ~0x15")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NUMBER     '1234567'     (1, 0) (1, 7)
    OP         '>'           (1, 8) (1, 9)
    OP         '~'           (1, 10) (1, 11)
    NUMBER     '0x15'        (1, 11) (1, 15)
    >>> dump_tokens("2134568 != 1231515")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NUMBER     '2134568'     (1, 0) (1, 7)
    OP         '!='          (1, 8) (1, 10)
    NUMBER     '1231515'     (1, 11) (1, 18)
    >>> dump_tokens("(-124561-1) & 200000000")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    OP         '('           (1, 0) (1, 1)
    OP         '-'           (1, 1) (1, 2)
    NUMBER     '124561'      (1, 2) (1, 8)
    OP         '-'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         ')'           (1, 10) (1, 11)
    OP         '&'           (1, 12) (1, 13)
    NUMBER     '200000000'   (1, 14) (1, 23)
    >>> dump_tokens("0xdeadbeef != -1")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NUMBER     '0xdeadbeef'  (1, 0) (1, 10)
    OP         '!='          (1, 11) (1, 13)
    OP         '-'           (1, 14) (1, 15)
    NUMBER     '1'           (1, 15) (1, 16)
    >>> dump_tokens("0xdeadc0de & 12345")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NUMBER     '0xdeadc0de'  (1, 0) (1, 10)
    OP         '&'           (1, 11) (1, 12)
    NUMBER     '12345'       (1, 13) (1, 18)
    >>> dump_tokens("0xFF & 0x15 | 1234")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NUMBER     '0xFF'        (1, 0) (1, 4)
    OP         '&'           (1, 5) (1, 6)
    NUMBER     '0x15'        (1, 7) (1, 11)
    OP         '|'           (1, 12) (1, 13)
    NUMBER     '1234'        (1, 14) (1, 18)

Long integers

    >>> dump_tokens("x = 0")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '0'           (1, 4) (1, 5)
    >>> dump_tokens("x = 0xfffffffffff")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '0xffffffffff (1, 4) (1, 17)
    >>> dump_tokens("x = 123141242151251616110")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '123141242151 (1, 4) (1, 25)
    >>> dump_tokens("x = -15921590215012591")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    OP         '-'           (1, 4) (1, 5)
    NUMBER     '159215902150 (1, 5) (1, 22)

Floating point numbers

    >>> dump_tokens("x = 3.14159")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3.14159'     (1, 4) (1, 11)
    >>> dump_tokens("x = 314159.")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '314159.'     (1, 4) (1, 11)
    >>> dump_tokens("x = .314159")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '.314159'     (1, 4) (1, 11)
    >>> dump_tokens("x = 3e14159")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3e14159'     (1, 4) (1, 11)
    >>> dump_tokens("x = 3E123")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3E123'       (1, 4) (1, 9)
    >>> dump_tokens("x+y = 3e-1230")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '+'           (1, 1) (1, 2)
    NAME       'y'           (1, 2) (1, 3)
    OP         '='           (1, 4) (1, 5)
    NUMBER     '3e-1230'     (1, 6) (1, 13)
    >>> dump_tokens("x = 3.14e159")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3.14e159'    (1, 4) (1, 12)

String literals

    >>> dump_tokens("x = ''; y = \\\"\\\"")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "''"          (1, 4) (1, 6)
    OP         ';'           (1, 6) (1, 7)
    NAME       'y'           (1, 8) (1, 9)
    OP         '='           (1, 10) (1, 11)
    STRING     '""'          (1, 12) (1, 14)
    >>> dump_tokens("x = '\\\"'; y = \\\"'\\\"")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '\\'"\\''       (1, 4) (1, 7)
    OP         ';'           (1, 7) (1, 8)
    NAME       'y'           (1, 9) (1, 10)
    OP         '='           (1, 11) (1, 12)
    STRING     '"\\'"'        (1, 13) (1, 16)
    >>> dump_tokens("x = \\\"doesn't \\\"shrink\\\", does it\\\"")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '"doesn\\'t "' (1, 4) (1, 14)
    NAME       'shrink'      (1, 14) (1, 20)
    STRING     '", does it"' (1, 20) (1, 31)
    >>> dump_tokens("x = 'abc' + 'ABC'")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "'abc'"       (1, 4) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    STRING     "'ABC'"       (1, 12) (1, 17)
    >>> dump_tokens('y = "ABC" + "ABC"')
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'y'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '"ABC"'       (1, 4) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    STRING     '"ABC"'       (1, 12) (1, 17)
    >>> dump_tokens("x = r'abc' + r'ABC' + R'ABC' + R'ABC'")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "r'abc'"      (1, 4) (1, 10)
    OP         '+'           (1, 11) (1, 12)
    STRING     "r'ABC'"      (1, 13) (1, 19)
    OP         '+'           (1, 20) (1, 21)
    STRING     "R'ABC'"      (1, 22) (1, 28)
    OP         '+'           (1, 29) (1, 30)
    STRING     "R'ABC'"      (1, 31) (1, 37)
    >>> dump_tokens('y = r"abc" + r"ABC" + R"ABC" + R"ABC"')
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'y'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     'r"abc"'      (1, 4) (1, 10)
    OP         '+'           (1, 11) (1, 12)
    STRING     'r"ABC"'      (1, 13) (1, 19)
    OP         '+'           (1, 20) (1, 21)
    STRING     'R"ABC"'      (1, 22) (1, 28)
    OP         '+'           (1, 29) (1, 30)
    STRING     'R"ABC"'      (1, 31) (1, 37)

    >>> dump_tokens("u'abc' + U'abc'")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    STRING     "u'abc'"      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     "U'abc'"      (1, 9) (1, 15)
    >>> dump_tokens('u"abc" + U"abc"')
    ENCODING   'utf-8'       (0, 0) (0, 0)
    STRING     'u"abc"'      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     'U"abc"'      (1, 9) (1, 15)

    >>> dump_tokens("b'abc' + B'abc'")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    STRING     "b'abc'"      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     "B'abc'"      (1, 9) (1, 15)
    >>> dump_tokens('b"abc" + B"abc"')
    ENCODING   'utf-8'       (0, 0) (0, 0)
    STRING     'b"abc"'      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     'B"abc"'      (1, 9) (1, 15)
    >>> dump_tokens("br'abc' + bR'abc' + Br'abc' + BR'abc'")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    STRING     "br'abc'"     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     "bR'abc'"     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     "Br'abc'"     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     "BR'abc'"     (1, 30) (1, 37)
    >>> dump_tokens('br"abc" + bR"abc" + Br"abc" + BR"abc"')
    ENCODING   'utf-8'       (0, 0) (0, 0)
    STRING     'br"abc"'     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     'bR"abc"'     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     'Br"abc"'     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     'BR"abc"'     (1, 30) (1, 37)
    >>> dump_tokens("rb'abc' + rB'abc' + Rb'abc' + RB'abc'")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    STRING     "rb'abc'"     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     "rB'abc'"     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     "Rb'abc'"     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     "RB'abc'"     (1, 30) (1, 37)
    >>> dump_tokens('rb"abc" + rB"abc" + Rb"abc" + RB"abc"')
    ENCODING   'utf-8'       (0, 0) (0, 0)
    STRING     'rb"abc"'     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     'rB"abc"'     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     'Rb"abc"'     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     'RB"abc"'     (1, 30) (1, 37)

Operators

    >>> dump_tokens("def d22(a, b, c=2, d=2, *k): pass")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'def'         (1, 0) (1, 3)
    NAME       'd22'         (1, 4) (1, 7)
    OP         '('           (1, 7) (1, 8)
    NAME       'a'           (1, 8) (1, 9)
    OP         ','           (1, 9) (1, 10)
    NAME       'b'           (1, 11) (1, 12)
    OP         ','           (1, 12) (1, 13)
    NAME       'c'           (1, 14) (1, 15)
    OP         '='           (1, 15) (1, 16)
    NUMBER     '2'           (1, 16) (1, 17)
    OP         ','           (1, 17) (1, 18)
    NAME       'd'           (1, 19) (1, 20)
    OP         '='           (1, 20) (1, 21)
    NUMBER     '2'           (1, 21) (1, 22)
    OP         ','           (1, 22) (1, 23)
    OP         '*'           (1, 24) (1, 25)
    NAME       'k'           (1, 25) (1, 26)
    OP         ')'           (1, 26) (1, 27)
    OP         ':'           (1, 27) (1, 28)
    NAME       'pass'        (1, 29) (1, 33)
    >>> dump_tokens("def d01v_(a=1, *k, **w): pass")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'def'         (1, 0) (1, 3)
    NAME       'd01v_'       (1, 4) (1, 9)
    OP         '('           (1, 9) (1, 10)
    NAME       'a'           (1, 10) (1, 11)
    OP         '='           (1, 11) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         ','           (1, 13) (1, 14)
    OP         '*'           (1, 15) (1, 16)
    NAME       'k'           (1, 16) (1, 17)
    OP         ','           (1, 17) (1, 18)
    OP         '**'          (1, 19) (1, 21)
    NAME       'w'           (1, 21) (1, 22)
    OP         ')'           (1, 22) (1, 23)
    OP         ':'           (1, 23) (1, 24)
    NAME       'pass'        (1, 25) (1, 29)

Comparison

    >>> dump_tokens("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != " +
    ...             "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'if'          (1, 0) (1, 2)
    NUMBER     '1'           (1, 3) (1, 4)
    OP         '<'           (1, 5) (1, 6)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '>'           (1, 9) (1, 10)
    NUMBER     '1'           (1, 11) (1, 12)
    OP         '=='          (1, 13) (1, 15)
    NUMBER     '1'           (1, 16) (1, 17)
    OP         '>='          (1, 18) (1, 20)
    NUMBER     '5'           (1, 21) (1, 22)
    OP         '<='          (1, 23) (1, 25)
    NUMBER     '0x15'        (1, 26) (1, 30)
    OP         '<='          (1, 31) (1, 33)
    NUMBER     '0x12'        (1, 34) (1, 38)
    OP         '!='          (1, 39) (1, 41)
    NUMBER     '1'           (1, 42) (1, 43)
    NAME       'and'         (1, 44) (1, 47)
    NUMBER     '5'           (1, 48) (1, 49)
    NAME       'in'          (1, 50) (1, 52)
    NUMBER     '1'           (1, 53) (1, 54)
    NAME       'not'         (1, 55) (1, 58)
    NAME       'in'          (1, 59) (1, 61)
    NUMBER     '1'           (1, 62) (1, 63)
    NAME       'is'          (1, 64) (1, 66)
    NUMBER     '1'           (1, 67) (1, 68)
    NAME       'or'          (1, 69) (1, 71)
    NUMBER     '5'           (1, 72) (1, 73)
    NAME       'is'          (1, 74) (1, 76)
    NAME       'not'         (1, 77) (1, 80)
    NUMBER     '1'           (1, 81) (1, 82)
    OP         ':'           (1, 82) (1, 83)
    NAME       'pass'        (1, 84) (1, 88)

Shift

    >>> dump_tokens("x = 1 << 1 >> 5")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '<<'          (1, 6) (1, 8)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '>>'          (1, 11) (1, 13)
    NUMBER     '5'           (1, 14) (1, 15)

Additive

    >>> dump_tokens("x = 1 - y + 15 - 1 + 0x124 + z + a[5]")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '-'           (1, 6) (1, 7)
    NAME       'y'           (1, 8) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    NUMBER     '15'          (1, 12) (1, 14)
    OP         '-'           (1, 15) (1, 16)
    NUMBER     '1'           (1, 17) (1, 18)
    OP         '+'           (1, 19) (1, 20)
    NUMBER     '0x124'       (1, 21) (1, 26)
    OP         '+'           (1, 27) (1, 28)
    NAME       'z'           (1, 29) (1, 30)
    OP         '+'           (1, 31) (1, 32)
    NAME       'a'           (1, 33) (1, 34)
    OP         '['           (1, 34) (1, 35)
    NUMBER     '5'           (1, 35) (1, 36)
    OP         ']'           (1, 36) (1, 37)

Multiplicative

    >>> dump_tokens("x = 1//1*1/5*12%0x12@42")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '//'          (1, 5) (1, 7)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '*'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '/'           (1, 10) (1, 11)
    NUMBER     '5'           (1, 11) (1, 12)
    OP         '*'           (1, 12) (1, 13)
    NUMBER     '12'          (1, 13) (1, 15)
    OP         '%'           (1, 15) (1, 16)
    NUMBER     '0x12'        (1, 16) (1, 20)
    OP         '@'           (1, 20) (1, 21)
    NUMBER     '42'          (1, 21) (1, 23)

Unary

    >>> dump_tokens("~1 ^ 1 & 1 |1 ^ -1")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    OP         '~'           (1, 0) (1, 1)
    NUMBER     '1'           (1, 1) (1, 2)
    OP         '^'           (1, 3) (1, 4)
    NUMBER     '1'           (1, 5) (1, 6)
    OP         '&'           (1, 7) (1, 8)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '|'           (1, 11) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         '^'           (1, 14) (1, 15)
    OP         '-'           (1, 16) (1, 17)
    NUMBER     '1'           (1, 17) (1, 18)
    >>> dump_tokens("-1*1/1+1*1//1 - ---1**1")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    OP         '-'           (1, 0) (1, 1)
    NUMBER     '1'           (1, 1) (1, 2)
    OP         '*'           (1, 2) (1, 3)
    NUMBER     '1'           (1, 3) (1, 4)
    OP         '/'           (1, 4) (1, 5)
    NUMBER     '1'           (1, 5) (1, 6)
    OP         '+'           (1, 6) (1, 7)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '*'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '//'          (1, 10) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         '-'           (1, 14) (1, 15)
    OP         '-'           (1, 16) (1, 17)
    OP         '-'           (1, 17) (1, 18)
    OP         '-'           (1, 18) (1, 19)
    NUMBER     '1'           (1, 19) (1, 20)
    OP         '**'          (1, 20) (1, 22)
    NUMBER     '1'           (1, 22) (1, 23)

Selector

    >>> dump_tokens("import sys, time\\nx = sys.modules['time'].time()")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'import'      (1, 0) (1, 6)
    NAME       'sys'         (1, 7) (1, 10)
    OP         ','           (1, 10) (1, 11)
    NAME       'time'        (1, 12) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    NAME       'x'           (2, 0) (2, 1)
    OP         '='           (2, 2) (2, 3)
    NAME       'sys'         (2, 4) (2, 7)
    OP         '.'           (2, 7) (2, 8)
    NAME       'modules'     (2, 8) (2, 15)
    OP         '['           (2, 15) (2, 16)
    STRING     "'time'"      (2, 16) (2, 22)
    OP         ']'           (2, 22) (2, 23)
    OP         '.'           (2, 23) (2, 24)
    NAME       'time'        (2, 24) (2, 28)
    OP         '('           (2, 28) (2, 29)
    OP         ')'           (2, 29) (2, 30)

Methods

    >>> dump_tokens("@staticmethod\\ndef foo(x,y): pass")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    OP         '@'           (1, 0) (1, 1)
    NAME       'staticmethod (1, 1) (1, 13)
    NEWLINE    '\\n'          (1, 13) (1, 14)
    NAME       'def'         (2, 0) (2, 3)
    NAME       'foo'         (2, 4) (2, 7)
    OP         '('           (2, 7) (2, 8)
    NAME       'x'           (2, 8) (2, 9)
    OP         ','           (2, 9) (2, 10)
    NAME       'y'           (2, 10) (2, 11)
    OP         ')'           (2, 11) (2, 12)
    OP         ':'           (2, 12) (2, 13)
    NAME       'pass'        (2, 14) (2, 18)

Backslash means line continuation, except for comments

    >>> roundtrip("x=1+\\\\n"
    ...           "1\\n"
    ...           "# This is a comment\\\\n"
    ...           "# This also\\n")
    True
    >>> roundtrip("# Comment \\\\nx = 0")
    True

Two string literals on the same line

    >>> roundtrip("'' ''")
    True

Test roundtrip on random python modules.
Pass the '-ucpu' option to process the full directory.

    >>> import random
    >>> tempdir = os.path.dirname(f) or os.curdir
    >>> testfiles = glob.glob(os.path.join(tempdir, "test*.py"))

Tokenize is broken on test_pep3131.py because regular expressions are
broken on the obscure unicode identifiers in it. *sigh*
With roundtrip extended to test the 5-tuple mode of untokenize,
7 more testfiles fail. Remove them also until the failure is diagnosed.

    >>> testfiles.remove(os.path.join(tempdir, "test_pep3131.py"))
    >>> for f in ('buffer', 'builtin', 'fileio', 'inspect', 'os', 'platform', 'sys'):
    ...     testfiles.remove(os.path.join(tempdir, "test_%s.py") % f)
    ...
    >>> if not support.is_resource_enabled("cpu"):
    ...     testfiles = random.sample(testfiles, 10)
    ...
    >>> for testfile in testfiles:
    ...     if not roundtrip(open(testfile, 'rb')):
    ...         print("Roundtrip failed for file %s" % testfile)
    ...         break
    ... else: True
    True

Evil tabs

    >>> dump_tokens("def f():\\n\\tif x\\n        \\tpass")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'def'         (1, 0) (1, 3)
    NAME       'f'           (1, 4) (1, 5)
    OP         '('           (1, 5) (1, 6)
    OP         ')'           (1, 6) (1, 7)
    OP         ':'           (1, 7) (1, 8)
    NEWLINE    '\\n'          (1, 8) (1, 9)
    INDENT     '\\t'          (2, 0) (2, 1)
    NAME       'if'          (2, 1) (2, 3)
    NAME       'x'           (2, 4) (2, 5)
    NEWLINE    '\\n'          (2, 5) (2, 6)
    INDENT     '        \\t'  (3, 0) (3, 9)
    NAME       'pass'        (3, 9) (3, 13)
    DEDENT     ''            (4, 0) (4, 0)
    DEDENT     ''            (4, 0) (4, 0)

Non-ascii identifiers

    >>> dump_tokens("Örter = 'places'\\ngrün = 'green'")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'Örter'       (1, 0) (1, 5)
    OP         '='           (1, 6) (1, 7)
    STRING     "'places'"    (1, 8) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    NAME       'grün'        (2, 0) (2, 4)
    OP         '='           (2, 5) (2, 6)
    STRING     "'green'"     (2, 7) (2, 14)
633Legacy unicode literals:
634
Christian Heimes0b3847d2012-06-20 11:17:58 +0200635 >>> dump_tokens("Örter = u'places'\\ngrün = U'green'")
Armin Ronacherc0eaeca2012-03-04 13:07:57 +0000636 ENCODING 'utf-8' (0, 0) (0, 0)
637 NAME 'Örter' (1, 0) (1, 5)
638 OP '=' (1, 6) (1, 7)
639 STRING "u'places'" (1, 8) (1, 17)
640 NEWLINE '\\n' (1, 17) (1, 18)
641 NAME 'grün' (2, 0) (2, 4)
642 OP '=' (2, 5) (2, 6)
Christian Heimes0b3847d2012-06-20 11:17:58 +0200643 STRING "U'green'" (2, 7) (2, 15)
Yury Selivanov75445082015-05-11 22:57:16 -0400644
645Async/await extension:
646
647 >>> dump_tokens("async = 1")
648 ENCODING 'utf-8' (0, 0) (0, 0)
649 NAME 'async' (1, 0) (1, 5)
650 OP '=' (1, 6) (1, 7)
651 NUMBER '1' (1, 8) (1, 9)
652
653 >>> dump_tokens("a = (async = 1)")
654 ENCODING 'utf-8' (0, 0) (0, 0)
655 NAME 'a' (1, 0) (1, 1)
656 OP '=' (1, 2) (1, 3)
657 OP '(' (1, 4) (1, 5)
658 NAME 'async' (1, 5) (1, 10)
659 OP '=' (1, 11) (1, 12)
660 NUMBER '1' (1, 13) (1, 14)
661 OP ')' (1, 14) (1, 15)
662
663 >>> dump_tokens("async()")
664 ENCODING 'utf-8' (0, 0) (0, 0)
665 NAME 'async' (1, 0) (1, 5)
666 OP '(' (1, 5) (1, 6)
667 OP ')' (1, 6) (1, 7)
668
669 >>> dump_tokens("class async(Bar):pass")
670 ENCODING 'utf-8' (0, 0) (0, 0)
671 NAME 'class' (1, 0) (1, 5)
672 NAME 'async' (1, 6) (1, 11)
673 OP '(' (1, 11) (1, 12)
674 NAME 'Bar' (1, 12) (1, 15)
675 OP ')' (1, 15) (1, 16)
676 OP ':' (1, 16) (1, 17)
677 NAME 'pass' (1, 17) (1, 21)
678
679 >>> dump_tokens("class async:pass")
680 ENCODING 'utf-8' (0, 0) (0, 0)
681 NAME 'class' (1, 0) (1, 5)
682 NAME 'async' (1, 6) (1, 11)
683 OP ':' (1, 11) (1, 12)
684 NAME 'pass' (1, 12) (1, 16)
685
686 >>> dump_tokens("await = 1")
687 ENCODING 'utf-8' (0, 0) (0, 0)
688 NAME 'await' (1, 0) (1, 5)
689 OP '=' (1, 6) (1, 7)
690 NUMBER '1' (1, 8) (1, 9)
691
692 >>> dump_tokens("foo.async")
693 ENCODING 'utf-8' (0, 0) (0, 0)
694 NAME 'foo' (1, 0) (1, 3)
695 OP '.' (1, 3) (1, 4)
696 NAME 'async' (1, 4) (1, 9)
697
698 >>> dump_tokens("async for a in b: pass")
699 ENCODING 'utf-8' (0, 0) (0, 0)
700 NAME 'async' (1, 0) (1, 5)
701 NAME 'for' (1, 6) (1, 9)
702 NAME 'a' (1, 10) (1, 11)
703 NAME 'in' (1, 12) (1, 14)
704 NAME 'b' (1, 15) (1, 16)
705 OP ':' (1, 16) (1, 17)
706 NAME 'pass' (1, 18) (1, 22)
707
708 >>> dump_tokens("async with a as b: pass")
709 ENCODING 'utf-8' (0, 0) (0, 0)
710 NAME 'async' (1, 0) (1, 5)
711 NAME 'with' (1, 6) (1, 10)
712 NAME 'a' (1, 11) (1, 12)
713 NAME 'as' (1, 13) (1, 15)
714 NAME 'b' (1, 16) (1, 17)
715 OP ':' (1, 17) (1, 18)
716 NAME 'pass' (1, 19) (1, 23)
717
718 >>> dump_tokens("async.foo")
719 ENCODING 'utf-8' (0, 0) (0, 0)
720 NAME 'async' (1, 0) (1, 5)
721 OP '.' (1, 5) (1, 6)
722 NAME 'foo' (1, 6) (1, 9)
723
724 >>> dump_tokens("async")
725 ENCODING 'utf-8' (0, 0) (0, 0)
726 NAME 'async' (1, 0) (1, 5)
727
728 >>> dump_tokens("async\\n#comment\\nawait")
729 ENCODING 'utf-8' (0, 0) (0, 0)
730 NAME 'async' (1, 0) (1, 5)
731 NEWLINE '\\n' (1, 5) (1, 6)
732 COMMENT '#comment' (2, 0) (2, 8)
733 NL '\\n' (2, 8) (2, 9)
734 NAME 'await' (3, 0) (3, 5)
735
736 >>> dump_tokens("async\\n...\\nawait")
737 ENCODING 'utf-8' (0, 0) (0, 0)
738 NAME 'async' (1, 0) (1, 5)
739 NEWLINE '\\n' (1, 5) (1, 6)
740 OP '...' (2, 0) (2, 3)
741 NEWLINE '\\n' (2, 3) (2, 4)
742 NAME 'await' (3, 0) (3, 5)
743
744 >>> dump_tokens("async\\nawait")
745 ENCODING 'utf-8' (0, 0) (0, 0)
746 NAME 'async' (1, 0) (1, 5)
747 NEWLINE '\\n' (1, 5) (1, 6)
748 NAME 'await' (2, 0) (2, 5)
749
750 >>> dump_tokens("foo.async + 1")
751 ENCODING 'utf-8' (0, 0) (0, 0)
752 NAME 'foo' (1, 0) (1, 3)
753 OP '.' (1, 3) (1, 4)
754 NAME 'async' (1, 4) (1, 9)
755 OP '+' (1, 10) (1, 11)
756 NUMBER '1' (1, 12) (1, 13)
757
758 >>> dump_tokens("async def foo(): pass")
759 ENCODING 'utf-8' (0, 0) (0, 0)
760 ASYNC 'async' (1, 0) (1, 5)
761 NAME 'def' (1, 6) (1, 9)
762 NAME 'foo' (1, 10) (1, 13)
763 OP '(' (1, 13) (1, 14)
764 OP ')' (1, 14) (1, 15)
765 OP ':' (1, 15) (1, 16)
766 NAME 'pass' (1, 17) (1, 21)
767
768 >>> dump_tokens('''async def foo():
769 ... def foo(await):
770 ... await = 1
771 ... if 1:
772 ... await
773 ... async += 1
774 ... ''')
775 ENCODING 'utf-8' (0, 0) (0, 0)
776 ASYNC 'async' (1, 0) (1, 5)
777 NAME 'def' (1, 6) (1, 9)
778 NAME 'foo' (1, 10) (1, 13)
779 OP '(' (1, 13) (1, 14)
780 OP ')' (1, 14) (1, 15)
781 OP ':' (1, 15) (1, 16)
782 NEWLINE '\\n' (1, 16) (1, 17)
783 INDENT ' ' (2, 0) (2, 2)
784 NAME 'def' (2, 2) (2, 5)
785 NAME 'foo' (2, 6) (2, 9)
786 OP '(' (2, 9) (2, 10)
787 NAME 'await' (2, 10) (2, 15)
788 OP ')' (2, 15) (2, 16)
789 OP ':' (2, 16) (2, 17)
790 NEWLINE '\\n' (2, 17) (2, 18)
791 INDENT ' ' (3, 0) (3, 4)
792 NAME 'await' (3, 4) (3, 9)
793 OP '=' (3, 10) (3, 11)
794 NUMBER '1' (3, 12) (3, 13)
795 NEWLINE '\\n' (3, 13) (3, 14)
796 DEDENT '' (4, 2) (4, 2)
797 NAME 'if' (4, 2) (4, 4)
798 NUMBER '1' (4, 5) (4, 6)
799 OP ':' (4, 6) (4, 7)
800 NEWLINE '\\n' (4, 7) (4, 8)
801 INDENT ' ' (5, 0) (5, 4)
802 AWAIT 'await' (5, 4) (5, 9)
803 NEWLINE '\\n' (5, 9) (5, 10)
804 DEDENT '' (6, 0) (6, 0)
805 DEDENT '' (6, 0) (6, 0)
806 NAME 'async' (6, 0) (6, 5)
807 OP '+=' (6, 6) (6, 8)
808 NUMBER '1' (6, 9) (6, 10)
809 NEWLINE '\\n' (6, 10) (6, 11)
810
811 >>> dump_tokens('''async def foo():
812 ... async for i in 1: pass''')
813 ENCODING 'utf-8' (0, 0) (0, 0)
814 ASYNC 'async' (1, 0) (1, 5)
815 NAME 'def' (1, 6) (1, 9)
816 NAME 'foo' (1, 10) (1, 13)
817 OP '(' (1, 13) (1, 14)
818 OP ')' (1, 14) (1, 15)
819 OP ':' (1, 15) (1, 16)
820 NEWLINE '\\n' (1, 16) (1, 17)
821 INDENT ' ' (2, 0) (2, 2)
822 ASYNC 'async' (2, 2) (2, 7)
823 NAME 'for' (2, 8) (2, 11)
824 NAME 'i' (2, 12) (2, 13)
825 NAME 'in' (2, 14) (2, 16)
826 NUMBER '1' (2, 17) (2, 18)
827 OP ':' (2, 18) (2, 19)
828 NAME 'pass' (2, 20) (2, 24)
829 DEDENT '' (3, 0) (3, 0)
Thomas Wouters89f507f2006-12-13 04:49:30 +0000830"""
831
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000832from test import support
Trent Nelson428de652008-03-18 22:41:35 +0000833from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
Meador Inge00c7f852012-01-19 00:44:45 -0600834 STRING, ENDMARKER, ENCODING, tok_name, detect_encoding,
Terry Jan Reedy5e6db312014-02-17 16:45:48 -0500835 open as tokenize_open, Untokenizer)
Trent Nelson428de652008-03-18 22:41:35 +0000836from io import BytesIO
Victor Stinner387729e2015-05-26 00:43:58 +0200837from unittest import TestCase, mock
Trent Nelson428de652008-03-18 22:41:35 +0000838import os, sys, glob
Meador Inge00c7f852012-01-19 00:44:45 -0600839import token
Raymond Hettinger68c04532005-06-10 11:05:19 +0000840
Thomas Wouters89f507f2006-12-13 04:49:30 +0000841def dump_tokens(s):
842 """Print out the tokens in s in a table format.
843
844 The ENDMARKER is omitted.
845 """
Trent Nelson428de652008-03-18 22:41:35 +0000846 f = BytesIO(s.encode('utf-8'))
847 for type, token, start, end, line in tokenize(f.readline):
Thomas Wouters89f507f2006-12-13 04:49:30 +0000848 if type == ENDMARKER:
849 break
850 type = tok_name[type]
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000851 print("%(type)-10.10s %(token)-13.13r %(start)s %(end)s" % locals())
Thomas Wouters89f507f2006-12-13 04:49:30 +0000852
Trent Nelson428de652008-03-18 22:41:35 +0000853def roundtrip(f):
854 """
855 Test roundtrip for `untokenize`. `f` is an open file or a string.
Terry Jan Reedy938ba682014-02-23 18:00:31 -0500856 The source code in f is tokenized to both 5- and 2-tuples.
857 Both sequences are converted back to source code via
858 tokenize.untokenize(), and the latter tokenized again to 2-tuples.
859 The test fails if the 3 pair tokenizations do not match.
860
861 When untokenize bugs are fixed, untokenize with 5-tuples should
862 reproduce code that does not contain a backslash continuation
863 following spaces. A proper test should test this.
864
865 This function would be more useful for correcting bugs if it reported
866 the first point of failure, like assertEqual, rather than just
867 returning False -- or if it were only used in unittests and not
868 doctest and actually used assertEqual.
Trent Nelson428de652008-03-18 22:41:35 +0000869 """
Terry Jan Reedy938ba682014-02-23 18:00:31 -0500870 # Get source code and original tokenizations
Trent Nelson428de652008-03-18 22:41:35 +0000871 if isinstance(f, str):
Terry Jan Reedy938ba682014-02-23 18:00:31 -0500872 code = f.encode('utf-8')
873 else:
874 code = f.read()
Brian Curtin9f5f65c2010-10-30 21:35:28 +0000875 f.close()
Terry Jan Reedy938ba682014-02-23 18:00:31 -0500876 readline = iter(code.splitlines(keepends=True)).__next__
877 tokens5 = list(tokenize(readline))
878 tokens2 = [tok[:2] for tok in tokens5]
879 # Reproduce tokens2 from pairs
880 bytes_from2 = untokenize(tokens2)
881 readline2 = iter(bytes_from2.splitlines(keepends=True)).__next__
882 tokens2_from2 = [tok[:2] for tok in tokenize(readline2)]
883 # Reproduce tokens2 from 5-tuples
884 bytes_from5 = untokenize(tokens5)
885 readline5 = iter(bytes_from5.splitlines(keepends=True)).__next__
886 tokens2_from5 = [tok[:2] for tok in tokenize(readline5)]
887 # Compare 3 versions
888 return tokens2 == tokens2_from2 == tokens2_from5
Thomas Wouters89f507f2006-12-13 04:49:30 +0000889
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000890# This is an example from the docs, set up as a doctest.
Raymond Hettinger68c04532005-06-10 11:05:19 +0000891def decistmt(s):
892 """Substitute Decimals for floats in a string of statements.
893
894 >>> from decimal import Decimal
Georg Brandl88fc6642007-02-09 21:28:07 +0000895 >>> s = 'print(+21.3e-5*-.1234/81.7)'
Raymond Hettinger68c04532005-06-10 11:05:19 +0000896 >>> decistmt(s)
Georg Brandl88fc6642007-02-09 21:28:07 +0000897 "print (+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7'))"
Raymond Hettinger68c04532005-06-10 11:05:19 +0000898
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000899 The format of the exponent is inherited from the platform C library.
900 Known cases are "e-007" (Windows) and "e-07" (not Windows). Since
Mark Dickinson388122d2010-08-04 20:56:28 +0000901 we're only showing 11 digits, and the 12th isn't close to 5, the
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000902 rest of the output should be platform-independent.
903
904 >>> exec(s) #doctest: +ELLIPSIS
Mark Dickinson388122d2010-08-04 20:56:28 +0000905 -3.2171603427...e-0...7
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000906
907 Output from calculations with Decimal should be identical across all
908 platforms.
909
Raymond Hettinger68c04532005-06-10 11:05:19 +0000910 >>> exec(decistmt(s))
911 -3.217160342717258261933904529E-7
Raymond Hettinger68c04532005-06-10 11:05:19 +0000912 """
913 result = []
Trent Nelson428de652008-03-18 22:41:35 +0000914 g = tokenize(BytesIO(s.encode('utf-8')).readline) # tokenize the string
Raymond Hettinger68c04532005-06-10 11:05:19 +0000915 for toknum, tokval, _, _, _ in g:
916 if toknum == NUMBER and '.' in tokval: # replace NUMBER tokens
917 result.extend([
918 (NAME, 'Decimal'),
919 (OP, '('),
920 (STRING, repr(tokval)),
921 (OP, ')')
922 ])
923 else:
924 result.append((toknum, tokval))
Trent Nelson428de652008-03-18 22:41:35 +0000925 return untokenize(result).decode('utf-8')
926
927
class TestTokenizerAdheresToPep0263(TestCase):
    """
    Test that tokenizer adheres to the coding behaviour stipulated in PEP 0263.
    """

    def _testFile(self, filename):
        path = os.path.join(os.path.dirname(__file__), filename)
        return roundtrip(open(path, 'rb'))

    def test_utf8_coding_cookie_and_no_utf8_bom(self):
        f = 'tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt'
        self.assertTrue(self._testFile(f))

    def test_latin1_coding_cookie_and_utf8_bom(self):
        """
        As per PEP 0263, if a file starts with a utf-8 BOM signature, the only
        allowed encoding for the comment is 'utf-8'. The text file used in
        this test starts with a BOM signature, but specifies latin1 as the
        coding, so verify that a SyntaxError is raised, which matches the
        behaviour of the interpreter when it encounters a similar condition.
        """
        f = 'tokenize_tests-latin1-coding-cookie-and-utf8-bom-sig.txt'
        self.assertRaises(SyntaxError, self._testFile, f)

    def test_no_coding_cookie_and_utf8_bom(self):
        f = 'tokenize_tests-no-coding-cookie-and-utf8-bom-sig-only.txt'
        self.assertTrue(self._testFile(f))

    def test_utf8_coding_cookie_and_utf8_bom(self):
        f = 'tokenize_tests-utf8-coding-cookie-and-utf8-bom-sig.txt'
        self.assertTrue(self._testFile(f))

    def test_bad_coding_cookie(self):
        self.assertRaises(SyntaxError, self._testFile, 'bad_coding.py')
        self.assertRaises(SyntaxError, self._testFile, 'bad_coding2.py')


966
967 def test__tokenize_decodes_with_specified_encoding(self):
968 literal = '"ЉЊЈЁЂ"'
969 line = literal.encode('utf-8')
970 first = False
971 def readline():
972 nonlocal first
973 if not first:
974 first = True
975 return line
976 else:
977 return b''
978
979 # skip the initial encoding token and the end token
980 tokens = list(_tokenize(readline, encoding='utf-8'))[1:-1]
981 expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
Ezio Melottib3aedd42010-11-20 19:04:17 +0000982 self.assertEqual(tokens, expected_tokens,
983 "bytes not decoded with encoding")
Trent Nelson428de652008-03-18 22:41:35 +0000984
985 def test__tokenize_does_not_decode_with_encoding_none(self):
986 literal = '"ЉЊЈЁЂ"'
987 first = False
988 def readline():
989 nonlocal first
990 if not first:
991 first = True
992 return literal
993 else:
994 return b''
995
996 # skip the end token
997 tokens = list(_tokenize(readline, encoding=None))[:-1]
998 expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
Ezio Melottib3aedd42010-11-20 19:04:17 +0000999 self.assertEqual(tokens, expected_tokens,
1000 "string not tokenized when encoding is None")
Trent Nelson428de652008-03-18 22:41:35 +00001001
1002
class TestDetectEncoding(TestCase):

    def get_readline(self, lines):
        index = 0
        def readline():
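            # Running past the supplied lines raises StopIteration, which
            # detect_encoding()'s internal read_or_stop() treats as EOF (b'').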
            nonlocal index
            if index == len(lines):
                raise StopIteration
            line = lines[index]
            index += 1
            return line
        return readline

    def test_no_bom_no_encoding_cookie(self):
        lines = (
            b'# something\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, list(lines[:2]))

    def test_bom_no_cookie(self):
        lines = (
            b'\xef\xbb\xbf# something\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines,
                         [b'# something\n', b'print(something)\n'])

    def test_cookie_first_line_no_bom(self):
        lines = (
            b'# -*- coding: latin-1 -*-\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'iso-8859-1')
        self.assertEqual(consumed_lines, [b'# -*- coding: latin-1 -*-\n'])

    def test_matched_bom_and_cookie_first_line(self):
        lines = (
            b'\xef\xbb\xbf# coding=utf-8\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines, [b'# coding=utf-8\n'])

    def test_mismatched_bom_and_cookie_first_line_raises_syntaxerror(self):
        lines = (
            b'\xef\xbb\xbf# vim: set fileencoding=ascii :\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        readline = self.get_readline(lines)
        self.assertRaises(SyntaxError, detect_encoding, readline)

    def test_cookie_second_line_no_bom(self):
        lines = (
            b'#! something\n',
            b'# vim: set fileencoding=ascii :\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'ascii')
        expected = [b'#! something\n', b'# vim: set fileencoding=ascii :\n']
        self.assertEqual(consumed_lines, expected)

    def test_matched_bom_and_cookie_second_line(self):
        lines = (
            b'\xef\xbb\xbf#! something\n',
            b'f# coding=utf-8\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines,
                         [b'#! something\n', b'f# coding=utf-8\n'])

    def test_mismatched_bom_and_cookie_second_line_raises_syntaxerror(self):
        lines = (
            b'\xef\xbb\xbf#! something\n',
            b'# vim: set fileencoding=ascii :\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        readline = self.get_readline(lines)
        self.assertRaises(SyntaxError, detect_encoding, readline)

    def test_cookie_second_line_noncommented_first_line(self):
        lines = (
            b"print('\xc2\xa3')\n",
            b'# vim: set fileencoding=iso8859-15 :\n',
            b"print('\xe2\x82\xac')\n"
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8')
        expected = [b"print('\xc2\xa3')\n"]
        self.assertEqual(consumed_lines, expected)

    def test_cookie_second_line_commented_first_line(self):
        lines = (
            b"#print('\xc2\xa3')\n",
            b'# vim: set fileencoding=iso8859-15 :\n',
            b"print('\xe2\x82\xac')\n"
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'iso8859-15')
        expected = [b"#print('\xc2\xa3')\n", b'# vim: set fileencoding=iso8859-15 :\n']
        self.assertEqual(consumed_lines, expected)

    def test_cookie_second_line_empty_first_line(self):
        lines = (
            b'\n',
            b'# vim: set fileencoding=iso8859-15 :\n',
            b"print('\xe2\x82\xac')\n"
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'iso8859-15')
        expected = [b'\n', b'# vim: set fileencoding=iso8859-15 :\n']
        self.assertEqual(consumed_lines, expected)

    def test_latin1_normalization(self):
        # See get_normal_name() in tokenizer.c.
        encodings = ("latin-1", "iso-8859-1", "iso-latin-1", "latin-1-unix",
                     "iso-8859-1-unix", "iso-latin-1-mac")
        for encoding in encodings:
            for rep in ("-", "_"):
                enc = encoding.replace("-", rep)
                lines = (b"#!/usr/bin/python\n",
                         b"# coding: " + enc.encode("ascii") + b"\n",
                         b"print(things)\n",
                         b"do_something += 4\n")
                rl = self.get_readline(lines)
                found, consumed_lines = detect_encoding(rl)
                self.assertEqual(found, "iso-8859-1")

    def test_syntaxerror_latin1(self):
        # Issue 14629: need to raise SyntaxError if the first
        # line(s) have non-UTF-8 characters
        lines = (
            b'print("\xdf")',  # Latin-1: LATIN SMALL LETTER SHARP S
        )
        readline = self.get_readline(lines)
        self.assertRaises(SyntaxError, detect_encoding, readline)


    def test_utf8_normalization(self):
        # See get_normal_name() in tokenizer.c.
        encodings = ("utf-8", "utf-8-mac", "utf-8-unix")
        for encoding in encodings:
            for rep in ("-", "_"):
                enc = encoding.replace("-", rep)
                lines = (b"#!/usr/bin/python\n",
                         b"# coding: " + enc.encode("ascii") + b"\n",
                         b"1 + 3\n")
                rl = self.get_readline(lines)
                found, consumed_lines = detect_encoding(rl)
                self.assertEqual(found, "utf-8")

    def test_short_files(self):
        readline = self.get_readline((b'print(something)\n',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, [b'print(something)\n'])

        encoding, consumed_lines = detect_encoding(self.get_readline(()))
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, [])

        readline = self.get_readline((b'\xef\xbb\xbfprint(something)\n',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines, [b'print(something)\n'])

        readline = self.get_readline((b'\xef\xbb\xbf',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines, [])

        readline = self.get_readline((b'# coding: bad\n',))
        self.assertRaises(SyntaxError, detect_encoding, readline)

    def test_false_encoding(self):
        # Issue 18873: "Encoding" detected in non-comment lines
        readline = self.get_readline((b'print("#coding=fake")',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, [b'print("#coding=fake")'])

    def test_open(self):
        filename = support.TESTFN + '.py'
        self.addCleanup(support.unlink, filename)

        # test coding cookie
        for encoding in ('iso-8859-15', 'utf-8'):
            with open(filename, 'w', encoding=encoding) as fp:
                print("# coding: %s" % encoding, file=fp)
                print("print('euro:\u20ac')", file=fp)
            with tokenize_open(filename) as fp:
                self.assertEqual(fp.encoding, encoding)
                self.assertEqual(fp.mode, 'r')

        # test BOM (no coding cookie)
        with open(filename, 'w', encoding='utf-8-sig') as fp:
            print("print('euro:\u20ac')", file=fp)
        with tokenize_open(filename) as fp:
            self.assertEqual(fp.encoding, 'utf-8-sig')
            self.assertEqual(fp.mode, 'r')

    def test_filename_in_exception(self):
        # When possible, include the file name in the exception.
        path = 'some_file_path'
        lines = (
            b'print("\xdf")',  # Latin-1: LATIN SMALL LETTER SHARP S
        )
        class Bunk:
            def __init__(self, lines, path):
                self.name = path
                self._lines = lines
                self._index = 0

            def readline(self):
                if self._index == len(lines):
                    raise StopIteration
                line = lines[self._index]
                self._index += 1
                return line

        with self.assertRaises(SyntaxError):
            ins = Bunk(lines, path)
            # Make sure lacking a name isn't an issue.
            del ins.name
            detect_encoding(ins.readline)
        with self.assertRaisesRegex(SyntaxError, '.*{}'.format(path)):
            ins = Bunk(lines, path)
            detect_encoding(ins.readline)

    def test_open_error(self):
        # Issue #23840: open() must close the binary file on error
        m = BytesIO(b'#coding:xxx')
        with mock.patch('tokenize._builtin_open', return_value=m):
            self.assertRaises(SyntaxError, tokenize_open, 'foobar')
        self.assertTrue(m.closed)


1259
1260 def test_tokenize(self):
1261 import tokenize as tokenize_module
1262 encoding = object()
1263 encoding_used = None
1264 def mock_detect_encoding(readline):
Serhiy Storchaka74a49ac2015-03-20 16:46:19 +02001265 return encoding, [b'first', b'second']
Trent Nelson428de652008-03-18 22:41:35 +00001266
1267 def mock__tokenize(readline, encoding):
1268 nonlocal encoding_used
1269 encoding_used = encoding
1270 out = []
1271 while True:
1272 next_line = readline()
1273 if next_line:
1274 out.append(next_line)
1275 continue
1276 return out
1277
1278 counter = 0
1279 def mock_readline():
1280 nonlocal counter
1281 counter += 1
1282 if counter == 5:
1283 return b''
Serhiy Storchaka74a49ac2015-03-20 16:46:19 +02001284 return str(counter).encode()
Trent Nelson428de652008-03-18 22:41:35 +00001285
1286 orig_detect_encoding = tokenize_module.detect_encoding
1287 orig__tokenize = tokenize_module._tokenize
1288 tokenize_module.detect_encoding = mock_detect_encoding
1289 tokenize_module._tokenize = mock__tokenize
1290 try:
1291 results = tokenize(mock_readline)
Serhiy Storchaka74a49ac2015-03-20 16:46:19 +02001292 self.assertEqual(list(results),
1293 [b'first', b'second', b'1', b'2', b'3', b'4'])
Trent Nelson428de652008-03-18 22:41:35 +00001294 finally:
1295 tokenize_module.detect_encoding = orig_detect_encoding
1296 tokenize_module._tokenize = orig__tokenize
1297
1298 self.assertTrue(encoding_used, encoding)
Raymond Hettinger68c04532005-06-10 11:05:19 +00001299
Yury Selivanov8085b802015-05-18 12:50:52 -04001300 def test_oneline_defs(self):
1301 buf = []
1302 for i in range(500):
1303 buf.append('def i{i}(): return {i}'.format(i=i))
1304 buf.append('OK')
1305 buf = '\n'.join(buf)
1306
1307 # Test that 500 consequent, one-line defs is OK
1308 toks = list(tokenize(BytesIO(buf.encode('utf-8')).readline))
1309 self.assertEqual(toks[-2].string, 'OK') # [-1] is always ENDMARKER
1310
Meador Inge00c7f852012-01-19 00:44:45 -06001311 def assertExactTypeEqual(self, opstr, *optypes):
1312 tokens = list(tokenize(BytesIO(opstr.encode('utf-8')).readline))
1313 num_optypes = len(optypes)
1314 self.assertEqual(len(tokens), 2 + num_optypes)
1315 self.assertEqual(token.tok_name[tokens[0].exact_type],
1316 token.tok_name[ENCODING])
1317 for i in range(num_optypes):
1318 self.assertEqual(token.tok_name[tokens[i + 1].exact_type],
1319 token.tok_name[optypes[i]])
1320 self.assertEqual(token.tok_name[tokens[1 + num_optypes].exact_type],
1321 token.tok_name[token.ENDMARKER])
1322
1323 def test_exact_type(self):
1324 self.assertExactTypeEqual('()', token.LPAR, token.RPAR)
1325 self.assertExactTypeEqual('[]', token.LSQB, token.RSQB)
1326 self.assertExactTypeEqual(':', token.COLON)
1327 self.assertExactTypeEqual(',', token.COMMA)
1328 self.assertExactTypeEqual(';', token.SEMI)
1329 self.assertExactTypeEqual('+', token.PLUS)
1330 self.assertExactTypeEqual('-', token.MINUS)
1331 self.assertExactTypeEqual('*', token.STAR)
1332 self.assertExactTypeEqual('/', token.SLASH)
1333 self.assertExactTypeEqual('|', token.VBAR)
1334 self.assertExactTypeEqual('&', token.AMPER)
1335 self.assertExactTypeEqual('<', token.LESS)
1336 self.assertExactTypeEqual('>', token.GREATER)
1337 self.assertExactTypeEqual('=', token.EQUAL)
1338 self.assertExactTypeEqual('.', token.DOT)
1339 self.assertExactTypeEqual('%', token.PERCENT)
1340 self.assertExactTypeEqual('{}', token.LBRACE, token.RBRACE)
1341 self.assertExactTypeEqual('==', token.EQEQUAL)
1342 self.assertExactTypeEqual('!=', token.NOTEQUAL)
1343 self.assertExactTypeEqual('<=', token.LESSEQUAL)
1344 self.assertExactTypeEqual('>=', token.GREATEREQUAL)
1345 self.assertExactTypeEqual('~', token.TILDE)
1346 self.assertExactTypeEqual('^', token.CIRCUMFLEX)
1347 self.assertExactTypeEqual('<<', token.LEFTSHIFT)
1348 self.assertExactTypeEqual('>>', token.RIGHTSHIFT)
1349 self.assertExactTypeEqual('**', token.DOUBLESTAR)
1350 self.assertExactTypeEqual('+=', token.PLUSEQUAL)
1351 self.assertExactTypeEqual('-=', token.MINEQUAL)
1352 self.assertExactTypeEqual('*=', token.STAREQUAL)
1353 self.assertExactTypeEqual('/=', token.SLASHEQUAL)
1354 self.assertExactTypeEqual('%=', token.PERCENTEQUAL)
1355 self.assertExactTypeEqual('&=', token.AMPEREQUAL)
1356 self.assertExactTypeEqual('|=', token.VBAREQUAL)
1357 self.assertExactTypeEqual('^=', token.CIRCUMFLEXEQUAL)
1358 self.assertExactTypeEqual('^=', token.CIRCUMFLEXEQUAL)
1359 self.assertExactTypeEqual('<<=', token.LEFTSHIFTEQUAL)
1360 self.assertExactTypeEqual('>>=', token.RIGHTSHIFTEQUAL)
1361 self.assertExactTypeEqual('**=', token.DOUBLESTAREQUAL)
1362 self.assertExactTypeEqual('//', token.DOUBLESLASH)
1363 self.assertExactTypeEqual('//=', token.DOUBLESLASHEQUAL)
1364 self.assertExactTypeEqual('@', token.AT)
Benjamin Petersond51374e2014-04-09 23:55:56 -04001365 self.assertExactTypeEqual('@=', token.ATEQUAL)
Meador Inge00c7f852012-01-19 00:44:45 -06001366
1367 self.assertExactTypeEqual('a**2+b**2==c**2',
1368 NAME, token.DOUBLESTAR, NUMBER,
1369 token.PLUS,
1370 NAME, token.DOUBLESTAR, NUMBER,
1371 token.EQEQUAL,
1372 NAME, token.DOUBLESTAR, NUMBER)
1373 self.assertExactTypeEqual('{1, 2, 3}',
1374 token.LBRACE,
1375 token.NUMBER, token.COMMA,
1376 token.NUMBER, token.COMMA,
1377 token.NUMBER,
1378 token.RBRACE)
1379 self.assertExactTypeEqual('^(x & 0x1)',
1380 token.CIRCUMFLEX,
1381 token.LPAR,
1382 token.NAME, token.AMPER, token.NUMBER,
1383 token.RPAR)
Christian Heimesdd15f6c2008-03-16 00:07:10 +00001384
Ezio Melottifafa8b72012-11-03 17:46:51 +02001385 def test_pathological_trailing_whitespace(self):
1386 # See http://bugs.python.org/issue16152
1387 self.assertExactTypeEqual('@ ', token.AT)
Christian Heimesdd15f6c2008-03-16 00:07:10 +00001388
Terry Jan Reedy5e6db312014-02-17 16:45:48 -05001389class UntokenizeTest(TestCase):
Terry Jan Reedy58edfd92014-02-17 16:49:06 -05001390
Terry Jan Reedy5e6db312014-02-17 16:45:48 -05001391 def test_bad_input_order(self):
Terry Jan Reedy9dc3a362014-02-23 23:33:08 -05001392 # raise if previous row
Terry Jan Reedy5e6db312014-02-17 16:45:48 -05001393 u = Untokenizer()
1394 u.prev_row = 2
1395 u.prev_col = 2
1396 with self.assertRaises(ValueError) as cm:
1397 u.add_whitespace((1,3))
Terry Jan Reedy58edfd92014-02-17 16:49:06 -05001398 self.assertEqual(cm.exception.args[0],
Terry Jan Reedy5e6db312014-02-17 16:45:48 -05001399 'start (1,3) precedes previous end (2,2)')
Terry Jan Reedy9dc3a362014-02-23 23:33:08 -05001400 # raise if previous column in row
Terry Jan Reedy5e6db312014-02-17 16:45:48 -05001401 self.assertRaises(ValueError, u.add_whitespace, (2,1))
1402
Terry Jan Reedy9dc3a362014-02-23 23:33:08 -05001403 def test_backslash_continuation(self):
1404 # The problem is that <whitespace>\<newline> leaves no token
1405 u = Untokenizer()
1406 u.prev_row = 1
1407 u.prev_col = 1
1408 u.tokens = []
1409 u.add_whitespace((2, 0))
1410 self.assertEqual(u.tokens, ['\\\n'])
1411 u.prev_row = 2
1412 u.add_whitespace((4, 4))
1413 self.assertEqual(u.tokens, ['\\\n', '\\\n\\\n', ' '])
1414 self.assertTrue(roundtrip('a\n b\n c\n \\\n c\n'))
1415
Terry Jan Reedy5b8d2c32014-02-17 23:12:16 -05001416 def test_iter_compat(self):
1417 u = Untokenizer()
1418 token = (NAME, 'Hello')
1419 tokens = [(ENCODING, 'utf-8'), token]
1420 u.compat(token, iter([]))
1421 self.assertEqual(u.tokens, ["Hello "])
1422 u = Untokenizer()
1423 self.assertEqual(u.untokenize(iter([token])), 'Hello ')
1424 u = Untokenizer()
1425 self.assertEqual(u.untokenize(iter(tokens)), 'Hello ')
1426 self.assertEqual(u.encoding, 'utf-8')
1427 self.assertEqual(untokenize(iter(tokens)), b'Hello ')
1428
Terry Jan Reedy5e6db312014-02-17 16:45:48 -05001429
Christian Heimesdd15f6c2008-03-16 00:07:10 +00001430__test__ = {"doctests" : doctests, 'decistmt': decistmt}
1431
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001432def test_main():
Christian Heimesdd15f6c2008-03-16 00:07:10 +00001433 from test import test_tokenize
Benjamin Petersonee8712c2008-05-20 21:35:26 +00001434 support.run_doctest(test_tokenize, True)
1435 support.run_unittest(TestTokenizerAdheresToPep0263)
1436 support.run_unittest(Test_Tokenize)
1437 support.run_unittest(TestDetectEncoding)
1438 support.run_unittest(TestTokenize)
Terry Jan Reedy5e6db312014-02-17 16:45:48 -05001439 support.run_unittest(UntokenizeTest)
Neal Norwitzc1505362006-12-28 06:47:50 +00001440
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001441if __name__ == "__main__":
1442 test_main()