Blame - Lib/test/test_tokenize.py - platform/external/python/cpython3

2015-10-06 18:16:28 +0300

[diff] [blame]

1

from test import support

2

from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,

3

STRING, ENDMARKER, ENCODING, tok_name, detect_encoding,

Thomas Kluyver

c56b17b

2018-06-05 19:26:39 +0200

[diff] [blame^]

4

open as tokenize_open, Untokenizer, generate_tokens)

5

from io import BytesIO, StringIO

Stéphane Wirtel

90addd6

2017-07-25 15:33:53 +0200

[diff] [blame]

6

import unittest

Brett Cannon

2016-09-09 14:57:09 -0700

[diff] [blame]

7

from unittest import TestCase, mock

8

from test.test_grammar import (VALID_UNDERSCORE_LITERALS,

9

INVALID_UNDERSCORE_LITERALS)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

10

import os

11

import token

Thomas Wouters

2006-12-13 04:49:30 +0000

[diff] [blame]

12

Thomas Wouters

2006-12-13 04:49:30 +0000

[diff] [blame]

13

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

14

class TokenizeTest(TestCase):

15

# Tests for the tokenize module.

Jason R. Coombs

7cf3638

2015-06-20 19:13:50 -0400

[diff] [blame]

16

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

17

# The tests can be really simple. Given a small fragment of source

18

# code, print out a table with tokens. The ENDMARKER is omitted for

19

# brevity.

20

21

def check_tokenize(self, s, expected):

22

# Format the tokens in s in a table format.

23

# The ENDMARKER is omitted.

24

result = []

25

f = BytesIO(s.encode('utf-8'))

26

for type, token, start, end, line in tokenize(f.readline):

27

if type == ENDMARKER:

28

break

29

type = tok_name[type]

Eric V. Smith

2015-10-16 20:45:53 -0400

[diff] [blame]

30

result.append(f" {type:10} {token!r:13} {start} {end}")

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

31

self.assertEqual(result,

32

[" ENCODING 'utf-8' (0, 0) (0, 0)"] +

33

expected.rstrip().splitlines())

34

35

def test_basic(self):

36

self.check_tokenize("1 + 1", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

37

NUMBER '1' (1, 0) (1, 1)

38

OP '+' (1, 2) (1, 3)

39

NUMBER '1' (1, 4) (1, 5)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

40

""")

41

self.check_tokenize("if False:\n"

42

" # NL\n"

Albert-Jan Nijburg

c471ca4

2017-05-24 12:31:57 +0100

[diff] [blame]

43

" \n"

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

44

" True = False # NEWLINE\n", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

45

NAME 'if' (1, 0) (1, 2)

46

NAME 'False' (1, 3) (1, 8)

47

OP ':' (1, 8) (1, 9)

48

NEWLINE '\\n' (1, 9) (1, 10)

49

COMMENT '# NL' (2, 4) (2, 8)

50

NL '\\n' (2, 8) (2, 9)

Albert-Jan Nijburg

c471ca4

2017-05-24 12:31:57 +0100

[diff] [blame]

51

NL '\\n' (3, 4) (3, 5)

52

INDENT ' ' (4, 0) (4, 4)

53

NAME 'True' (4, 4) (4, 8)

54

OP '=' (4, 9) (4, 10)

55

NAME 'False' (4, 11) (4, 16)

56

COMMENT '# NEWLINE' (4, 17) (4, 26)

57

NEWLINE '\\n' (4, 26) (4, 27)

58

DEDENT '' (5, 0) (5, 0)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

59

""")

60

indent_error_file = b"""\

def k(x):

x += 2

x += 5

"""

readline = BytesIO(indent_error_file).readline

66

with self.assertRaisesRegex(IndentationError,

67

"unindent does not match any "

68

"outer indentation level"):

69

for tok in tokenize(readline):

70

pass

Thomas Wouters

2006-12-13 04:49:30 +0000

[diff] [blame]

71

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

72

def test_int(self):

73

# Ordinary integers and binary operators

74

self.check_tokenize("0xff <= 255", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

75

NUMBER '0xff' (1, 0) (1, 4)

76

OP '<=' (1, 5) (1, 7)

77

NUMBER '255' (1, 8) (1, 11)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

78

""")

79

self.check_tokenize("0b10 <= 255", """\

Eric Smith

74ca557

2008-03-17 19:49:19 +0000

[diff] [blame]

80

NUMBER '0b10' (1, 0) (1, 4)

81

OP '<=' (1, 5) (1, 7)

82

NUMBER '255' (1, 8) (1, 11)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

83

""")

84

self.check_tokenize("0o123 <= 0O123", """\

Eric Smith

74ca557

2008-03-17 19:49:19 +0000

[diff] [blame]

85

NUMBER '0o123' (1, 0) (1, 5)

86

OP '<=' (1, 6) (1, 8)

87

NUMBER '0O123' (1, 9) (1, 14)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

88

""")

89

self.check_tokenize("1234567 > ~0x15", """\

Mark Dickinson

2008-03-16 05:05:12 +0000

[diff] [blame]

90

NUMBER '1234567' (1, 0) (1, 7)

91

OP '>' (1, 8) (1, 9)

92

OP '~' (1, 10) (1, 11)

93

NUMBER '0x15' (1, 11) (1, 15)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

94

""")

95

self.check_tokenize("2134568 != 1231515", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

96

NUMBER '2134568' (1, 0) (1, 7)

97

OP '!=' (1, 8) (1, 10)

Mark Dickinson

2008-03-16 05:05:12 +0000

[diff] [blame]

98

NUMBER '1231515' (1, 11) (1, 18)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

99

""")

100

self.check_tokenize("(-124561-1) & 200000000", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

101

OP '(' (1, 0) (1, 1)

102

OP '-' (1, 1) (1, 2)

103

NUMBER '124561' (1, 2) (1, 8)

104

OP '-' (1, 8) (1, 9)

105

NUMBER '1' (1, 9) (1, 10)

106

OP ')' (1, 10) (1, 11)

107

OP '&' (1, 12) (1, 13)

Mark Dickinson

2008-03-16 05:05:12 +0000

[diff] [blame]

108

NUMBER '200000000' (1, 14) (1, 23)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

109

""")

110

self.check_tokenize("0xdeadbeef != -1", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

111

NUMBER '0xdeadbeef' (1, 0) (1, 10)

112

OP '!=' (1, 11) (1, 13)

113

OP '-' (1, 14) (1, 15)

114

NUMBER '1' (1, 15) (1, 16)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

115

""")

116

self.check_tokenize("0xdeadc0de & 12345", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

117

NUMBER '0xdeadc0de' (1, 0) (1, 10)

118

OP '&' (1, 11) (1, 12)

Mark Dickinson

2008-03-16 05:05:12 +0000

[diff] [blame]

119

NUMBER '12345' (1, 13) (1, 18)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

120

""")

121

self.check_tokenize("0xFF & 0x15 | 1234", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

122

NUMBER '0xFF' (1, 0) (1, 4)

123

OP '&' (1, 5) (1, 6)

124

NUMBER '0x15' (1, 7) (1, 11)

125

OP '|' (1, 12) (1, 13)

126

NUMBER '1234' (1, 14) (1, 18)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

127

""")

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

128

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

129

def test_long(self):

130

# Long integers

131

self.check_tokenize("x = 0", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

132

NAME 'x' (1, 0) (1, 1)

133

OP '=' (1, 2) (1, 3)

Mark Dickinson

2008-03-16 05:05:12 +0000

[diff] [blame]

134

NUMBER '0' (1, 4) (1, 5)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

135

""")

136

self.check_tokenize("x = 0xfffffffffff", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

137

NAME 'x' (1, 0) (1, 1)

138

OP '=' (1, 2) (1, 3)

Eric V. Smith

2015-10-16 20:45:53 -0400

[diff] [blame]

139

NUMBER '0xfffffffffff' (1, 4) (1, 17)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

140

""")

141

self.check_tokenize("x = 123141242151251616110", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

142

NAME 'x' (1, 0) (1, 1)

143

OP '=' (1, 2) (1, 3)

Eric V. Smith

2015-10-16 20:45:53 -0400

[diff] [blame]

144

NUMBER '123141242151251616110' (1, 4) (1, 25)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

145

""")

146

self.check_tokenize("x = -15921590215012591", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

147

NAME 'x' (1, 0) (1, 1)

148

OP '=' (1, 2) (1, 3)

149

OP '-' (1, 4) (1, 5)

Eric V. Smith

2015-10-16 20:45:53 -0400

[diff] [blame]

150

NUMBER '15921590215012591' (1, 5) (1, 22)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

151

""")

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

152

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

153

def test_float(self):

154

# Floating point numbers

155

self.check_tokenize("x = 3.14159", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

156

NAME 'x' (1, 0) (1, 1)

157

OP '=' (1, 2) (1, 3)

158

NUMBER '3.14159' (1, 4) (1, 11)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

159

""")

160

self.check_tokenize("x = 314159.", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

161

NAME 'x' (1, 0) (1, 1)

162

OP '=' (1, 2) (1, 3)

163

NUMBER '314159.' (1, 4) (1, 11)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

164

""")

165

self.check_tokenize("x = .314159", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

166

NAME 'x' (1, 0) (1, 1)

167

OP '=' (1, 2) (1, 3)

168

NUMBER '.314159' (1, 4) (1, 11)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

169

""")

170

self.check_tokenize("x = 3e14159", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

171

NAME 'x' (1, 0) (1, 1)

172

OP '=' (1, 2) (1, 3)

173

NUMBER '3e14159' (1, 4) (1, 11)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

174

""")

175

self.check_tokenize("x = 3E123", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

176

NAME 'x' (1, 0) (1, 1)

177

OP '=' (1, 2) (1, 3)

178

NUMBER '3E123' (1, 4) (1, 9)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

179

""")

180

self.check_tokenize("x+y = 3e-1230", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

181

NAME 'x' (1, 0) (1, 1)

182

OP '+' (1, 1) (1, 2)

183

NAME 'y' (1, 2) (1, 3)

184

OP '=' (1, 4) (1, 5)

185

NUMBER '3e-1230' (1, 6) (1, 13)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

186

""")

187

self.check_tokenize("x = 3.14e159", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

188

NAME 'x' (1, 0) (1, 1)

189

OP '=' (1, 2) (1, 3)

190

NUMBER '3.14e159' (1, 4) (1, 12)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

191

""")

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

192

Brett Cannon

2016-09-09 14:57:09 -0700

[diff] [blame]

193

def test_underscore_literals(self):

194

def number_token(s):

195

f = BytesIO(s.encode('utf-8'))

196

for toktype, token, start, end, line in tokenize(f.readline):

197

if toktype == NUMBER:

198

return token

199

return 'invalid token'

200

for lit in VALID_UNDERSCORE_LITERALS:

201

if '(' in lit:

202

# this won't work with compound complex inputs

203

continue

204

self.assertEqual(number_token(lit), lit)

205

for lit in INVALID_UNDERSCORE_LITERALS:

206

self.assertNotEqual(number_token(lit), lit)

207

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

208

def test_string(self):

209

# String literals

210

self.check_tokenize("x = ''; y = \"\"", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

211

NAME 'x' (1, 0) (1, 1)

212

OP '=' (1, 2) (1, 3)

213

STRING "''" (1, 4) (1, 6)

214

OP ';' (1, 6) (1, 7)

215

NAME 'y' (1, 8) (1, 9)

216

OP '=' (1, 10) (1, 11)

217

STRING '""' (1, 12) (1, 14)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

218

""")

219

self.check_tokenize("x = '\"'; y = \"'\"", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

220

NAME 'x' (1, 0) (1, 1)

221

OP '=' (1, 2) (1, 3)

222

STRING '\\'"\\'' (1, 4) (1, 7)

223

OP ';' (1, 7) (1, 8)

224

NAME 'y' (1, 9) (1, 10)

225

OP '=' (1, 11) (1, 12)

226

STRING '"\\'"' (1, 13) (1, 16)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

227

""")

228

self.check_tokenize("x = \"doesn't \"shrink\", does it\"", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

229

NAME 'x' (1, 0) (1, 1)

230

OP '=' (1, 2) (1, 3)

231

STRING '"doesn\\'t "' (1, 4) (1, 14)

232

NAME 'shrink' (1, 14) (1, 20)

233

STRING '", does it"' (1, 20) (1, 31)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

234

""")

235

self.check_tokenize("x = 'abc' + 'ABC'", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

236

NAME 'x' (1, 0) (1, 1)

237

OP '=' (1, 2) (1, 3)

Mark Dickinson

2008-03-16 05:05:12 +0000

[diff] [blame]

238

STRING "'abc'" (1, 4) (1, 9)

239

OP '+' (1, 10) (1, 11)

240

STRING "'ABC'" (1, 12) (1, 17)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

241

""")

242

self.check_tokenize('y = "ABC" + "ABC"', """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

243

NAME 'y' (1, 0) (1, 1)

244

OP '=' (1, 2) (1, 3)

Mark Dickinson

2008-03-16 05:05:12 +0000

[diff] [blame]

245

STRING '"ABC"' (1, 4) (1, 9)

246

OP '+' (1, 10) (1, 11)

247

STRING '"ABC"' (1, 12) (1, 17)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

248

""")

249

self.check_tokenize("x = r'abc' + r'ABC' + R'ABC' + R'ABC'", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

250

NAME 'x' (1, 0) (1, 1)

251

OP '=' (1, 2) (1, 3)

Mark Dickinson

2008-03-16 05:05:12 +0000

[diff] [blame]

252

STRING "r'abc'" (1, 4) (1, 10)

253

OP '+' (1, 11) (1, 12)

254

STRING "r'ABC'" (1, 13) (1, 19)

255

OP '+' (1, 20) (1, 21)

256

STRING "R'ABC'" (1, 22) (1, 28)

257

OP '+' (1, 29) (1, 30)

258

STRING "R'ABC'" (1, 31) (1, 37)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

259

""")

260

self.check_tokenize('y = r"abc" + r"ABC" + R"ABC" + R"ABC"', """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

261

NAME 'y' (1, 0) (1, 1)

262

OP '=' (1, 2) (1, 3)

Mark Dickinson

2008-03-16 05:05:12 +0000

[diff] [blame]

263

STRING 'r"abc"' (1, 4) (1, 10)

264

OP '+' (1, 11) (1, 12)

265

STRING 'r"ABC"' (1, 13) (1, 19)

266

OP '+' (1, 20) (1, 21)

267

STRING 'R"ABC"' (1, 22) (1, 28)

268

OP '+' (1, 29) (1, 30)

269

STRING 'R"ABC"' (1, 31) (1, 37)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

270

""")

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

271

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

272

self.check_tokenize("u'abc' + U'abc'", """\

Meador Inge

2012-06-16 21:49:08 -0500

[diff] [blame]

273

STRING "u'abc'" (1, 0) (1, 6)

274

OP '+' (1, 7) (1, 8)

275

STRING "U'abc'" (1, 9) (1, 15)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

276

""")

277

self.check_tokenize('u"abc" + U"abc"', """\

Meador Inge

2012-06-16 21:49:08 -0500

[diff] [blame]

278

STRING 'u"abc"' (1, 0) (1, 6)

279

OP '+' (1, 7) (1, 8)

280

STRING 'U"abc"' (1, 9) (1, 15)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

281

""")

Meador Inge

2012-06-16 21:49:08 -0500

[diff] [blame]

282

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

283

self.check_tokenize("b'abc' + B'abc'", """\

Meador Inge

2012-06-16 21:49:08 -0500

[diff] [blame]

284

STRING "b'abc'" (1, 0) (1, 6)

285

OP '+' (1, 7) (1, 8)

286

STRING "B'abc'" (1, 9) (1, 15)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

287

""")

288

self.check_tokenize('b"abc" + B"abc"', """\

Meador Inge

2012-06-16 21:49:08 -0500

[diff] [blame]

289

STRING 'b"abc"' (1, 0) (1, 6)

290

OP '+' (1, 7) (1, 8)

291

STRING 'B"abc"' (1, 9) (1, 15)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

292

""")

293

self.check_tokenize("br'abc' + bR'abc' + Br'abc' + BR'abc'", """\

Meador Inge

2012-06-16 21:49:08 -0500

[diff] [blame]

294

STRING "br'abc'" (1, 0) (1, 7)

295

OP '+' (1, 8) (1, 9)

296

STRING "bR'abc'" (1, 10) (1, 17)

297

OP '+' (1, 18) (1, 19)

298

STRING "Br'abc'" (1, 20) (1, 27)

299

OP '+' (1, 28) (1, 29)

300

STRING "BR'abc'" (1, 30) (1, 37)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

301

""")

302

self.check_tokenize('br"abc" + bR"abc" + Br"abc" + BR"abc"', """\

Meador Inge

2012-06-16 21:49:08 -0500

[diff] [blame]

303

STRING 'br"abc"' (1, 0) (1, 7)

304

OP '+' (1, 8) (1, 9)

305

STRING 'bR"abc"' (1, 10) (1, 17)

306

OP '+' (1, 18) (1, 19)

307

STRING 'Br"abc"' (1, 20) (1, 27)

308

OP '+' (1, 28) (1, 29)

309

STRING 'BR"abc"' (1, 30) (1, 37)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

310

""")

311

self.check_tokenize("rb'abc' + rB'abc' + Rb'abc' + RB'abc'", """\

Meador Inge

2012-06-16 21:49:08 -0500

[diff] [blame]

312

STRING "rb'abc'" (1, 0) (1, 7)

313

OP '+' (1, 8) (1, 9)

314

STRING "rB'abc'" (1, 10) (1, 17)

315

OP '+' (1, 18) (1, 19)

316

STRING "Rb'abc'" (1, 20) (1, 27)

317

OP '+' (1, 28) (1, 29)

318

STRING "RB'abc'" (1, 30) (1, 37)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

319

""")

320

self.check_tokenize('rb"abc" + rB"abc" + Rb"abc" + RB"abc"', """\

Meador Inge

2012-06-16 21:49:08 -0500

[diff] [blame]

321

STRING 'rb"abc"' (1, 0) (1, 7)

322

OP '+' (1, 8) (1, 9)

323

STRING 'rB"abc"' (1, 10) (1, 17)

324

OP '+' (1, 18) (1, 19)

325

STRING 'Rb"abc"' (1, 20) (1, 27)

326

OP '+' (1, 28) (1, 29)

327

STRING 'RB"abc"' (1, 30) (1, 37)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

328

""")

Eric V. Smith

2015-10-16 20:45:53 -0400

[diff] [blame]

329

# Check 0, 1, and 2 character string prefixes.

330

self.check_tokenize(r'"a\

331

de\

332

fg"', """\

333

STRING '"a\\\\\\nde\\\\\\nfg"\' (1, 0) (3, 3)

334

""")

335

self.check_tokenize(r'u"a\

336

de"', """\

337

STRING 'u"a\\\\\\nde"\' (1, 0) (2, 3)

338

""")

339

self.check_tokenize(r'rb"a\

340

d"', """\

341

STRING 'rb"a\\\\\\nd"\' (1, 0) (2, 2)

342

""")

343

self.check_tokenize(r'"""a\

344

b"""', """\

345

STRING '\"\""a\\\\\\nb\"\""' (1, 0) (2, 4)

346

""")

347

self.check_tokenize(r'u"""a\

348

b"""', """\

349

STRING 'u\"\""a\\\\\\nb\"\""' (1, 0) (2, 4)

350

""")

351

self.check_tokenize(r'rb"""a\

352

b\

353

c"""', """\

354

STRING 'rb"\""a\\\\\\nb\\\\\\nc"\""' (1, 0) (3, 4)

355

""")

Eric V. Smith

1c8222c

2015-10-26 04:37:55 -0400

[diff] [blame]

356

self.check_tokenize('f"abc"', """\

357

STRING 'f"abc"' (1, 0) (1, 6)

358

""")

359

self.check_tokenize('fR"a{b}c"', """\

360

STRING 'fR"a{b}c"' (1, 0) (1, 9)

361

""")

362

self.check_tokenize('f"""abc"""', """\

363

STRING 'f\"\"\"abc\"\"\"' (1, 0) (1, 10)

364

""")

365

self.check_tokenize(r'f"abc\

366

def"', """\

367

STRING 'f"abc\\\\\\ndef"' (1, 0) (2, 4)

368

""")

369

self.check_tokenize(r'Rf"abc\

370

def"', """\

371

STRING 'Rf"abc\\\\\\ndef"' (1, 0) (2, 4)

372

""")

Meador Inge

2012-06-16 21:49:08 -0500

[diff] [blame]

373

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

374

def test_function(self):

375

self.check_tokenize("def d22(a, b, c=2, d=2, *k): pass", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

376

NAME 'def' (1, 0) (1, 3)

377

NAME 'd22' (1, 4) (1, 7)

378

OP '(' (1, 7) (1, 8)

379

NAME 'a' (1, 8) (1, 9)

380

OP ',' (1, 9) (1, 10)

381

NAME 'b' (1, 11) (1, 12)

382

OP ',' (1, 12) (1, 13)

383

NAME 'c' (1, 14) (1, 15)

384

OP '=' (1, 15) (1, 16)

385

NUMBER '2' (1, 16) (1, 17)

386

OP ',' (1, 17) (1, 18)

387

NAME 'd' (1, 19) (1, 20)

388

OP '=' (1, 20) (1, 21)

389

NUMBER '2' (1, 21) (1, 22)

390

OP ',' (1, 22) (1, 23)

391

OP '*' (1, 24) (1, 25)

392

NAME 'k' (1, 25) (1, 26)

393

OP ')' (1, 26) (1, 27)

394

OP ':' (1, 27) (1, 28)

395

NAME 'pass' (1, 29) (1, 33)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

396

""")

397

self.check_tokenize("def d01v_(a=1, *k, **w): pass", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

398

NAME 'def' (1, 0) (1, 3)

399

NAME 'd01v_' (1, 4) (1, 9)

400

OP '(' (1, 9) (1, 10)

401

NAME 'a' (1, 10) (1, 11)

402

OP '=' (1, 11) (1, 12)

403

NUMBER '1' (1, 12) (1, 13)

404

OP ',' (1, 13) (1, 14)

405

OP '*' (1, 15) (1, 16)

406

NAME 'k' (1, 16) (1, 17)

407

OP ',' (1, 17) (1, 18)

408

OP '**' (1, 19) (1, 21)

409

NAME 'w' (1, 21) (1, 22)

410

OP ')' (1, 22) (1, 23)

411

OP ':' (1, 23) (1, 24)

412

NAME 'pass' (1, 25) (1, 29)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

413

""")

Jim Fasarakis-Hilliard

d4914e9

2017-03-14 22:16:15 +0200

[diff] [blame]

414

self.check_tokenize("def d23(a: str, b: int=3) -> int: pass", """\

415

NAME 'def' (1, 0) (1, 3)

416

NAME 'd23' (1, 4) (1, 7)

417

OP '(' (1, 7) (1, 8)

418

NAME 'a' (1, 8) (1, 9)

419

OP ':' (1, 9) (1, 10)

420

NAME 'str' (1, 11) (1, 14)

421

OP ',' (1, 14) (1, 15)

422

NAME 'b' (1, 16) (1, 17)

423

OP ':' (1, 17) (1, 18)

424

NAME 'int' (1, 19) (1, 22)

425

OP '=' (1, 22) (1, 23)

426

NUMBER '3' (1, 23) (1, 24)

427

OP ')' (1, 24) (1, 25)

428

OP '->' (1, 26) (1, 28)

429

NAME 'int' (1, 29) (1, 32)

430

OP ':' (1, 32) (1, 33)

431

NAME 'pass' (1, 34) (1, 38)

432

""")

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

433

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

434

def test_comparison(self):

435

# Comparison

436

self.check_tokenize("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != "

437

"1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

438

NAME 'if' (1, 0) (1, 2)

439

NUMBER '1' (1, 3) (1, 4)

440

OP '<' (1, 5) (1, 6)

441

NUMBER '1' (1, 7) (1, 8)

442

OP '>' (1, 9) (1, 10)

443

NUMBER '1' (1, 11) (1, 12)

444

OP '==' (1, 13) (1, 15)

445

NUMBER '1' (1, 16) (1, 17)

446

OP '>=' (1, 18) (1, 20)

447

NUMBER '5' (1, 21) (1, 22)

448

OP '<=' (1, 23) (1, 25)

449

NUMBER '0x15' (1, 26) (1, 30)

450

OP '<=' (1, 31) (1, 33)

451

NUMBER '0x12' (1, 34) (1, 38)

452

OP '!=' (1, 39) (1, 41)

453

NUMBER '1' (1, 42) (1, 43)

454

NAME 'and' (1, 44) (1, 47)

455

NUMBER '5' (1, 48) (1, 49)

456

NAME 'in' (1, 50) (1, 52)

457

NUMBER '1' (1, 53) (1, 54)

458

NAME 'not' (1, 55) (1, 58)

459

NAME 'in' (1, 59) (1, 61)

460

NUMBER '1' (1, 62) (1, 63)

461

NAME 'is' (1, 64) (1, 66)

462

NUMBER '1' (1, 67) (1, 68)

463

NAME 'or' (1, 69) (1, 71)

464

NUMBER '5' (1, 72) (1, 73)

465

NAME 'is' (1, 74) (1, 76)

466

NAME 'not' (1, 77) (1, 80)

467

NUMBER '1' (1, 81) (1, 82)

468

OP ':' (1, 82) (1, 83)

469

NAME 'pass' (1, 84) (1, 88)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

470

""")

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

471

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

472

def test_shift(self):

473

# Shift

474

self.check_tokenize("x = 1 << 1 >> 5", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

475

NAME 'x' (1, 0) (1, 1)

476

OP '=' (1, 2) (1, 3)

477

NUMBER '1' (1, 4) (1, 5)

478

OP '<<' (1, 6) (1, 8)

479

NUMBER '1' (1, 9) (1, 10)

480

OP '>>' (1, 11) (1, 13)

481

NUMBER '5' (1, 14) (1, 15)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

482

""")

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

483

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

484

def test_additive(self):

485

# Additive

486

self.check_tokenize("x = 1 - y + 15 - 1 + 0x124 + z + a[5]", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

487

NAME 'x' (1, 0) (1, 1)

488

OP '=' (1, 2) (1, 3)

489

NUMBER '1' (1, 4) (1, 5)

490

OP '-' (1, 6) (1, 7)

491

NAME 'y' (1, 8) (1, 9)

492

OP '+' (1, 10) (1, 11)

493

NUMBER '15' (1, 12) (1, 14)

494

OP '-' (1, 15) (1, 16)

Mark Dickinson

2008-03-16 05:05:12 +0000

[diff] [blame]

495

NUMBER '1' (1, 17) (1, 18)

496

OP '+' (1, 19) (1, 20)

497

NUMBER '0x124' (1, 21) (1, 26)

498

OP '+' (1, 27) (1, 28)

499

NAME 'z' (1, 29) (1, 30)

500

OP '+' (1, 31) (1, 32)

501

NAME 'a' (1, 33) (1, 34)

502

OP '[' (1, 34) (1, 35)

503

NUMBER '5' (1, 35) (1, 36)

504

OP ']' (1, 36) (1, 37)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

505

""")

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

506

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

507

def test_multiplicative(self):

508

# Multiplicative

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

509

self.check_tokenize("x = 1//1*1/5*12%0x12@42", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

510

NAME 'x' (1, 0) (1, 1)

511

OP '=' (1, 2) (1, 3)

512

NUMBER '1' (1, 4) (1, 5)

513

OP '//' (1, 5) (1, 7)

514

NUMBER '1' (1, 7) (1, 8)

515

OP '*' (1, 8) (1, 9)

516

NUMBER '1' (1, 9) (1, 10)

517

OP '/' (1, 10) (1, 11)

518

NUMBER '5' (1, 11) (1, 12)

519

OP '*' (1, 12) (1, 13)

520

NUMBER '12' (1, 13) (1, 15)

521

OP '%' (1, 15) (1, 16)

522

NUMBER '0x12' (1, 16) (1, 20)

Benjamin Peterson

d51374e

2014-04-09 23:55:56 -0400

[diff] [blame]

523

OP '@' (1, 20) (1, 21)

524

NUMBER '42' (1, 21) (1, 23)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

525

""")

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

526

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

527

def test_unary(self):

528

# Unary

529

self.check_tokenize("~1 ^ 1 & 1 |1 ^ -1", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

530

OP '~' (1, 0) (1, 1)

531

NUMBER '1' (1, 1) (1, 2)

532

OP '^' (1, 3) (1, 4)

533

NUMBER '1' (1, 5) (1, 6)

534

OP '&' (1, 7) (1, 8)

535

NUMBER '1' (1, 9) (1, 10)

536

OP '|' (1, 11) (1, 12)

537

NUMBER '1' (1, 12) (1, 13)

538

OP '^' (1, 14) (1, 15)

539

OP '-' (1, 16) (1, 17)

540

NUMBER '1' (1, 17) (1, 18)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

541

""")

542

self.check_tokenize("-1*1/1+1*1//1 - ---1**1", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

543

OP '-' (1, 0) (1, 1)

544

NUMBER '1' (1, 1) (1, 2)

545

OP '*' (1, 2) (1, 3)

546

NUMBER '1' (1, 3) (1, 4)

547

OP '/' (1, 4) (1, 5)

548

NUMBER '1' (1, 5) (1, 6)

549

OP '+' (1, 6) (1, 7)

550

NUMBER '1' (1, 7) (1, 8)

551

OP '*' (1, 8) (1, 9)

552

NUMBER '1' (1, 9) (1, 10)

553

OP '//' (1, 10) (1, 12)

554

NUMBER '1' (1, 12) (1, 13)

555

OP '-' (1, 14) (1, 15)

556

OP '-' (1, 16) (1, 17)

557

OP '-' (1, 17) (1, 18)

558

OP '-' (1, 18) (1, 19)

559

NUMBER '1' (1, 19) (1, 20)

560

OP '**' (1, 20) (1, 22)

561

NUMBER '1' (1, 22) (1, 23)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

562

""")

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

563

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

564

def test_selector(self):

565

# Selector

566

self.check_tokenize("import sys, time\nx = sys.modules['time'].time()", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

567

NAME 'import' (1, 0) (1, 6)

568

NAME 'sys' (1, 7) (1, 10)

569

OP ',' (1, 10) (1, 11)

570

NAME 'time' (1, 12) (1, 16)

571

NEWLINE '\\n' (1, 16) (1, 17)

572

NAME 'x' (2, 0) (2, 1)

573

OP '=' (2, 2) (2, 3)

574

NAME 'sys' (2, 4) (2, 7)

575

OP '.' (2, 7) (2, 8)

576

NAME 'modules' (2, 8) (2, 15)

577

OP '[' (2, 15) (2, 16)

578

STRING "'time'" (2, 16) (2, 22)

579

OP ']' (2, 22) (2, 23)

580

OP '.' (2, 23) (2, 24)

581

NAME 'time' (2, 24) (2, 28)

582

OP '(' (2, 28) (2, 29)

583

OP ')' (2, 29) (2, 30)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

584

""")

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

585

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

586

def test_method(self):

587

# Methods

588

self.check_tokenize("@staticmethod\ndef foo(x,y): pass", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

589

OP '@' (1, 0) (1, 1)

Eric V. Smith

2015-10-16 20:45:53 -0400

[diff] [blame]

590

NAME 'staticmethod' (1, 1) (1, 13)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

591

NEWLINE '\\n' (1, 13) (1, 14)

592

NAME 'def' (2, 0) (2, 3)

593

NAME 'foo' (2, 4) (2, 7)

594

OP '(' (2, 7) (2, 8)

595

NAME 'x' (2, 8) (2, 9)

596

OP ',' (2, 9) (2, 10)

597

NAME 'y' (2, 10) (2, 11)

598

OP ')' (2, 11) (2, 12)

599

OP ':' (2, 12) (2, 13)

600

NAME 'pass' (2, 14) (2, 18)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

601

""")

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

602

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

603

def test_tabs(self):

604

# Evil tabs

605

self.check_tokenize("def f():\n"

606

"\tif x\n"

607

" \tpass", """\

Benjamin Peterson

a0dfa82

2009-11-13 02:25:08 +0000

[diff] [blame]

608

NAME 'def' (1, 0) (1, 3)

609

NAME 'f' (1, 4) (1, 5)

OP '(' (1, 5) (1, 6)

OP ')' (1, 6) (1, 7)

OP ':' (1, 7) (1, 8)

NEWLINE '\\n' (1, 8) (1, 9)

614

INDENT '\\t' (2, 0) (2, 1)

615

NAME 'if' (2, 1) (2, 3)

616

NAME 'x' (2, 4) (2, 5)

617

NEWLINE '\\n' (2, 5) (2, 6)

618

INDENT ' \\t' (3, 0) (3, 9)

619

NAME 'pass' (3, 9) (3, 13)

620

DEDENT '' (4, 0) (4, 0)

621

DEDENT '' (4, 0) (4, 0)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

622

""")

Benjamin Peterson

33856de

2010-08-30 14:41:20 +0000

[diff] [blame]

623

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

624

def test_non_ascii_identifiers(self):

625

# Non-ascii identifiers

626

self.check_tokenize("Örter = 'places'\ngrün = 'green'", """\

Benjamin Peterson

33856de

2010-08-30 14:41:20 +0000

[diff] [blame]

627

NAME 'Örter' (1, 0) (1, 5)

628

OP '=' (1, 6) (1, 7)

629

STRING "'places'" (1, 8) (1, 16)

630

NEWLINE '\\n' (1, 16) (1, 17)

631

NAME 'grün' (2, 0) (2, 4)

632

OP '=' (2, 5) (2, 6)

633

STRING "'green'" (2, 7) (2, 14)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

634

""")

Armin Ronacher

c0eaeca

2012-03-04 13:07:57 +0000

[diff] [blame]

635

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

636

def test_unicode(self):

637

# Legacy unicode literals:

638

self.check_tokenize("Örter = u'places'\ngrün = U'green'", """\

Armin Ronacher

c0eaeca

2012-03-04 13:07:57 +0000

[diff] [blame]

639

NAME 'Örter' (1, 0) (1, 5)

640

OP '=' (1, 6) (1, 7)

641

STRING "u'places'" (1, 8) (1, 17)

642

NEWLINE '\\n' (1, 17) (1, 18)

643

NAME 'grün' (2, 0) (2, 4)

644

OP '=' (2, 5) (2, 6)

Christian Heimes

0b3847d

2012-06-20 11:17:58 +0200

[diff] [blame]

645

STRING "U'green'" (2, 7) (2, 15)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

646

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

647

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

648

def test_async(self):

649

# Async/await extension:

650

self.check_tokenize("async = 1", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

651

NAME 'async' (1, 0) (1, 5)

652

OP '=' (1, 6) (1, 7)

653

NUMBER '1' (1, 8) (1, 9)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

654

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

655

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

656

self.check_tokenize("a = (async = 1)", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

657

NAME 'a' (1, 0) (1, 1)

658

OP '=' (1, 2) (1, 3)

659

OP '(' (1, 4) (1, 5)

660

NAME 'async' (1, 5) (1, 10)

661

OP '=' (1, 11) (1, 12)

662

NUMBER '1' (1, 13) (1, 14)

663

OP ')' (1, 14) (1, 15)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

664

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

665

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

666

self.check_tokenize("async()", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

667

NAME 'async' (1, 0) (1, 5)

668

OP '(' (1, 5) (1, 6)

669

OP ')' (1, 6) (1, 7)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

670

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

671

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

672

self.check_tokenize("class async(Bar):pass", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

673

NAME 'class' (1, 0) (1, 5)

674

NAME 'async' (1, 6) (1, 11)

675

OP '(' (1, 11) (1, 12)

676

NAME 'Bar' (1, 12) (1, 15)

677

OP ')' (1, 15) (1, 16)

678

OP ':' (1, 16) (1, 17)

679

NAME 'pass' (1, 17) (1, 21)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

680

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

681

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

682

self.check_tokenize("class async:pass", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

683

NAME 'class' (1, 0) (1, 5)

684

NAME 'async' (1, 6) (1, 11)

685

OP ':' (1, 11) (1, 12)

686

NAME 'pass' (1, 12) (1, 16)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

687

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

688

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

689

self.check_tokenize("await = 1", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

690

NAME 'await' (1, 0) (1, 5)

691

OP '=' (1, 6) (1, 7)

692

NUMBER '1' (1, 8) (1, 9)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

693

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

694

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

695

self.check_tokenize("foo.async", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

696

NAME 'foo' (1, 0) (1, 3)

697

OP '.' (1, 3) (1, 4)

698

NAME 'async' (1, 4) (1, 9)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

699

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

700

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

701

self.check_tokenize("async for a in b: pass", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

702

NAME 'async' (1, 0) (1, 5)

703

NAME 'for' (1, 6) (1, 9)

704

NAME 'a' (1, 10) (1, 11)

705

NAME 'in' (1, 12) (1, 14)

706

NAME 'b' (1, 15) (1, 16)

707

OP ':' (1, 16) (1, 17)

708

NAME 'pass' (1, 18) (1, 22)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

709

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

710

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

711

self.check_tokenize("async with a as b: pass", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

712

NAME 'async' (1, 0) (1, 5)

713

NAME 'with' (1, 6) (1, 10)

714

NAME 'a' (1, 11) (1, 12)

715

NAME 'as' (1, 13) (1, 15)

716

NAME 'b' (1, 16) (1, 17)

717

OP ':' (1, 17) (1, 18)

718

NAME 'pass' (1, 19) (1, 23)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

719

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

720

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

721

self.check_tokenize("async.foo", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

722

NAME 'async' (1, 0) (1, 5)

723

OP '.' (1, 5) (1, 6)

724

NAME 'foo' (1, 6) (1, 9)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

725

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

726

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

727

self.check_tokenize("async", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

728

NAME 'async' (1, 0) (1, 5)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

729

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

730

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

731

self.check_tokenize("async\n#comment\nawait", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

732

NAME 'async' (1, 0) (1, 5)

733

NEWLINE '\\n' (1, 5) (1, 6)

734

COMMENT '#comment' (2, 0) (2, 8)

735

NL '\\n' (2, 8) (2, 9)

736

NAME 'await' (3, 0) (3, 5)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

737

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

738

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

739

self.check_tokenize("async\n...\nawait", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

740

NAME 'async' (1, 0) (1, 5)

741

NEWLINE '\\n' (1, 5) (1, 6)

742

OP '...' (2, 0) (2, 3)

743

NEWLINE '\\n' (2, 3) (2, 4)

744

NAME 'await' (3, 0) (3, 5)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

745

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

746

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

747

self.check_tokenize("async\nawait", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

748

NAME 'async' (1, 0) (1, 5)

749

NEWLINE '\\n' (1, 5) (1, 6)

750

NAME 'await' (2, 0) (2, 5)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

751

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

752

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

753

self.check_tokenize("foo.async + 1", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

754

NAME 'foo' (1, 0) (1, 3)

755

OP '.' (1, 3) (1, 4)

756

NAME 'async' (1, 4) (1, 9)

757

OP '+' (1, 10) (1, 11)

758

NUMBER '1' (1, 12) (1, 13)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

759

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

760

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

761

self.check_tokenize("async def foo(): pass", """\

Jelle Zijlstra

2017-10-05 20:24:46 -0700

[diff] [blame]

762

NAME 'async' (1, 0) (1, 5)

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

763

NAME 'def' (1, 6) (1, 9)

764

NAME 'foo' (1, 10) (1, 13)

765

OP '(' (1, 13) (1, 14)

766

OP ')' (1, 14) (1, 15)

767

OP ':' (1, 15) (1, 16)

768

NAME 'pass' (1, 17) (1, 21)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

769

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

770

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

771

self.check_tokenize('''\

async def foo():

def foo(await):

await = 1

if 1:

await

async += 1

''', """\

Jelle Zijlstra

2017-10-05 20:24:46 -0700

[diff] [blame]

779

NAME 'async' (1, 0) (1, 5)

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

780

NAME 'def' (1, 6) (1, 9)

781

NAME 'foo' (1, 10) (1, 13)

782

OP '(' (1, 13) (1, 14)

783

OP ')' (1, 14) (1, 15)

784

OP ':' (1, 15) (1, 16)

785

NEWLINE '\\n' (1, 16) (1, 17)

786

INDENT ' ' (2, 0) (2, 2)

787

NAME 'def' (2, 2) (2, 5)

788

NAME 'foo' (2, 6) (2, 9)

789

OP '(' (2, 9) (2, 10)

Jelle Zijlstra

2017-10-05 20:24:46 -0700

[diff] [blame]

790

NAME 'await' (2, 10) (2, 15)

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

791

OP ')' (2, 15) (2, 16)

792

OP ':' (2, 16) (2, 17)

793

NEWLINE '\\n' (2, 17) (2, 18)

794

INDENT ' ' (3, 0) (3, 4)

Jelle Zijlstra

2017-10-05 20:24:46 -0700

[diff] [blame]

795

NAME 'await' (3, 4) (3, 9)

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

796

OP '=' (3, 10) (3, 11)

797

NUMBER '1' (3, 12) (3, 13)

798

NEWLINE '\\n' (3, 13) (3, 14)

799

DEDENT '' (4, 2) (4, 2)

800

NAME 'if' (4, 2) (4, 4)

801

NUMBER '1' (4, 5) (4, 6)

802

OP ':' (4, 6) (4, 7)

803

NEWLINE '\\n' (4, 7) (4, 8)

804

INDENT ' ' (5, 0) (5, 4)

Jelle Zijlstra

2017-10-05 20:24:46 -0700

[diff] [blame]

805

NAME 'await' (5, 4) (5, 9)

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

806

NEWLINE '\\n' (5, 9) (5, 10)

807

DEDENT '' (6, 0) (6, 0)

808

DEDENT '' (6, 0) (6, 0)

809

NAME 'async' (6, 0) (6, 5)

810

OP '+=' (6, 6) (6, 8)

811

NUMBER '1' (6, 9) (6, 10)

812

NEWLINE '\\n' (6, 10) (6, 11)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

813

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

814

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

815

self.check_tokenize('''\

816

async def foo():

817

async for i in 1: pass''', """\

Jelle Zijlstra

2017-10-05 20:24:46 -0700

[diff] [blame]

818

NAME 'async' (1, 0) (1, 5)

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

819

NAME 'def' (1, 6) (1, 9)

820

NAME 'foo' (1, 10) (1, 13)

821

OP '(' (1, 13) (1, 14)

822

OP ')' (1, 14) (1, 15)

823

OP ':' (1, 15) (1, 16)

824

NEWLINE '\\n' (1, 16) (1, 17)

825

INDENT ' ' (2, 0) (2, 2)

Jelle Zijlstra

2017-10-05 20:24:46 -0700

[diff] [blame]

826

NAME 'async' (2, 2) (2, 7)

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

827

NAME 'for' (2, 8) (2, 11)

828

NAME 'i' (2, 12) (2, 13)

829

NAME 'in' (2, 14) (2, 16)

830

NUMBER '1' (2, 17) (2, 18)

831

OP ':' (2, 18) (2, 19)

832

NAME 'pass' (2, 20) (2, 24)

833

DEDENT '' (3, 0) (3, 0)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

834

""")

Yury Selivanov

8fb307c

2015-07-22 13:33:45 +0300

[diff] [blame]

835

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

836

self.check_tokenize('''async def foo(async): await''', """\

Jelle Zijlstra

2017-10-05 20:24:46 -0700

[diff] [blame]

837

NAME 'async' (1, 0) (1, 5)

Yury Selivanov

8fb307c

2015-07-22 13:33:45 +0300

[diff] [blame]

838

NAME 'def' (1, 6) (1, 9)

839

NAME 'foo' (1, 10) (1, 13)

840

OP '(' (1, 13) (1, 14)

Jelle Zijlstra

2017-10-05 20:24:46 -0700

[diff] [blame]

841

NAME 'async' (1, 14) (1, 19)

Yury Selivanov

8fb307c

2015-07-22 13:33:45 +0300

[diff] [blame]

842

OP ')' (1, 19) (1, 20)

843

OP ':' (1, 20) (1, 21)

Jelle Zijlstra

2017-10-05 20:24:46 -0700

[diff] [blame]

844

NAME 'await' (1, 22) (1, 27)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

845

""")

Yury Selivanov

2015-07-23 15:01:58 +0300

[diff] [blame]

846

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

847

self.check_tokenize('''\

def f():

def baz(): pass

async def bar(): pass

852

853

await = 2''', """\

Yury Selivanov

2015-07-23 15:01:58 +0300

[diff] [blame]

854

NAME 'def' (1, 0) (1, 3)

855

NAME 'f' (1, 4) (1, 5)

OP '(' (1, 5) (1, 6)

OP ')' (1, 6) (1, 7)

OP ':' (1, 7) (1, 8)

NEWLINE '\\n' (1, 8) (1, 9)

860

NL '\\n' (2, 0) (2, 1)

861

INDENT ' ' (3, 0) (3, 2)

862

NAME 'def' (3, 2) (3, 5)

863

NAME 'baz' (3, 6) (3, 9)

864

OP '(' (3, 9) (3, 10)

865

OP ')' (3, 10) (3, 11)

866

OP ':' (3, 11) (3, 12)

867

NAME 'pass' (3, 13) (3, 17)

868

NEWLINE '\\n' (3, 17) (3, 18)

Jelle Zijlstra

2017-10-05 20:24:46 -0700

[diff] [blame]

869

NAME 'async' (4, 2) (4, 7)

Yury Selivanov

2015-07-23 15:01:58 +0300

[diff] [blame]

870

NAME 'def' (4, 8) (4, 11)

871

NAME 'bar' (4, 12) (4, 15)

872

OP '(' (4, 15) (4, 16)

873

OP ')' (4, 16) (4, 17)

874

OP ':' (4, 17) (4, 18)

875

NAME 'pass' (4, 19) (4, 23)

876

NEWLINE '\\n' (4, 23) (4, 24)

877

NL '\\n' (5, 0) (5, 1)

878

NAME 'await' (6, 2) (6, 7)

879

OP '=' (6, 8) (6, 9)

880

NUMBER '2' (6, 10) (6, 11)

881

DEDENT '' (7, 0) (7, 0)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

882

""")

Yury Selivanov

2015-07-23 15:01:58 +0300

[diff] [blame]

883

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

884

self.check_tokenize('''\

async def f():

def baz(): pass

async def bar(): pass

889

890

await = 2''', """\

Jelle Zijlstra

2017-10-05 20:24:46 -0700

[diff] [blame]

891

NAME 'async' (1, 0) (1, 5)

Yury Selivanov

2015-07-23 15:01:58 +0300

[diff] [blame]

892

NAME 'def' (1, 6) (1, 9)

893

NAME 'f' (1, 10) (1, 11)

894

OP '(' (1, 11) (1, 12)

895

OP ')' (1, 12) (1, 13)

896

OP ':' (1, 13) (1, 14)

897

NEWLINE '\\n' (1, 14) (1, 15)

898

NL '\\n' (2, 0) (2, 1)

899

INDENT ' ' (3, 0) (3, 2)

900

NAME 'def' (3, 2) (3, 5)

901

NAME 'baz' (3, 6) (3, 9)

902

OP '(' (3, 9) (3, 10)

903

OP ')' (3, 10) (3, 11)

904

OP ':' (3, 11) (3, 12)

905

NAME 'pass' (3, 13) (3, 17)

906

NEWLINE '\\n' (3, 17) (3, 18)

Jelle Zijlstra

2017-10-05 20:24:46 -0700

[diff] [blame]

907

NAME 'async' (4, 2) (4, 7)

Yury Selivanov

2015-07-23 15:01:58 +0300

[diff] [blame]

908

NAME 'def' (4, 8) (4, 11)

909

NAME 'bar' (4, 12) (4, 15)

910

OP '(' (4, 15) (4, 16)

911

OP ')' (4, 16) (4, 17)

912

OP ':' (4, 17) (4, 18)

913

NAME 'pass' (4, 19) (4, 23)

914

NEWLINE '\\n' (4, 23) (4, 24)

915

NL '\\n' (5, 0) (5, 1)

Jelle Zijlstra

2017-10-05 20:24:46 -0700

[diff] [blame]

916

NAME 'await' (6, 2) (6, 7)

Yury Selivanov

2015-07-23 15:01:58 +0300

[diff] [blame]

917

OP '=' (6, 8) (6, 9)

918

NUMBER '2' (6, 10) (6, 11)

919

DEDENT '' (7, 0) (7, 0)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

920

""")

Thomas Wouters

2006-12-13 04:49:30 +0000

[diff] [blame]

921

Thomas Kluyver

c56b17b

2018-06-05 19:26:39 +0200

[diff] [blame^]

922

class GenerateTokensTest(TokenizeTest):

923

def check_tokenize(self, s, expected):

924

# Format the tokens in s in a table format.

925

# The ENDMARKER is omitted.

926

result = []

927

f = StringIO(s)

928

for type, token, start, end, line in generate_tokens(f.readline):

929

if type == ENDMARKER:

930

break

931

type = tok_name[type]

932

result.append(f" {type:10} {token!r:13} {start} {end}")

933

self.assertEqual(result, expected.rstrip().splitlines())

934

Raymond Hettinger

2005-06-10 11:05:19 +0000

[diff] [blame]

935

Raymond Hettinger

2005-06-10 11:05:19 +0000

[diff] [blame]

936

def decistmt(s):

Raymond Hettinger

2005-06-10 11:05:19 +0000

[diff] [blame]

937

result = []

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

938

g = tokenize(BytesIO(s.encode('utf-8')).readline) # tokenize the string

Raymond Hettinger

2005-06-10 11:05:19 +0000

[diff] [blame]

939

for toknum, tokval, _, _, _ in g:

940

if toknum == NUMBER and '.' in tokval: # replace NUMBER tokens

result.extend([

(NAME, 'Decimal'),

(OP, '('),

(STRING, repr(tokval)),

(OP, ')')

])

else:

result.append((toknum, tokval))

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

949

return untokenize(result).decode('utf-8')

950

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

951

class TestMisc(TestCase):

952

953

def test_decistmt(self):

954

# Substitute Decimals for floats in a string of statements.

955

# This is an example from the docs.

956

957

from decimal import Decimal

958

s = '+21.3e-5*-.1234/81.7'

959

self.assertEqual(decistmt(s),

960

"+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')")

961

962

# The format of the exponent is inherited from the platform C library.

963

# Known cases are "e-007" (Windows) and "e-07" (not Windows). Since

964

# we're only showing 11 digits, and the 12th isn't close to 5, the

965

# rest of the output should be platform-independent.

966

self.assertRegex(repr(eval(s)), '-3.2171603427[0-9]*e-0+7')

967

968

# Output from calculations with Decimal should be identical across all

969

# platforms.

970

self.assertEqual(eval(decistmt(s)),

971

Decimal('-3.217160342717258261933904529E-7'))

972

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

973

974

class TestTokenizerAdheresToPep0263(TestCase):

975

"""

976

Test that tokenizer adheres to the coding behaviour stipulated in PEP 0263.

977

"""

978

979

def _testFile(self, filename):

980

path = os.path.join(os.path.dirname(__file__), filename)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

981

TestRoundtrip.check_roundtrip(self, open(path, 'rb'))

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

982

983

def test_utf8_coding_cookie_and_no_utf8_bom(self):

Ned Deily

2ea6fcc

2011-07-19 16:15:27 -0700

[diff] [blame]

984

f = 'tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt'

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

985

self._testFile(f)

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

986

987

def test_latin1_coding_cookie_and_utf8_bom(self):

988

"""

989

As per PEP 0263, if a file starts with a utf-8 BOM signature, the only

990

allowed encoding for the comment is 'utf-8'. The text file used in

991

this test starts with a BOM signature, but specifies latin1 as the

992

coding, so verify that a SyntaxError is raised, which matches the

993

behaviour of the interpreter when it encounters a similar condition.

994

"""

995

f = 'tokenize_tests-latin1-coding-cookie-and-utf8-bom-sig.txt'

Benjamin Peterson

c9c0f20

2009-06-30 23:06:06 +0000

[diff] [blame]

996

self.assertRaises(SyntaxError, self._testFile, f)

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

997

998

def test_no_coding_cookie_and_utf8_bom(self):

999

f = 'tokenize_tests-no-coding-cookie-and-utf8-bom-sig-only.txt'

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

1000

self._testFile(f)

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1001

1002

def test_utf8_coding_cookie_and_utf8_bom(self):

1003

f = 'tokenize_tests-utf8-coding-cookie-and-utf8-bom-sig.txt'

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

1004

self._testFile(f)

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1005

Florent Xicluna

11f0b41

2012-07-07 12:13:35 +0200

[diff] [blame]

1006

def test_bad_coding_cookie(self):

1007

self.assertRaises(SyntaxError, self._testFile, 'bad_coding.py')

1008

self.assertRaises(SyntaxError, self._testFile, 'bad_coding2.py')

1009

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1010

1011

class Test_Tokenize(TestCase):

1012

1013

def test__tokenize_decodes_with_specified_encoding(self):

1014

literal = '"ЉЊЈЁЂ"'

1015

line = literal.encode('utf-8')

first = False

def readline():

nonlocal first

if not first:

first = True

return line

else:

return b''

# skip the initial encoding token and the end token

1026

tokens = list(_tokenize(readline, encoding='utf-8'))[1:-1]

1027

expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1028

self.assertEqual(tokens, expected_tokens,

1029

"bytes not decoded with encoding")

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1030

1031

def test__tokenize_does_not_decode_with_encoding_none(self):

literal = '"ЉЊЈЁЂ"'

first = False

def readline():

nonlocal first

if not first:

first = True

return literal

else:

return b''

# skip the end token

tokens = list(_tokenize(readline, encoding=None))[:-1]

1044

expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1045

self.assertEqual(tokens, expected_tokens,

1046

"string not tokenized when encoding is None")

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1047

1048

1049

class TestDetectEncoding(TestCase):

1050

1051

def get_readline(self, lines):

index = 0

def readline():

nonlocal index

if index == len(lines):

raise StopIteration

line = lines[index]

index += 1

return line

return readline

def test_no_bom_no_encoding_cookie(self):

1063

lines = (

1064

b'# something\n',

1065

b'print(something)\n',

1066

b'do_something(else)\n'

1067

)

1068

encoding, consumed_lines = detect_encoding(self.get_readline(lines))

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1069

self.assertEqual(encoding, 'utf-8')

1070

self.assertEqual(consumed_lines, list(lines[:2]))

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1071

1072

def test_bom_no_cookie(self):

1073

lines = (

1074

b'\xef\xbb\xbf# something\n',

1075

b'print(something)\n',

1076

b'do_something(else)\n'

1077

)

1078

encoding, consumed_lines = detect_encoding(self.get_readline(lines))

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1079

self.assertEqual(encoding, 'utf-8-sig')

1080

self.assertEqual(consumed_lines,

1081

[b'# something\n', b'print(something)\n'])

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1082

1083

def test_cookie_first_line_no_bom(self):

1084

lines = (

1085

b'# -*- coding: latin-1 -*-\n',

1086

b'print(something)\n',

1087

b'do_something(else)\n'

1088

)

1089

encoding, consumed_lines = detect_encoding(self.get_readline(lines))

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1090

self.assertEqual(encoding, 'iso-8859-1')

1091

self.assertEqual(consumed_lines, [b'# -*- coding: latin-1 -*-\n'])

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1092

1093

def test_matched_bom_and_cookie_first_line(self):

1094

lines = (

1095

b'\xef\xbb\xbf# coding=utf-8\n',

1096

b'print(something)\n',

1097

b'do_something(else)\n'

1098

)

1099

encoding, consumed_lines = detect_encoding(self.get_readline(lines))

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1100

self.assertEqual(encoding, 'utf-8-sig')

1101

self.assertEqual(consumed_lines, [b'# coding=utf-8\n'])

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1102

1103

def test_mismatched_bom_and_cookie_first_line_raises_syntaxerror(self):

1104

lines = (

1105

b'\xef\xbb\xbf# vim: set fileencoding=ascii :\n',

1106

b'print(something)\n',

1107

b'do_something(else)\n'

1108

)

1109

readline = self.get_readline(lines)

1110

self.assertRaises(SyntaxError, detect_encoding, readline)

1111

1112

def test_cookie_second_line_no_bom(self):

1113

lines = (

1114

b'#! something\n',

1115

b'# vim: set fileencoding=ascii :\n',

1116

b'print(something)\n',

1117

b'do_something(else)\n'

1118

)

1119

encoding, consumed_lines = detect_encoding(self.get_readline(lines))

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1120

self.assertEqual(encoding, 'ascii')

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1121

expected = [b'#! something\n', b'# vim: set fileencoding=ascii :\n']

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1122

self.assertEqual(consumed_lines, expected)

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1123

1124

def test_matched_bom_and_cookie_second_line(self):

1125

lines = (

1126

b'\xef\xbb\xbf#! something\n',

1127

b'f# coding=utf-8\n',

1128

b'print(something)\n',

1129

b'do_something(else)\n'

1130

)

1131

encoding, consumed_lines = detect_encoding(self.get_readline(lines))

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1132

self.assertEqual(encoding, 'utf-8-sig')

1133

self.assertEqual(consumed_lines,

1134

[b'#! something\n', b'f# coding=utf-8\n'])

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1135

1136

def test_mismatched_bom_and_cookie_second_line_raises_syntaxerror(self):

1137

lines = (

1138

b'\xef\xbb\xbf#! something\n',

1139

b'# vim: set fileencoding=ascii :\n',

1140

b'print(something)\n',

1141

b'do_something(else)\n'

1142

)

1143

readline = self.get_readline(lines)

1144

self.assertRaises(SyntaxError, detect_encoding, readline)

1145

Serhiy Storchaka

768c16c

2014-01-09 18:36:09 +0200

[diff] [blame]

1146

def test_cookie_second_line_noncommented_first_line(self):

1147

lines = (

1148

b"print('\xc2\xa3')\n",

1149

b'# vim: set fileencoding=iso8859-15 :\n',

1150

b"print('\xe2\x82\xac')\n"

1151

)

1152

encoding, consumed_lines = detect_encoding(self.get_readline(lines))

1153

self.assertEqual(encoding, 'utf-8')

1154

expected = [b"print('\xc2\xa3')\n"]

1155

self.assertEqual(consumed_lines, expected)

1156

1157

def test_cookie_second_line_commented_first_line(self):

1158

lines = (

1159

b"#print('\xc2\xa3')\n",

1160

b'# vim: set fileencoding=iso8859-15 :\n',

1161

b"print('\xe2\x82\xac')\n"

1162

)

1163

encoding, consumed_lines = detect_encoding(self.get_readline(lines))

1164

self.assertEqual(encoding, 'iso8859-15')

1165

expected = [b"#print('\xc2\xa3')\n", b'# vim: set fileencoding=iso8859-15 :\n']

1166

self.assertEqual(consumed_lines, expected)

1167

1168

def test_cookie_second_line_empty_first_line(self):

1169

lines = (

1170

b'\n',

1171

b'# vim: set fileencoding=iso8859-15 :\n',

1172

b"print('\xe2\x82\xac')\n"

1173

)

1174

encoding, consumed_lines = detect_encoding(self.get_readline(lines))

1175

self.assertEqual(encoding, 'iso8859-15')

1176

expected = [b'\n', b'# vim: set fileencoding=iso8859-15 :\n']

1177

self.assertEqual(consumed_lines, expected)

1178

Benjamin Peterson

2009-10-09 21:43:09 +0000

[diff] [blame]

1179

def test_latin1_normalization(self):

1180

# See get_normal_name() in tokenizer.c.

1181

encodings = ("latin-1", "iso-8859-1", "iso-latin-1", "latin-1-unix",

1182

"iso-8859-1-unix", "iso-latin-1-mac")

1183

for encoding in encodings:

1184

for rep in ("-", "_"):

1185

enc = encoding.replace("-", rep)

1186

lines = (b"#!/usr/bin/python\n",

1187

b"# coding: " + enc.encode("ascii") + b"\n",

1188

b"print(things)\n",

1189

b"do_something += 4\n")

1190

rl = self.get_readline(lines)

1191

found, consumed_lines = detect_encoding(rl)

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1192

self.assertEqual(found, "iso-8859-1")

Benjamin Peterson

2009-10-09 21:43:09 +0000

[diff] [blame]

1193

Martin v. Löwis

63674f4

2012-04-20 14:36:47 +0200

[diff] [blame]

1194

def test_syntaxerror_latin1(self):

1195

# Issue 14629: need to raise SyntaxError if the first

1196

# line(s) have non-UTF-8 characters

1197

lines = (

1198

b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S

1199

)

1200

readline = self.get_readline(lines)

1201

self.assertRaises(SyntaxError, detect_encoding, readline)

1202

1203

Benjamin Peterson

2009-10-09 21:43:09 +0000

[diff] [blame]

1204

def test_utf8_normalization(self):

1205

# See get_normal_name() in tokenizer.c.

1206

encodings = ("utf-8", "utf-8-mac", "utf-8-unix")

1207

for encoding in encodings:

1208

for rep in ("-", "_"):

1209

enc = encoding.replace("-", rep)

1210

lines = (b"#!/usr/bin/python\n",

1211

b"# coding: " + enc.encode("ascii") + b"\n",

1212

b"1 + 3\n")

1213

rl = self.get_readline(lines)

1214

found, consumed_lines = detect_encoding(rl)

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1215

self.assertEqual(found, "utf-8")

Benjamin Peterson

2009-10-09 21:43:09 +0000

[diff] [blame]

1216

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1217

def test_short_files(self):

1218

readline = self.get_readline((b'print(something)\n',))

1219

encoding, consumed_lines = detect_encoding(readline)

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1220

self.assertEqual(encoding, 'utf-8')

1221

self.assertEqual(consumed_lines, [b'print(something)\n'])

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1222

1223

encoding, consumed_lines = detect_encoding(self.get_readline(()))

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1224

self.assertEqual(encoding, 'utf-8')

1225

self.assertEqual(consumed_lines, [])

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1226

1227

readline = self.get_readline((b'\xef\xbb\xbfprint(something)\n',))

1228

encoding, consumed_lines = detect_encoding(readline)

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1229

self.assertEqual(encoding, 'utf-8-sig')

1230

self.assertEqual(consumed_lines, [b'print(something)\n'])

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1231

1232

readline = self.get_readline((b'\xef\xbb\xbf',))

1233

encoding, consumed_lines = detect_encoding(readline)

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1234

self.assertEqual(encoding, 'utf-8-sig')

1235

self.assertEqual(consumed_lines, [])

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1236

Benjamin Peterson

433f32c

2008-12-12 01:25:05 +0000

[diff] [blame]

1237

readline = self.get_readline((b'# coding: bad\n',))

1238

self.assertRaises(SyntaxError, detect_encoding, readline)

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1239

Serhiy Storchaka

dafea85

2013-09-16 23:51:56 +0300

[diff] [blame]

1240

def test_false_encoding(self):

1241

# Issue 18873: "Encoding" detected in non-comment lines

1242

readline = self.get_readline((b'print("#coding=fake")',))

1243

encoding, consumed_lines = detect_encoding(readline)

1244

self.assertEqual(encoding, 'utf-8')

1245

self.assertEqual(consumed_lines, [b'print("#coding=fake")'])

1246

Victor Stinner

58c0752

2010-11-09 01:08:59 +0000

[diff] [blame]

1247

def test_open(self):

1248

filename = support.TESTFN + '.py'

1249

self.addCleanup(support.unlink, filename)

1250

1251

# test coding cookie

1252

for encoding in ('iso-8859-15', 'utf-8'):

1253

with open(filename, 'w', encoding=encoding) as fp:

1254

print("# coding: %s" % encoding, file=fp)

1255

print("print('euro:\u20ac')", file=fp)

1256

with tokenize_open(filename) as fp:

Victor Stinner

92665ab

2010-11-09 01:11:31 +0000

[diff] [blame]

1257

self.assertEqual(fp.encoding, encoding)

1258

self.assertEqual(fp.mode, 'r')

Victor Stinner

58c0752

2010-11-09 01:08:59 +0000

[diff] [blame]

1259

1260

# test BOM (no coding cookie)

1261

with open(filename, 'w', encoding='utf-8-sig') as fp:

1262

print("print('euro:\u20ac')", file=fp)

1263

with tokenize_open(filename) as fp:

Victor Stinner

92665ab

2010-11-09 01:11:31 +0000

[diff] [blame]

1264

self.assertEqual(fp.encoding, 'utf-8-sig')

1265

self.assertEqual(fp.mode, 'r')

Victor Stinner

58c0752

2010-11-09 01:08:59 +0000

[diff] [blame]

1266

Brett Cannon

c33f3f2

2012-04-20 13:23:54 -0400

[diff] [blame]

1267

def test_filename_in_exception(self):

1268

# When possible, include the file name in the exception.

1269

path = 'some_file_path'

1270

lines = (

1271

b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S

1272

)

1273

class Bunk:

1274

def __init__(self, lines, path):

self.name = path

self._lines = lines

self._index = 0

def readline(self):

if self._index == len(lines):

1281

raise StopIteration

1282

line = lines[self._index]

self._index += 1

return line

with self.assertRaises(SyntaxError):

1287

ins = Bunk(lines, path)

1288

# Make sure lacking a name isn't an issue.

1289

del ins.name

1290

detect_encoding(ins.readline)

1291

with self.assertRaisesRegex(SyntaxError, '.*{}'.format(path)):

1292

ins = Bunk(lines, path)

1293

detect_encoding(ins.readline)

1294

Victor Stinner

387729e

2015-05-26 00:43:58 +0200

[diff] [blame]

1295

def test_open_error(self):

1296

# Issue #23840: open() must close the binary file on error

1297

m = BytesIO(b'#coding:xxx')

1298

with mock.patch('tokenize._builtin_open', return_value=m):

1299

self.assertRaises(SyntaxError, tokenize_open, 'foobar')

1300

self.assertTrue(m.closed)

1301

1302

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1303

class TestTokenize(TestCase):

1304

1305

def test_tokenize(self):

1306

import tokenize as tokenize_module

1307

encoding = object()

1308

encoding_used = None

1309

def mock_detect_encoding(readline):

Serhiy Storchaka

74a49ac

2015-03-20 16:46:19 +0200

[diff] [blame]

1310

return encoding, [b'first', b'second']

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1311

1312

def mock__tokenize(readline, encoding):

1313

nonlocal encoding_used

1314

encoding_used = encoding

1315

out = []

1316

while True:

1317

next_line = readline()

1318

if next_line:

1319

out.append(next_line)

continue

return out

counter = 0

def mock_readline():

nonlocal counter

counter += 1

if counter == 5:

return b''

Serhiy Storchaka

74a49ac

2015-03-20 16:46:19 +0200

[diff] [blame]

1329

return str(counter).encode()

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1330

1331

orig_detect_encoding = tokenize_module.detect_encoding

1332

orig__tokenize = tokenize_module._tokenize

1333

tokenize_module.detect_encoding = mock_detect_encoding

1334

tokenize_module._tokenize = mock__tokenize

1335

try:

1336

results = tokenize(mock_readline)

Serhiy Storchaka

74a49ac

2015-03-20 16:46:19 +0200

[diff] [blame]

1337

self.assertEqual(list(results),

1338

[b'first', b'second', b'1', b'2', b'3', b'4'])

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1339

finally:

1340

tokenize_module.detect_encoding = orig_detect_encoding

1341

tokenize_module._tokenize = orig__tokenize

1342

1343

self.assertTrue(encoding_used, encoding)

Raymond Hettinger

2005-06-10 11:05:19 +0000

[diff] [blame]

1344

Yury Selivanov

8085b80

2015-05-18 12:50:52 -0400

[diff] [blame]

1345

def test_oneline_defs(self):

1346

buf = []

1347

for i in range(500):

1348

buf.append('def i{i}(): return {i}'.format(i=i))

buf.append('OK')

buf = '\n'.join(buf)

# Test that 500 consequent, one-line defs is OK

1353

toks = list(tokenize(BytesIO(buf.encode('utf-8')).readline))

1354

self.assertEqual(toks[-2].string, 'OK') # [-1] is always ENDMARKER

1355

Meador Inge

2012-01-19 00:44:45 -0600

[diff] [blame]

1356

def assertExactTypeEqual(self, opstr, *optypes):

1357

tokens = list(tokenize(BytesIO(opstr.encode('utf-8')).readline))

1358

num_optypes = len(optypes)

1359

self.assertEqual(len(tokens), 2 + num_optypes)

Albert-Jan Nijburg

fc354f0

2017-05-31 15:00:21 +0100

[diff] [blame]

1360

self.assertEqual(tok_name[tokens[0].exact_type],

1361

tok_name[ENCODING])

Meador Inge

2012-01-19 00:44:45 -0600

[diff] [blame]

1362

for i in range(num_optypes):

Albert-Jan Nijburg

fc354f0

2017-05-31 15:00:21 +0100

[diff] [blame]

1363

self.assertEqual(tok_name[tokens[i + 1].exact_type],

1364

tok_name[optypes[i]])

1365

self.assertEqual(tok_name[tokens[1 + num_optypes].exact_type],

1366

tok_name[token.ENDMARKER])

Meador Inge

2012-01-19 00:44:45 -0600

[diff] [blame]

1367

1368

def test_exact_type(self):

1369

self.assertExactTypeEqual('()', token.LPAR, token.RPAR)

1370

self.assertExactTypeEqual('[]', token.LSQB, token.RSQB)

1371

self.assertExactTypeEqual(':', token.COLON)

1372

self.assertExactTypeEqual(',', token.COMMA)

1373

self.assertExactTypeEqual(';', token.SEMI)

1374

self.assertExactTypeEqual('+', token.PLUS)

1375

self.assertExactTypeEqual('-', token.MINUS)

1376

self.assertExactTypeEqual('*', token.STAR)

1377

self.assertExactTypeEqual('/', token.SLASH)

1378

self.assertExactTypeEqual('|', token.VBAR)

1379

self.assertExactTypeEqual('&', token.AMPER)

1380

self.assertExactTypeEqual('<', token.LESS)

1381

self.assertExactTypeEqual('>', token.GREATER)

1382

self.assertExactTypeEqual('=', token.EQUAL)

1383

self.assertExactTypeEqual('.', token.DOT)

1384

self.assertExactTypeEqual('%', token.PERCENT)

1385

self.assertExactTypeEqual('{}', token.LBRACE, token.RBRACE)

1386

self.assertExactTypeEqual('==', token.EQEQUAL)

1387

self.assertExactTypeEqual('!=', token.NOTEQUAL)

1388

self.assertExactTypeEqual('<=', token.LESSEQUAL)

1389

self.assertExactTypeEqual('>=', token.GREATEREQUAL)

1390

self.assertExactTypeEqual('~', token.TILDE)

1391

self.assertExactTypeEqual('^', token.CIRCUMFLEX)

1392

self.assertExactTypeEqual('<<', token.LEFTSHIFT)

1393

self.assertExactTypeEqual('>>', token.RIGHTSHIFT)

1394

self.assertExactTypeEqual('**', token.DOUBLESTAR)

1395

self.assertExactTypeEqual('+=', token.PLUSEQUAL)

1396

self.assertExactTypeEqual('-=', token.MINEQUAL)

1397

self.assertExactTypeEqual('*=', token.STAREQUAL)

1398

self.assertExactTypeEqual('/=', token.SLASHEQUAL)

1399

self.assertExactTypeEqual('%=', token.PERCENTEQUAL)

1400

self.assertExactTypeEqual('&=', token.AMPEREQUAL)

1401

self.assertExactTypeEqual('|=', token.VBAREQUAL)

1402

self.assertExactTypeEqual('^=', token.CIRCUMFLEXEQUAL)

1403

self.assertExactTypeEqual('^=', token.CIRCUMFLEXEQUAL)

1404

self.assertExactTypeEqual('<<=', token.LEFTSHIFTEQUAL)

1405

self.assertExactTypeEqual('>>=', token.RIGHTSHIFTEQUAL)

1406

self.assertExactTypeEqual('**=', token.DOUBLESTAREQUAL)

1407

self.assertExactTypeEqual('//', token.DOUBLESLASH)

1408

self.assertExactTypeEqual('//=', token.DOUBLESLASHEQUAL)

Jim Fasarakis-Hilliard

d4914e9

2017-03-14 22:16:15 +0200

[diff] [blame]

1409

self.assertExactTypeEqual('...', token.ELLIPSIS)

1410

self.assertExactTypeEqual('->', token.RARROW)

Meador Inge

2012-01-19 00:44:45 -0600

[diff] [blame]

1411

self.assertExactTypeEqual('@', token.AT)

Benjamin Peterson

d51374e

2014-04-09 23:55:56 -0400

[diff] [blame]

1412

self.assertExactTypeEqual('@=', token.ATEQUAL)

Meador Inge

2012-01-19 00:44:45 -0600

[diff] [blame]

1413

1414

self.assertExactTypeEqual('a**2+b**2==c**2',

1415

NAME, token.DOUBLESTAR, NUMBER,

1416

token.PLUS,

1417

NAME, token.DOUBLESTAR, NUMBER,

1418

token.EQEQUAL,

1419

NAME, token.DOUBLESTAR, NUMBER)

1420

self.assertExactTypeEqual('{1, 2, 3}',

1421

token.LBRACE,

1422

token.NUMBER, token.COMMA,

1423

token.NUMBER, token.COMMA,

1424

token.NUMBER,

1425

token.RBRACE)

1426

self.assertExactTypeEqual('^(x & 0x1)',

1427

token.CIRCUMFLEX,

1428

token.LPAR,

1429

token.NAME, token.AMPER, token.NUMBER,

1430

token.RPAR)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

1431

Ezio Melotti

fafa8b7

2012-11-03 17:46:51 +0200

[diff] [blame]

1432

def test_pathological_trailing_whitespace(self):

1433

# See http://bugs.python.org/issue16152

1434

self.assertExactTypeEqual('@ ', token.AT)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

1435

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

1436

Terry Jan Reedy

2014-02-17 16:45:48 -0500

[diff] [blame]

1437

class UntokenizeTest(TestCase):

Terry Jan Reedy

58edfd9

2014-02-17 16:49:06 -0500

[diff] [blame]

1438

Terry Jan Reedy

2014-02-17 16:45:48 -0500

[diff] [blame]

1439

def test_bad_input_order(self):

Terry Jan Reedy

2014-02-23 23:33:08 -0500

[diff] [blame]

1440

# raise if previous row

Terry Jan Reedy

2014-02-17 16:45:48 -0500

[diff] [blame]

u = Untokenizer()

u.prev_row = 2

u.prev_col = 2

with self.assertRaises(ValueError) as cm:

1445

u.add_whitespace((1,3))

Terry Jan Reedy

58edfd9

2014-02-17 16:49:06 -0500

[diff] [blame]

1446

self.assertEqual(cm.exception.args[0],

Terry Jan Reedy

2014-02-17 16:45:48 -0500

[diff] [blame]

1447

'start (1,3) precedes previous end (2,2)')

Terry Jan Reedy

2014-02-23 23:33:08 -0500

[diff] [blame]

1448

# raise if previous column in row

Terry Jan Reedy

2014-02-17 16:45:48 -0500

[diff] [blame]

1449

self.assertRaises(ValueError, u.add_whitespace, (2,1))

1450

Terry Jan Reedy

2014-02-23 23:33:08 -0500

[diff] [blame]

1451

def test_backslash_continuation(self):

1452

# The problem is that <whitespace>\<newline> leaves no token

u = Untokenizer()

u.prev_row = 1

u.prev_col = 1

u.tokens = []

u.add_whitespace((2, 0))

1458

self.assertEqual(u.tokens, ['\\\n'])

1459

u.prev_row = 2

1460

u.add_whitespace((4, 4))

1461

self.assertEqual(u.tokens, ['\\\n', '\\\n\\\n', ' '])

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

1462

TestRoundtrip.check_roundtrip(self, 'a\n b\n c\n \\\n c\n')

Terry Jan Reedy

2014-02-23 23:33:08 -0500

[diff] [blame]

1463

Terry Jan Reedy

5b8d2c3

2014-02-17 23:12:16 -0500

[diff] [blame]

1464

def test_iter_compat(self):

1465

u = Untokenizer()

1466

token = (NAME, 'Hello')

1467

tokens = [(ENCODING, 'utf-8'), token]

1468

u.compat(token, iter([]))

1469

self.assertEqual(u.tokens, ["Hello "])

1470

u = Untokenizer()

1471

self.assertEqual(u.untokenize(iter([token])), 'Hello ')

1472

u = Untokenizer()

1473

self.assertEqual(u.untokenize(iter(tokens)), 'Hello ')

1474

self.assertEqual(u.encoding, 'utf-8')

1475

self.assertEqual(untokenize(iter(tokens)), b'Hello ')

1476

Terry Jan Reedy

2014-02-17 16:45:48 -0500

[diff] [blame]

1477

Jason R. Coombs

2015-06-20 19:52:22 -0400

[diff] [blame]

1478

class TestRoundtrip(TestCase):

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

1479

1480

def check_roundtrip(self, f):

1481

"""

1482

Test roundtrip for `untokenize`. `f` is an open file or a string.

1483

The source code in f is tokenized to both 5- and 2-tuples.

1484

Both sequences are converted back to source code via

1485

tokenize.untokenize(), and the latter tokenized again to 2-tuples.

1486

The test fails if the 3 pair tokenizations do not match.

1487

1488

When untokenize bugs are fixed, untokenize with 5-tuples should

1489

reproduce code that does not contain a backslash continuation

1490

following spaces. A proper test should test this.

1491

"""

1492

# Get source code and original tokenizations

1493

if isinstance(f, str):

1494

code = f.encode('utf-8')

else:

code = f.read()

f.close()

readline = iter(code.splitlines(keepends=True)).__next__

1499

tokens5 = list(tokenize(readline))

1500

tokens2 = [tok[:2] for tok in tokens5]

1501

# Reproduce tokens2 from pairs

1502

bytes_from2 = untokenize(tokens2)

1503

readline2 = iter(bytes_from2.splitlines(keepends=True)).__next__

1504

tokens2_from2 = [tok[:2] for tok in tokenize(readline2)]

1505

self.assertEqual(tokens2_from2, tokens2)

1506

# Reproduce tokens2 from 5-tuples

1507

bytes_from5 = untokenize(tokens5)

1508

readline5 = iter(bytes_from5.splitlines(keepends=True)).__next__

1509

tokens2_from5 = [tok[:2] for tok in tokenize(readline5)]

1510

self.assertEqual(tokens2_from5, tokens2)

1511

1512

def test_roundtrip(self):

1513

# There are some standard formatting practices that are easy to get right.

1514

1515

self.check_roundtrip("if x == 1:\n"

1516

" print(x)\n")

1517

self.check_roundtrip("# This is a comment\n"

1518

"# This also")

1519

1520

# Some people use different formatting conventions, which makes

1521

# untokenize a little trickier. Note that this test involves trailing

1522

# whitespace after the colon. Note that we use hex escapes to make the

1523

# two trailing blanks apparent in the expected output.

1524

1525

self.check_roundtrip("if x == 1 : \n"

1526

" print(x)\n")

1527

fn = support.findfile("tokenize_tests.txt")

1528

with open(fn, 'rb') as f:

1529

self.check_roundtrip(f)

1530

self.check_roundtrip("if x == 1:\n"

1531

" # A comment by itself.\n"

1532

" print(x) # Comment here, too.\n"

1533

" # Another comment.\n"

1534

"after_if = True\n")

1535

self.check_roundtrip("if (x # The comments need to go in the right place\n"

1536

" == 1):\n"

1537

" print('x==1')\n")

1538

self.check_roundtrip("class Test: # A comment here\n"

1539

" # A comment with weird indent\n"

1540

" after_com = 5\n"

1541

" def x(m): return m*5 # a one liner\n"

1542

" def y(m): # A whitespace after the colon\n"

1543

" return y*4 # 3-space indent\n")

1544

1545

# Some error-handling code

1546

self.check_roundtrip("try: import somemodule\n"

1547

"except ImportError: # comment\n"

1548

" print('Can not import' # comment2\n)"

1549

"else: print('Loaded')\n")

1550

1551

def test_continuation(self):

1552

# Balancing continuation

1553

self.check_roundtrip("a = (3,4, \n"

"5,6)\n"

"y = [3, 4,\n"

"5]\n"

"z = {'a': 5,\n"

"'b':15, 'c':True}\n"

1559

"x = len(y) + 5 - a[\n"

"3] - a[2]\n"

"+ len(z) - z[\n"

"'b']\n")

def test_backslash_continuation(self):

1565

# Backslash means line continuation, except for comments

1566

self.check_roundtrip("x=1+\\\n"

1567

"1\n"

1568

"# This is a comment\\\n"

1569

"# This also\n")

1570

self.check_roundtrip("# Comment \\\n"

1571

"x = 0")

1572

1573

def test_string_concatenation(self):

1574

# Two string literals on the same line

1575

self.check_roundtrip("'' ''")

1576

1577

def test_random_files(self):

1578

# Test roundtrip on random python modules.

1579

# pass the '-ucpu' option to process the full directory.

1580

1581

import glob, random

1582

fn = support.findfile("tokenize_tests.txt")

1583

tempdir = os.path.dirname(fn) or os.curdir

1584

testfiles = glob.glob(os.path.join(tempdir, "test*.py"))

1585

Brett Cannon

2016-09-09 14:57:09 -0700

[diff] [blame]

1586

# Tokenize is broken on test_pep3131.py because regular expressions are

1587

# broken on the obscure unicode identifiers in it. *sigh*

1588

# With roundtrip extended to test the 5-tuple mode of untokenize,

1589

# 7 more testfiles fail. Remove them also until the failure is diagnosed.

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

1590

Zachary Ware

724f6a6

2016-09-09 12:55:37 -0700

[diff] [blame]

1591

testfiles.remove(os.path.join(tempdir, "test_unicode_identifiers.py"))

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

1592

for f in ('buffer', 'builtin', 'fileio', 'inspect', 'os', 'platform', 'sys'):

1593

testfiles.remove(os.path.join(tempdir, "test_%s.py") % f)

1594

1595

if not support.is_resource_enabled("cpu"):

1596

testfiles = random.sample(testfiles, 10)

1597

1598

for testfile in testfiles:

1599

with open(testfile, 'rb') as f:

1600

with self.subTest(file=testfile):

1601

self.check_roundtrip(f)

1602

1603

Jason R. Coombs

2015-06-20 19:52:22 -0400

[diff] [blame]

1604

def roundtrip(self, code):

1605

if isinstance(code, str):

1606

code = code.encode('utf-8')

Jason R. Coombs

b6d1cdd

2015-06-25 22:42:24 -0400

[diff] [blame]

1607

return untokenize(tokenize(BytesIO(code).readline)).decode('utf-8')

Jason R. Coombs

2015-06-20 19:52:22 -0400

[diff] [blame]

1608

1609

def test_indentation_semantics_retained(self):

1610

"""

1611

Ensure that although whitespace might be mutated in a roundtrip,

1612

the semantic meaning of the indentation remains consistent.

1613

"""

1614

code = "if False:\n\tx=3\n\tx=3\n"

Jason R. Coombs

b6d1cdd

2015-06-25 22:42:24 -0400

[diff] [blame]

1615

codelines = self.roundtrip(code).split('\n')

Jason R. Coombs

2015-06-20 19:52:22 -0400

[diff] [blame]

1616

self.assertEqual(codelines[1], codelines[2])

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

1617

self.check_roundtrip(code)

Jason R. Coombs

2015-06-20 19:52:22 -0400

[diff] [blame]

1618

1619

Thomas Wouters

49fd7fa

2006-04-21 10:40:58 +0000

[diff] [blame]

1620

if __name__ == "__main__":

Brett Cannon