Blame - Lib/test/test_tokenize.py - platform/external/python/cpython3

2015-10-06 18:16:28 +0300

[diff] [blame]

1

from test import support

Hai Shi

4660597

2020-08-04 00:49:18 +0800

[diff] [blame^]

2

from test.support import os_helper

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

3

from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,

4

STRING, ENDMARKER, ENCODING, tok_name, detect_encoding,

Ammar Askar

2018-07-06 03:19:08 -0400

[diff] [blame]

5

open as tokenize_open, Untokenizer, generate_tokens,

6

NEWLINE)

Thomas Kluyver

2018-06-05 19:26:39 +0200

[diff] [blame]

7

from io import BytesIO, StringIO

Stéphane Wirtel

90addd6

2017-07-25 15:33:53 +0200

[diff] [blame]

8

import unittest

Brett Cannon

2016-09-09 14:57:09 -0700

[diff] [blame]

9

from unittest import TestCase, mock

10

from test.test_grammar import (VALID_UNDERSCORE_LITERALS,

11

INVALID_UNDERSCORE_LITERALS)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

12

import os

13

import token

Thomas Wouters

2006-12-13 04:49:30 +0000

[diff] [blame]

14

Thomas Wouters

2006-12-13 04:49:30 +0000

[diff] [blame]

15

Ammar Askar

2018-07-06 03:19:08 -0400

[diff] [blame]

16

# Converts a source string into a list of textual representation

17

# of the tokens such as:

18

# ` NAME 'if' (1, 0) (1, 2)`

19

# to make writing tests easier.

20

def stringify_tokens_from_source(token_generator, source_string):

21

result = []

22

num_lines = len(source_string.splitlines())

23

missing_trailing_nl = source_string[-1] not in '\r\n'

24

25

for type, token, start, end, line in token_generator:

26

if type == ENDMARKER:

27

break

28

# Ignore the new line on the last line if the input lacks one

29

if missing_trailing_nl and type == NEWLINE and end[0] == num_lines:

30

continue

31

type = tok_name[type]

32

result.append(f" {type:10} {token!r:13} {start} {end}")

return result

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

36

class TokenizeTest(TestCase):

37

# Tests for the tokenize module.

Jason R. Coombs

7cf3638

2015-06-20 19:13:50 -0400

[diff] [blame]

38

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

39

# The tests can be really simple. Given a small fragment of source

Ammar Askar

2018-07-06 03:19:08 -0400

[diff] [blame]

40

# code, print out a table with tokens. The ENDMARKER, ENCODING and

41

# final NEWLINE are omitted for brevity.

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

42

43

def check_tokenize(self, s, expected):

44

# Format the tokens in s in a table format.

Ammar Askar

2018-07-06 03:19:08 -0400

[diff] [blame]

45

# The ENDMARKER and final NEWLINE are omitted.

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

46

f = BytesIO(s.encode('utf-8'))

Ammar Askar

2018-07-06 03:19:08 -0400

[diff] [blame]

47

result = stringify_tokens_from_source(tokenize(f.readline), s)

48

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

49

self.assertEqual(result,

50

[" ENCODING 'utf-8' (0, 0) (0, 0)"] +

51

expected.rstrip().splitlines())

52

Ammar Askar

2018-07-06 03:19:08 -0400

[diff] [blame]

53

def test_implicit_newline(self):

54

# Make sure that the tokenizer puts in an implicit NEWLINE

55

# when the input lacks a trailing new line.

56

f = BytesIO("x".encode('utf-8'))

57

tokens = list(tokenize(f.readline))

58

self.assertEqual(tokens[-2].type, NEWLINE)

59

self.assertEqual(tokens[-1].type, ENDMARKER)

60

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

61

def test_basic(self):

62

self.check_tokenize("1 + 1", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

63

NUMBER '1' (1, 0) (1, 1)

64

OP '+' (1, 2) (1, 3)

65

NUMBER '1' (1, 4) (1, 5)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

66

""")

67

self.check_tokenize("if False:\n"

68

" # NL\n"

Albert-Jan Nijburg

c471ca4

2017-05-24 12:31:57 +0100

[diff] [blame]

69

" \n"

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

70

" True = False # NEWLINE\n", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

71

NAME 'if' (1, 0) (1, 2)

72

NAME 'False' (1, 3) (1, 8)

73

OP ':' (1, 8) (1, 9)

74

NEWLINE '\\n' (1, 9) (1, 10)

75

COMMENT '# NL' (2, 4) (2, 8)

76

NL '\\n' (2, 8) (2, 9)

Albert-Jan Nijburg

c471ca4

2017-05-24 12:31:57 +0100

[diff] [blame]

77

NL '\\n' (3, 4) (3, 5)

78

INDENT ' ' (4, 0) (4, 4)

79

NAME 'True' (4, 4) (4, 8)

80

OP '=' (4, 9) (4, 10)

81

NAME 'False' (4, 11) (4, 16)

82

COMMENT '# NEWLINE' (4, 17) (4, 26)

83

NEWLINE '\\n' (4, 26) (4, 27)

84

DEDENT '' (5, 0) (5, 0)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

85

""")

86

indent_error_file = b"""\

def k(x):

x += 2

x += 5

"""

readline = BytesIO(indent_error_file).readline

92

with self.assertRaisesRegex(IndentationError,

93

"unindent does not match any "

94

"outer indentation level"):

95

for tok in tokenize(readline):

96

pass

Thomas Wouters

2006-12-13 04:49:30 +0000

[diff] [blame]

97

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

98

def test_int(self):

99

# Ordinary integers and binary operators

100

self.check_tokenize("0xff <= 255", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

101

NUMBER '0xff' (1, 0) (1, 4)

102

OP '<=' (1, 5) (1, 7)

103

NUMBER '255' (1, 8) (1, 11)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

104

""")

105

self.check_tokenize("0b10 <= 255", """\

Eric Smith

74ca557

2008-03-17 19:49:19 +0000

[diff] [blame]

106

NUMBER '0b10' (1, 0) (1, 4)

107

OP '<=' (1, 5) (1, 7)

108

NUMBER '255' (1, 8) (1, 11)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

109

""")

110

self.check_tokenize("0o123 <= 0O123", """\

Eric Smith

74ca557

2008-03-17 19:49:19 +0000

[diff] [blame]

111

NUMBER '0o123' (1, 0) (1, 5)

112

OP '<=' (1, 6) (1, 8)

113

NUMBER '0O123' (1, 9) (1, 14)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

114

""")

115

self.check_tokenize("1234567 > ~0x15", """\

Mark Dickinson

2008-03-16 05:05:12 +0000

[diff] [blame]

116

NUMBER '1234567' (1, 0) (1, 7)

117

OP '>' (1, 8) (1, 9)

118

OP '~' (1, 10) (1, 11)

119

NUMBER '0x15' (1, 11) (1, 15)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

120

""")

121

self.check_tokenize("2134568 != 1231515", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

122

NUMBER '2134568' (1, 0) (1, 7)

123

OP '!=' (1, 8) (1, 10)

Mark Dickinson

2008-03-16 05:05:12 +0000

[diff] [blame]

124

NUMBER '1231515' (1, 11) (1, 18)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

125

""")

126

self.check_tokenize("(-124561-1) & 200000000", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

127

OP '(' (1, 0) (1, 1)

128

OP '-' (1, 1) (1, 2)

129

NUMBER '124561' (1, 2) (1, 8)

130

OP '-' (1, 8) (1, 9)

131

NUMBER '1' (1, 9) (1, 10)

132

OP ')' (1, 10) (1, 11)

133

OP '&' (1, 12) (1, 13)

Mark Dickinson

2008-03-16 05:05:12 +0000

[diff] [blame]

134

NUMBER '200000000' (1, 14) (1, 23)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

135

""")

136

self.check_tokenize("0xdeadbeef != -1", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

137

NUMBER '0xdeadbeef' (1, 0) (1, 10)

138

OP '!=' (1, 11) (1, 13)

139

OP '-' (1, 14) (1, 15)

140

NUMBER '1' (1, 15) (1, 16)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

141

""")

142

self.check_tokenize("0xdeadc0de & 12345", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

143

NUMBER '0xdeadc0de' (1, 0) (1, 10)

144

OP '&' (1, 11) (1, 12)

Mark Dickinson

2008-03-16 05:05:12 +0000

[diff] [blame]

145

NUMBER '12345' (1, 13) (1, 18)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

146

""")

147

self.check_tokenize("0xFF & 0x15 | 1234", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

148

NUMBER '0xFF' (1, 0) (1, 4)

149

OP '&' (1, 5) (1, 6)

150

NUMBER '0x15' (1, 7) (1, 11)

151

OP '|' (1, 12) (1, 13)

152

NUMBER '1234' (1, 14) (1, 18)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

153

""")

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

154

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

155

def test_long(self):

156

# Long integers

157

self.check_tokenize("x = 0", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

158

NAME 'x' (1, 0) (1, 1)

159

OP '=' (1, 2) (1, 3)

Mark Dickinson

2008-03-16 05:05:12 +0000

[diff] [blame]

160

NUMBER '0' (1, 4) (1, 5)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

161

""")

162

self.check_tokenize("x = 0xfffffffffff", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

163

NAME 'x' (1, 0) (1, 1)

164

OP '=' (1, 2) (1, 3)

Eric V. Smith

2015-10-16 20:45:53 -0400

[diff] [blame]

165

NUMBER '0xfffffffffff' (1, 4) (1, 17)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

166

""")

167

self.check_tokenize("x = 123141242151251616110", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

168

NAME 'x' (1, 0) (1, 1)

169

OP '=' (1, 2) (1, 3)

Eric V. Smith

2015-10-16 20:45:53 -0400

[diff] [blame]

170

NUMBER '123141242151251616110' (1, 4) (1, 25)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

171

""")

172

self.check_tokenize("x = -15921590215012591", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

173

NAME 'x' (1, 0) (1, 1)

174

OP '=' (1, 2) (1, 3)

175

OP '-' (1, 4) (1, 5)

Eric V. Smith

2015-10-16 20:45:53 -0400

[diff] [blame]

176

NUMBER '15921590215012591' (1, 5) (1, 22)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

177

""")

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

178

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

179

def test_float(self):

180

# Floating point numbers

181

self.check_tokenize("x = 3.14159", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

182

NAME 'x' (1, 0) (1, 1)

183

OP '=' (1, 2) (1, 3)

184

NUMBER '3.14159' (1, 4) (1, 11)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

185

""")

186

self.check_tokenize("x = 314159.", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

187

NAME 'x' (1, 0) (1, 1)

188

OP '=' (1, 2) (1, 3)

189

NUMBER '314159.' (1, 4) (1, 11)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

190

""")

191

self.check_tokenize("x = .314159", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

192

NAME 'x' (1, 0) (1, 1)

193

OP '=' (1, 2) (1, 3)

194

NUMBER '.314159' (1, 4) (1, 11)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

195

""")

196

self.check_tokenize("x = 3e14159", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

197

NAME 'x' (1, 0) (1, 1)

198

OP '=' (1, 2) (1, 3)

199

NUMBER '3e14159' (1, 4) (1, 11)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

200

""")

201

self.check_tokenize("x = 3E123", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

202

NAME 'x' (1, 0) (1, 1)

203

OP '=' (1, 2) (1, 3)

204

NUMBER '3E123' (1, 4) (1, 9)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

205

""")

206

self.check_tokenize("x+y = 3e-1230", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

207

NAME 'x' (1, 0) (1, 1)

208

OP '+' (1, 1) (1, 2)

209

NAME 'y' (1, 2) (1, 3)

210

OP '=' (1, 4) (1, 5)

211

NUMBER '3e-1230' (1, 6) (1, 13)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

212

""")

213

self.check_tokenize("x = 3.14e159", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

214

NAME 'x' (1, 0) (1, 1)

215

OP '=' (1, 2) (1, 3)

216

NUMBER '3.14e159' (1, 4) (1, 12)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

217

""")

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

218

Brett Cannon

2016-09-09 14:57:09 -0700

[diff] [blame]

219

def test_underscore_literals(self):

220

def number_token(s):

221

f = BytesIO(s.encode('utf-8'))

222

for toktype, token, start, end, line in tokenize(f.readline):

223

if toktype == NUMBER:

224

return token

225

return 'invalid token'

226

for lit in VALID_UNDERSCORE_LITERALS:

227

if '(' in lit:

228

# this won't work with compound complex inputs

229

continue

230

self.assertEqual(number_token(lit), lit)

231

for lit in INVALID_UNDERSCORE_LITERALS:

232

self.assertNotEqual(number_token(lit), lit)

233

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

234

def test_string(self):

235

# String literals

236

self.check_tokenize("x = ''; y = \"\"", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

237

NAME 'x' (1, 0) (1, 1)

238

OP '=' (1, 2) (1, 3)

239

STRING "''" (1, 4) (1, 6)

240

OP ';' (1, 6) (1, 7)

241

NAME 'y' (1, 8) (1, 9)

242

OP '=' (1, 10) (1, 11)

243

STRING '""' (1, 12) (1, 14)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

244

""")

245

self.check_tokenize("x = '\"'; y = \"'\"", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

246

NAME 'x' (1, 0) (1, 1)

247

OP '=' (1, 2) (1, 3)

248

STRING '\\'"\\'' (1, 4) (1, 7)

249

OP ';' (1, 7) (1, 8)

250

NAME 'y' (1, 9) (1, 10)

251

OP '=' (1, 11) (1, 12)

252

STRING '"\\'"' (1, 13) (1, 16)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

253

""")

254

self.check_tokenize("x = \"doesn't \"shrink\", does it\"", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

255

NAME 'x' (1, 0) (1, 1)

256

OP '=' (1, 2) (1, 3)

257

STRING '"doesn\\'t "' (1, 4) (1, 14)

258

NAME 'shrink' (1, 14) (1, 20)

259

STRING '", does it"' (1, 20) (1, 31)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

260

""")

261

self.check_tokenize("x = 'abc' + 'ABC'", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

262

NAME 'x' (1, 0) (1, 1)

263

OP '=' (1, 2) (1, 3)

Mark Dickinson

2008-03-16 05:05:12 +0000

[diff] [blame]

264

STRING "'abc'" (1, 4) (1, 9)

265

OP '+' (1, 10) (1, 11)

266

STRING "'ABC'" (1, 12) (1, 17)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

267

""")

268

self.check_tokenize('y = "ABC" + "ABC"', """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

269

NAME 'y' (1, 0) (1, 1)

270

OP '=' (1, 2) (1, 3)

Mark Dickinson

2008-03-16 05:05:12 +0000

[diff] [blame]

271

STRING '"ABC"' (1, 4) (1, 9)

272

OP '+' (1, 10) (1, 11)

273

STRING '"ABC"' (1, 12) (1, 17)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

274

""")

275

self.check_tokenize("x = r'abc' + r'ABC' + R'ABC' + R'ABC'", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

276

NAME 'x' (1, 0) (1, 1)

277

OP '=' (1, 2) (1, 3)

Mark Dickinson

2008-03-16 05:05:12 +0000

[diff] [blame]

278

STRING "r'abc'" (1, 4) (1, 10)

279

OP '+' (1, 11) (1, 12)

280

STRING "r'ABC'" (1, 13) (1, 19)

281

OP '+' (1, 20) (1, 21)

282

STRING "R'ABC'" (1, 22) (1, 28)

283

OP '+' (1, 29) (1, 30)

284

STRING "R'ABC'" (1, 31) (1, 37)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

285

""")

286

self.check_tokenize('y = r"abc" + r"ABC" + R"ABC" + R"ABC"', """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

287

NAME 'y' (1, 0) (1, 1)

288

OP '=' (1, 2) (1, 3)

Mark Dickinson

2008-03-16 05:05:12 +0000

[diff] [blame]

289

STRING 'r"abc"' (1, 4) (1, 10)

290

OP '+' (1, 11) (1, 12)

291

STRING 'r"ABC"' (1, 13) (1, 19)

292

OP '+' (1, 20) (1, 21)

293

STRING 'R"ABC"' (1, 22) (1, 28)

294

OP '+' (1, 29) (1, 30)

295

STRING 'R"ABC"' (1, 31) (1, 37)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

296

""")

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

297

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

298

self.check_tokenize("u'abc' + U'abc'", """\

Meador Inge

2012-06-16 21:49:08 -0500

[diff] [blame]

299

STRING "u'abc'" (1, 0) (1, 6)

300

OP '+' (1, 7) (1, 8)

301

STRING "U'abc'" (1, 9) (1, 15)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

302

""")

303

self.check_tokenize('u"abc" + U"abc"', """\

Meador Inge

2012-06-16 21:49:08 -0500

[diff] [blame]

304

STRING 'u"abc"' (1, 0) (1, 6)

305

OP '+' (1, 7) (1, 8)

306

STRING 'U"abc"' (1, 9) (1, 15)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

307

""")

Meador Inge

2012-06-16 21:49:08 -0500

[diff] [blame]

308

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

309

self.check_tokenize("b'abc' + B'abc'", """\

Meador Inge

2012-06-16 21:49:08 -0500

[diff] [blame]

310

STRING "b'abc'" (1, 0) (1, 6)

311

OP '+' (1, 7) (1, 8)

312

STRING "B'abc'" (1, 9) (1, 15)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

313

""")

314

self.check_tokenize('b"abc" + B"abc"', """\

Meador Inge

2012-06-16 21:49:08 -0500

[diff] [blame]

315

STRING 'b"abc"' (1, 0) (1, 6)

316

OP '+' (1, 7) (1, 8)

317

STRING 'B"abc"' (1, 9) (1, 15)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

318

""")

319

self.check_tokenize("br'abc' + bR'abc' + Br'abc' + BR'abc'", """\

Meador Inge

2012-06-16 21:49:08 -0500

[diff] [blame]

320

STRING "br'abc'" (1, 0) (1, 7)

321

OP '+' (1, 8) (1, 9)

322

STRING "bR'abc'" (1, 10) (1, 17)

323

OP '+' (1, 18) (1, 19)

324

STRING "Br'abc'" (1, 20) (1, 27)

325

OP '+' (1, 28) (1, 29)

326

STRING "BR'abc'" (1, 30) (1, 37)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

327

""")

328

self.check_tokenize('br"abc" + bR"abc" + Br"abc" + BR"abc"', """\

Meador Inge

2012-06-16 21:49:08 -0500

[diff] [blame]

329

STRING 'br"abc"' (1, 0) (1, 7)

330

OP '+' (1, 8) (1, 9)

331

STRING 'bR"abc"' (1, 10) (1, 17)

332

OP '+' (1, 18) (1, 19)

333

STRING 'Br"abc"' (1, 20) (1, 27)

334

OP '+' (1, 28) (1, 29)

335

STRING 'BR"abc"' (1, 30) (1, 37)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

336

""")

337

self.check_tokenize("rb'abc' + rB'abc' + Rb'abc' + RB'abc'", """\

Meador Inge

2012-06-16 21:49:08 -0500

[diff] [blame]

338

STRING "rb'abc'" (1, 0) (1, 7)

339

OP '+' (1, 8) (1, 9)

340

STRING "rB'abc'" (1, 10) (1, 17)

341

OP '+' (1, 18) (1, 19)

342

STRING "Rb'abc'" (1, 20) (1, 27)

343

OP '+' (1, 28) (1, 29)

344

STRING "RB'abc'" (1, 30) (1, 37)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

345

""")

346

self.check_tokenize('rb"abc" + rB"abc" + Rb"abc" + RB"abc"', """\

Meador Inge

2012-06-16 21:49:08 -0500

[diff] [blame]

347

STRING 'rb"abc"' (1, 0) (1, 7)

348

OP '+' (1, 8) (1, 9)

349

STRING 'rB"abc"' (1, 10) (1, 17)

350

OP '+' (1, 18) (1, 19)

351

STRING 'Rb"abc"' (1, 20) (1, 27)

352

OP '+' (1, 28) (1, 29)

353

STRING 'RB"abc"' (1, 30) (1, 37)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

354

""")

Eric V. Smith

2015-10-16 20:45:53 -0400

[diff] [blame]

355

# Check 0, 1, and 2 character string prefixes.

356

self.check_tokenize(r'"a\

357

de\

358

fg"', """\

359

STRING '"a\\\\\\nde\\\\\\nfg"\' (1, 0) (3, 3)

360

""")

361

self.check_tokenize(r'u"a\

362

de"', """\

363

STRING 'u"a\\\\\\nde"\' (1, 0) (2, 3)

364

""")

365

self.check_tokenize(r'rb"a\

366

d"', """\

367

STRING 'rb"a\\\\\\nd"\' (1, 0) (2, 2)

368

""")

369

self.check_tokenize(r'"""a\

370

b"""', """\

371

STRING '\"\""a\\\\\\nb\"\""' (1, 0) (2, 4)

372

""")

373

self.check_tokenize(r'u"""a\

374

b"""', """\

375

STRING 'u\"\""a\\\\\\nb\"\""' (1, 0) (2, 4)

376

""")

377

self.check_tokenize(r'rb"""a\

378

b\

379

c"""', """\

380

STRING 'rb"\""a\\\\\\nb\\\\\\nc"\""' (1, 0) (3, 4)

381

""")

Eric V. Smith

1c8222c

2015-10-26 04:37:55 -0400

[diff] [blame]

382

self.check_tokenize('f"abc"', """\

383

STRING 'f"abc"' (1, 0) (1, 6)

384

""")

385

self.check_tokenize('fR"a{b}c"', """\

386

STRING 'fR"a{b}c"' (1, 0) (1, 9)

387

""")

388

self.check_tokenize('f"""abc"""', """\

389

STRING 'f\"\"\"abc\"\"\"' (1, 0) (1, 10)

390

""")

391

self.check_tokenize(r'f"abc\

392

def"', """\

393

STRING 'f"abc\\\\\\ndef"' (1, 0) (2, 4)

394

""")

395

self.check_tokenize(r'Rf"abc\

396

def"', """\

397

STRING 'Rf"abc\\\\\\ndef"' (1, 0) (2, 4)

398

""")

Meador Inge

2012-06-16 21:49:08 -0500

[diff] [blame]

399

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

400

def test_function(self):

401

self.check_tokenize("def d22(a, b, c=2, d=2, *k): pass", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

402

NAME 'def' (1, 0) (1, 3)

403

NAME 'd22' (1, 4) (1, 7)

404

OP '(' (1, 7) (1, 8)

405

NAME 'a' (1, 8) (1, 9)

406

OP ',' (1, 9) (1, 10)

407

NAME 'b' (1, 11) (1, 12)

408

OP ',' (1, 12) (1, 13)

409

NAME 'c' (1, 14) (1, 15)

410

OP '=' (1, 15) (1, 16)

411

NUMBER '2' (1, 16) (1, 17)

412

OP ',' (1, 17) (1, 18)

413

NAME 'd' (1, 19) (1, 20)

414

OP '=' (1, 20) (1, 21)

415

NUMBER '2' (1, 21) (1, 22)

416

OP ',' (1, 22) (1, 23)

417

OP '*' (1, 24) (1, 25)

418

NAME 'k' (1, 25) (1, 26)

419

OP ')' (1, 26) (1, 27)

420

OP ':' (1, 27) (1, 28)

421

NAME 'pass' (1, 29) (1, 33)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

422

""")

423

self.check_tokenize("def d01v_(a=1, *k, **w): pass", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

424

NAME 'def' (1, 0) (1, 3)

425

NAME 'd01v_' (1, 4) (1, 9)

426

OP '(' (1, 9) (1, 10)

427

NAME 'a' (1, 10) (1, 11)

428

OP '=' (1, 11) (1, 12)

429

NUMBER '1' (1, 12) (1, 13)

430

OP ',' (1, 13) (1, 14)

431

OP '*' (1, 15) (1, 16)

432

NAME 'k' (1, 16) (1, 17)

433

OP ',' (1, 17) (1, 18)

434

OP '**' (1, 19) (1, 21)

435

NAME 'w' (1, 21) (1, 22)

436

OP ')' (1, 22) (1, 23)

437

OP ':' (1, 23) (1, 24)

438

NAME 'pass' (1, 25) (1, 29)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

439

""")

Jim Fasarakis-Hilliard

d4914e9

2017-03-14 22:16:15 +0200

[diff] [blame]

440

self.check_tokenize("def d23(a: str, b: int=3) -> int: pass", """\

441

NAME 'def' (1, 0) (1, 3)

442

NAME 'd23' (1, 4) (1, 7)

443

OP '(' (1, 7) (1, 8)

444

NAME 'a' (1, 8) (1, 9)

445

OP ':' (1, 9) (1, 10)

446

NAME 'str' (1, 11) (1, 14)

447

OP ',' (1, 14) (1, 15)

448

NAME 'b' (1, 16) (1, 17)

449

OP ':' (1, 17) (1, 18)

450

NAME 'int' (1, 19) (1, 22)

451

OP '=' (1, 22) (1, 23)

452

NUMBER '3' (1, 23) (1, 24)

453

OP ')' (1, 24) (1, 25)

454

OP '->' (1, 26) (1, 28)

455

NAME 'int' (1, 29) (1, 32)

456

OP ':' (1, 32) (1, 33)

457

NAME 'pass' (1, 34) (1, 38)

458

""")

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

459

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

460

def test_comparison(self):

461

# Comparison

462

self.check_tokenize("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != "

463

"1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

464

NAME 'if' (1, 0) (1, 2)

465

NUMBER '1' (1, 3) (1, 4)

466

OP '<' (1, 5) (1, 6)

467

NUMBER '1' (1, 7) (1, 8)

468

OP '>' (1, 9) (1, 10)

469

NUMBER '1' (1, 11) (1, 12)

470

OP '==' (1, 13) (1, 15)

471

NUMBER '1' (1, 16) (1, 17)

472

OP '>=' (1, 18) (1, 20)

473

NUMBER '5' (1, 21) (1, 22)

474

OP '<=' (1, 23) (1, 25)

475

NUMBER '0x15' (1, 26) (1, 30)

476

OP '<=' (1, 31) (1, 33)

477

NUMBER '0x12' (1, 34) (1, 38)

478

OP '!=' (1, 39) (1, 41)

479

NUMBER '1' (1, 42) (1, 43)

480

NAME 'and' (1, 44) (1, 47)

481

NUMBER '5' (1, 48) (1, 49)

482

NAME 'in' (1, 50) (1, 52)

483

NUMBER '1' (1, 53) (1, 54)

484

NAME 'not' (1, 55) (1, 58)

485

NAME 'in' (1, 59) (1, 61)

486

NUMBER '1' (1, 62) (1, 63)

487

NAME 'is' (1, 64) (1, 66)

488

NUMBER '1' (1, 67) (1, 68)

489

NAME 'or' (1, 69) (1, 71)

490

NUMBER '5' (1, 72) (1, 73)

491

NAME 'is' (1, 74) (1, 76)

492

NAME 'not' (1, 77) (1, 80)

493

NUMBER '1' (1, 81) (1, 82)

494

OP ':' (1, 82) (1, 83)

495

NAME 'pass' (1, 84) (1, 88)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

496

""")

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

497

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

498

def test_shift(self):

499

# Shift

500

self.check_tokenize("x = 1 << 1 >> 5", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

501

NAME 'x' (1, 0) (1, 1)

502

OP '=' (1, 2) (1, 3)

503

NUMBER '1' (1, 4) (1, 5)

504

OP '<<' (1, 6) (1, 8)

505

NUMBER '1' (1, 9) (1, 10)

506

OP '>>' (1, 11) (1, 13)

507

NUMBER '5' (1, 14) (1, 15)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

508

""")

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

509

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

510

def test_additive(self):

511

# Additive

512

self.check_tokenize("x = 1 - y + 15 - 1 + 0x124 + z + a[5]", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

513

NAME 'x' (1, 0) (1, 1)

514

OP '=' (1, 2) (1, 3)

515

NUMBER '1' (1, 4) (1, 5)

516

OP '-' (1, 6) (1, 7)

517

NAME 'y' (1, 8) (1, 9)

518

OP '+' (1, 10) (1, 11)

519

NUMBER '15' (1, 12) (1, 14)

520

OP '-' (1, 15) (1, 16)

Mark Dickinson

2008-03-16 05:05:12 +0000

[diff] [blame]

521

NUMBER '1' (1, 17) (1, 18)

522

OP '+' (1, 19) (1, 20)

523

NUMBER '0x124' (1, 21) (1, 26)

524

OP '+' (1, 27) (1, 28)

525

NAME 'z' (1, 29) (1, 30)

526

OP '+' (1, 31) (1, 32)

527

NAME 'a' (1, 33) (1, 34)

528

OP '[' (1, 34) (1, 35)

529

NUMBER '5' (1, 35) (1, 36)

530

OP ']' (1, 36) (1, 37)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

531

""")

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

532

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

533

def test_multiplicative(self):

534

# Multiplicative

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

535

self.check_tokenize("x = 1//1*1/5*12%0x12@42", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

536

NAME 'x' (1, 0) (1, 1)

537

OP '=' (1, 2) (1, 3)

538

NUMBER '1' (1, 4) (1, 5)

539

OP '//' (1, 5) (1, 7)

540

NUMBER '1' (1, 7) (1, 8)

541

OP '*' (1, 8) (1, 9)

542

NUMBER '1' (1, 9) (1, 10)

543

OP '/' (1, 10) (1, 11)

544

NUMBER '5' (1, 11) (1, 12)

545

OP '*' (1, 12) (1, 13)

546

NUMBER '12' (1, 13) (1, 15)

547

OP '%' (1, 15) (1, 16)

548

NUMBER '0x12' (1, 16) (1, 20)

Benjamin Peterson

d51374e

2014-04-09 23:55:56 -0400

[diff] [blame]

549

OP '@' (1, 20) (1, 21)

550

NUMBER '42' (1, 21) (1, 23)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

551

""")

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

552

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

553

def test_unary(self):

554

# Unary

555

self.check_tokenize("~1 ^ 1 & 1 |1 ^ -1", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

556

OP '~' (1, 0) (1, 1)

557

NUMBER '1' (1, 1) (1, 2)

558

OP '^' (1, 3) (1, 4)

559

NUMBER '1' (1, 5) (1, 6)

560

OP '&' (1, 7) (1, 8)

561

NUMBER '1' (1, 9) (1, 10)

562

OP '|' (1, 11) (1, 12)

563

NUMBER '1' (1, 12) (1, 13)

564

OP '^' (1, 14) (1, 15)

565

OP '-' (1, 16) (1, 17)

566

NUMBER '1' (1, 17) (1, 18)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

567

""")

568

self.check_tokenize("-1*1/1+1*1//1 - ---1**1", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

569

OP '-' (1, 0) (1, 1)

570

NUMBER '1' (1, 1) (1, 2)

571

OP '*' (1, 2) (1, 3)

572

NUMBER '1' (1, 3) (1, 4)

573

OP '/' (1, 4) (1, 5)

574

NUMBER '1' (1, 5) (1, 6)

575

OP '+' (1, 6) (1, 7)

576

NUMBER '1' (1, 7) (1, 8)

577

OP '*' (1, 8) (1, 9)

578

NUMBER '1' (1, 9) (1, 10)

579

OP '//' (1, 10) (1, 12)

580

NUMBER '1' (1, 12) (1, 13)

581

OP '-' (1, 14) (1, 15)

582

OP '-' (1, 16) (1, 17)

583

OP '-' (1, 17) (1, 18)

584

OP '-' (1, 18) (1, 19)

585

NUMBER '1' (1, 19) (1, 20)

586

OP '**' (1, 20) (1, 22)

587

NUMBER '1' (1, 22) (1, 23)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

588

""")

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

589

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

590

def test_selector(self):

591

# Selector

592

self.check_tokenize("import sys, time\nx = sys.modules['time'].time()", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

593

NAME 'import' (1, 0) (1, 6)

594

NAME 'sys' (1, 7) (1, 10)

595

OP ',' (1, 10) (1, 11)

596

NAME 'time' (1, 12) (1, 16)

597

NEWLINE '\\n' (1, 16) (1, 17)

598

NAME 'x' (2, 0) (2, 1)

599

OP '=' (2, 2) (2, 3)

600

NAME 'sys' (2, 4) (2, 7)

601

OP '.' (2, 7) (2, 8)

602

NAME 'modules' (2, 8) (2, 15)

603

OP '[' (2, 15) (2, 16)

604

STRING "'time'" (2, 16) (2, 22)

605

OP ']' (2, 22) (2, 23)

606

OP '.' (2, 23) (2, 24)

607

NAME 'time' (2, 24) (2, 28)

608

OP '(' (2, 28) (2, 29)

609

OP ')' (2, 29) (2, 30)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

610

""")

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

611

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

612

def test_method(self):

613

# Methods

614

self.check_tokenize("@staticmethod\ndef foo(x,y): pass", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

615

OP '@' (1, 0) (1, 1)

Eric V. Smith

2015-10-16 20:45:53 -0400

[diff] [blame]

616

NAME 'staticmethod' (1, 1) (1, 13)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

617

NEWLINE '\\n' (1, 13) (1, 14)

618

NAME 'def' (2, 0) (2, 3)

619

NAME 'foo' (2, 4) (2, 7)

620

OP '(' (2, 7) (2, 8)

621

NAME 'x' (2, 8) (2, 9)

622

OP ',' (2, 9) (2, 10)

623

NAME 'y' (2, 10) (2, 11)

624

OP ')' (2, 11) (2, 12)

625

OP ':' (2, 12) (2, 13)

626

NAME 'pass' (2, 14) (2, 18)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

627

""")

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

628

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

629

def test_tabs(self):

630

# Evil tabs

631

self.check_tokenize("def f():\n"

632

"\tif x\n"

633

" \tpass", """\

Benjamin Peterson

a0dfa82

2009-11-13 02:25:08 +0000

[diff] [blame]

634

NAME 'def' (1, 0) (1, 3)

635

NAME 'f' (1, 4) (1, 5)

OP '(' (1, 5) (1, 6)

OP ')' (1, 6) (1, 7)

OP ':' (1, 7) (1, 8)

NEWLINE '\\n' (1, 8) (1, 9)

640

INDENT '\\t' (2, 0) (2, 1)

641

NAME 'if' (2, 1) (2, 3)

642

NAME 'x' (2, 4) (2, 5)

643

NEWLINE '\\n' (2, 5) (2, 6)

644

INDENT ' \\t' (3, 0) (3, 9)

645

NAME 'pass' (3, 9) (3, 13)

646

DEDENT '' (4, 0) (4, 0)

647

DEDENT '' (4, 0) (4, 0)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

648

""")

Benjamin Peterson

33856de

2010-08-30 14:41:20 +0000

[diff] [blame]

649

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

650

def test_non_ascii_identifiers(self):

651

# Non-ascii identifiers

652

self.check_tokenize("Örter = 'places'\ngrün = 'green'", """\

Benjamin Peterson

33856de

2010-08-30 14:41:20 +0000

[diff] [blame]

653

NAME 'Örter' (1, 0) (1, 5)

654

OP '=' (1, 6) (1, 7)

655

STRING "'places'" (1, 8) (1, 16)

656

NEWLINE '\\n' (1, 16) (1, 17)

657

NAME 'grün' (2, 0) (2, 4)

658

OP '=' (2, 5) (2, 6)

659

STRING "'green'" (2, 7) (2, 14)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

660

""")

Armin Ronacher

c0eaeca

2012-03-04 13:07:57 +0000

[diff] [blame]

661

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

662

def test_unicode(self):

663

# Legacy unicode literals:

664

self.check_tokenize("Örter = u'places'\ngrün = U'green'", """\

Armin Ronacher

c0eaeca

2012-03-04 13:07:57 +0000

[diff] [blame]

665

NAME 'Örter' (1, 0) (1, 5)

666

OP '=' (1, 6) (1, 7)

667

STRING "u'places'" (1, 8) (1, 17)

668

NEWLINE '\\n' (1, 17) (1, 18)

669

NAME 'grün' (2, 0) (2, 4)

670

OP '=' (2, 5) (2, 6)

Christian Heimes

0b3847d

2012-06-20 11:17:58 +0200

[diff] [blame]

671

STRING "U'green'" (2, 7) (2, 15)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

672

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

673

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

674

def test_async(self):

675

# Async/await extension:

676

self.check_tokenize("async = 1", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

677

NAME 'async' (1, 0) (1, 5)

678

OP '=' (1, 6) (1, 7)

679

NUMBER '1' (1, 8) (1, 9)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

680

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

681

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

682

self.check_tokenize("a = (async = 1)", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

683

NAME 'a' (1, 0) (1, 1)

684

OP '=' (1, 2) (1, 3)

685

OP '(' (1, 4) (1, 5)

686

NAME 'async' (1, 5) (1, 10)

687

OP '=' (1, 11) (1, 12)

688

NUMBER '1' (1, 13) (1, 14)

689

OP ')' (1, 14) (1, 15)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

690

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

691

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

692

self.check_tokenize("async()", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

693

NAME 'async' (1, 0) (1, 5)

694

OP '(' (1, 5) (1, 6)

695

OP ')' (1, 6) (1, 7)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

696

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

697

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

698

self.check_tokenize("class async(Bar):pass", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

699

NAME 'class' (1, 0) (1, 5)

700

NAME 'async' (1, 6) (1, 11)

701

OP '(' (1, 11) (1, 12)

702

NAME 'Bar' (1, 12) (1, 15)

703

OP ')' (1, 15) (1, 16)

704

OP ':' (1, 16) (1, 17)

705

NAME 'pass' (1, 17) (1, 21)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

706

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

707

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

708

self.check_tokenize("class async:pass", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

709

NAME 'class' (1, 0) (1, 5)

710

NAME 'async' (1, 6) (1, 11)

711

OP ':' (1, 11) (1, 12)

712

NAME 'pass' (1, 12) (1, 16)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

713

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

714

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

715

self.check_tokenize("await = 1", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

716

NAME 'await' (1, 0) (1, 5)

717

OP '=' (1, 6) (1, 7)

718

NUMBER '1' (1, 8) (1, 9)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

719

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

720

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

721

self.check_tokenize("foo.async", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

722

NAME 'foo' (1, 0) (1, 3)

723

OP '.' (1, 3) (1, 4)

724

NAME 'async' (1, 4) (1, 9)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

725

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

726

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

727

self.check_tokenize("async for a in b: pass", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

728

NAME 'async' (1, 0) (1, 5)

729

NAME 'for' (1, 6) (1, 9)

730

NAME 'a' (1, 10) (1, 11)

731

NAME 'in' (1, 12) (1, 14)

732

NAME 'b' (1, 15) (1, 16)

733

OP ':' (1, 16) (1, 17)

734

NAME 'pass' (1, 18) (1, 22)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

735

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

736

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

737

self.check_tokenize("async with a as b: pass", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

738

NAME 'async' (1, 0) (1, 5)

739

NAME 'with' (1, 6) (1, 10)

740

NAME 'a' (1, 11) (1, 12)

741

NAME 'as' (1, 13) (1, 15)

742

NAME 'b' (1, 16) (1, 17)

743

OP ':' (1, 17) (1, 18)

744

NAME 'pass' (1, 19) (1, 23)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

745

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

746

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

747

self.check_tokenize("async.foo", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

748

NAME 'async' (1, 0) (1, 5)

749

OP '.' (1, 5) (1, 6)

750

NAME 'foo' (1, 6) (1, 9)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

751

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

752

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

753

self.check_tokenize("async", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

754

NAME 'async' (1, 0) (1, 5)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

755

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

756

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

757

self.check_tokenize("async\n#comment\nawait", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

758

NAME 'async' (1, 0) (1, 5)

759

NEWLINE '\\n' (1, 5) (1, 6)

760

COMMENT '#comment' (2, 0) (2, 8)

761

NL '\\n' (2, 8) (2, 9)

762

NAME 'await' (3, 0) (3, 5)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

763

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

764

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

765

self.check_tokenize("async\n...\nawait", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

766

NAME 'async' (1, 0) (1, 5)

767

NEWLINE '\\n' (1, 5) (1, 6)

768

OP '...' (2, 0) (2, 3)

769

NEWLINE '\\n' (2, 3) (2, 4)

770

NAME 'await' (3, 0) (3, 5)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

771

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

772

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

773

self.check_tokenize("async\nawait", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

774

NAME 'async' (1, 0) (1, 5)

775

NEWLINE '\\n' (1, 5) (1, 6)

776

NAME 'await' (2, 0) (2, 5)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

777

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

778

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

779

self.check_tokenize("foo.async + 1", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

780

NAME 'foo' (1, 0) (1, 3)

781

OP '.' (1, 3) (1, 4)

782

NAME 'async' (1, 4) (1, 9)

783

OP '+' (1, 10) (1, 11)

784

NUMBER '1' (1, 12) (1, 13)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

785

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

786

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

787

self.check_tokenize("async def foo(): pass", """\

Jelle Zijlstra

2017-10-05 20:24:46 -0700

[diff] [blame]

788

NAME 'async' (1, 0) (1, 5)

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

789

NAME 'def' (1, 6) (1, 9)

790

NAME 'foo' (1, 10) (1, 13)

791

OP '(' (1, 13) (1, 14)

792

OP ')' (1, 14) (1, 15)

793

OP ':' (1, 15) (1, 16)

794

NAME 'pass' (1, 17) (1, 21)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

795

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

796

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

797

self.check_tokenize('''\

async def foo():

def foo(await):

await = 1

if 1:

await

async += 1

''', """\

Jelle Zijlstra

2017-10-05 20:24:46 -0700

[diff] [blame]

805

NAME 'async' (1, 0) (1, 5)

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

806

NAME 'def' (1, 6) (1, 9)

807

NAME 'foo' (1, 10) (1, 13)

808

OP '(' (1, 13) (1, 14)

809

OP ')' (1, 14) (1, 15)

810

OP ':' (1, 15) (1, 16)

811

NEWLINE '\\n' (1, 16) (1, 17)

812

INDENT ' ' (2, 0) (2, 2)

813

NAME 'def' (2, 2) (2, 5)

814

NAME 'foo' (2, 6) (2, 9)

815

OP '(' (2, 9) (2, 10)

Jelle Zijlstra

2017-10-05 20:24:46 -0700

[diff] [blame]

816

NAME 'await' (2, 10) (2, 15)

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

817

OP ')' (2, 15) (2, 16)

818

OP ':' (2, 16) (2, 17)

819

NEWLINE '\\n' (2, 17) (2, 18)

820

INDENT ' ' (3, 0) (3, 4)

Jelle Zijlstra

2017-10-05 20:24:46 -0700

[diff] [blame]

821

NAME 'await' (3, 4) (3, 9)

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

822

OP '=' (3, 10) (3, 11)

823

NUMBER '1' (3, 12) (3, 13)

824

NEWLINE '\\n' (3, 13) (3, 14)

825

DEDENT '' (4, 2) (4, 2)

826

NAME 'if' (4, 2) (4, 4)

827

NUMBER '1' (4, 5) (4, 6)

828

OP ':' (4, 6) (4, 7)

829

NEWLINE '\\n' (4, 7) (4, 8)

830

INDENT ' ' (5, 0) (5, 4)

Jelle Zijlstra

2017-10-05 20:24:46 -0700

[diff] [blame]

831

NAME 'await' (5, 4) (5, 9)

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

832

NEWLINE '\\n' (5, 9) (5, 10)

833

DEDENT '' (6, 0) (6, 0)

834

DEDENT '' (6, 0) (6, 0)

835

NAME 'async' (6, 0) (6, 5)

836

OP '+=' (6, 6) (6, 8)

837

NUMBER '1' (6, 9) (6, 10)

838

NEWLINE '\\n' (6, 10) (6, 11)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

839

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

840

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

841

self.check_tokenize('''\

842

async def foo():

843

async for i in 1: pass''', """\

Jelle Zijlstra

2017-10-05 20:24:46 -0700

[diff] [blame]

844

NAME 'async' (1, 0) (1, 5)

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

845

NAME 'def' (1, 6) (1, 9)

846

NAME 'foo' (1, 10) (1, 13)

847

OP '(' (1, 13) (1, 14)

848

OP ')' (1, 14) (1, 15)

849

OP ':' (1, 15) (1, 16)

850

NEWLINE '\\n' (1, 16) (1, 17)

851

INDENT ' ' (2, 0) (2, 2)

Jelle Zijlstra

2017-10-05 20:24:46 -0700

[diff] [blame]

852

NAME 'async' (2, 2) (2, 7)

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

853

NAME 'for' (2, 8) (2, 11)

854

NAME 'i' (2, 12) (2, 13)

855

NAME 'in' (2, 14) (2, 16)

856

NUMBER '1' (2, 17) (2, 18)

857

OP ':' (2, 18) (2, 19)

858

NAME 'pass' (2, 20) (2, 24)

859

DEDENT '' (3, 0) (3, 0)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

860

""")

Yury Selivanov

8fb307c

2015-07-22 13:33:45 +0300

[diff] [blame]

861

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

862

self.check_tokenize('''async def foo(async): await''', """\

Jelle Zijlstra

2017-10-05 20:24:46 -0700

[diff] [blame]

863

NAME 'async' (1, 0) (1, 5)

Yury Selivanov

8fb307c

2015-07-22 13:33:45 +0300

[diff] [blame]

864

NAME 'def' (1, 6) (1, 9)

865

NAME 'foo' (1, 10) (1, 13)

866

OP '(' (1, 13) (1, 14)

Jelle Zijlstra

2017-10-05 20:24:46 -0700

[diff] [blame]

867

NAME 'async' (1, 14) (1, 19)

Yury Selivanov

8fb307c

2015-07-22 13:33:45 +0300

[diff] [blame]

868

OP ')' (1, 19) (1, 20)

869

OP ':' (1, 20) (1, 21)

Jelle Zijlstra

2017-10-05 20:24:46 -0700

[diff] [blame]

870

NAME 'await' (1, 22) (1, 27)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

871

""")

Yury Selivanov

2015-07-23 15:01:58 +0300

[diff] [blame]

872

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

873

self.check_tokenize('''\

def f():

def baz(): pass

async def bar(): pass

878

879

await = 2''', """\

Yury Selivanov

2015-07-23 15:01:58 +0300

[diff] [blame]

880

NAME 'def' (1, 0) (1, 3)

881

NAME 'f' (1, 4) (1, 5)

OP '(' (1, 5) (1, 6)

OP ')' (1, 6) (1, 7)

OP ':' (1, 7) (1, 8)

NEWLINE '\\n' (1, 8) (1, 9)

886

NL '\\n' (2, 0) (2, 1)

887

INDENT ' ' (3, 0) (3, 2)

888

NAME 'def' (3, 2) (3, 5)

889

NAME 'baz' (3, 6) (3, 9)

890

OP '(' (3, 9) (3, 10)

891

OP ')' (3, 10) (3, 11)

892

OP ':' (3, 11) (3, 12)

893

NAME 'pass' (3, 13) (3, 17)

894

NEWLINE '\\n' (3, 17) (3, 18)

Jelle Zijlstra

2017-10-05 20:24:46 -0700

[diff] [blame]

895

NAME 'async' (4, 2) (4, 7)

Yury Selivanov

2015-07-23 15:01:58 +0300

[diff] [blame]

896

NAME 'def' (4, 8) (4, 11)

897

NAME 'bar' (4, 12) (4, 15)

898

OP '(' (4, 15) (4, 16)

899

OP ')' (4, 16) (4, 17)

900

OP ':' (4, 17) (4, 18)

901

NAME 'pass' (4, 19) (4, 23)

902

NEWLINE '\\n' (4, 23) (4, 24)

903

NL '\\n' (5, 0) (5, 1)

904

NAME 'await' (6, 2) (6, 7)

905

OP '=' (6, 8) (6, 9)

906

NUMBER '2' (6, 10) (6, 11)

907

DEDENT '' (7, 0) (7, 0)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

908

""")

Yury Selivanov

2015-07-23 15:01:58 +0300

[diff] [blame]

909

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

910

self.check_tokenize('''\

async def f():

def baz(): pass

async def bar(): pass

915

916

await = 2''', """\

Jelle Zijlstra

2017-10-05 20:24:46 -0700

[diff] [blame]

917

NAME 'async' (1, 0) (1, 5)

Yury Selivanov

2015-07-23 15:01:58 +0300

[diff] [blame]

918

NAME 'def' (1, 6) (1, 9)

919

NAME 'f' (1, 10) (1, 11)

920

OP '(' (1, 11) (1, 12)

921

OP ')' (1, 12) (1, 13)

922

OP ':' (1, 13) (1, 14)

923

NEWLINE '\\n' (1, 14) (1, 15)

924

NL '\\n' (2, 0) (2, 1)

925

INDENT ' ' (3, 0) (3, 2)

926

NAME 'def' (3, 2) (3, 5)

927

NAME 'baz' (3, 6) (3, 9)

928

OP '(' (3, 9) (3, 10)

929

OP ')' (3, 10) (3, 11)

930

OP ':' (3, 11) (3, 12)

931

NAME 'pass' (3, 13) (3, 17)

932

NEWLINE '\\n' (3, 17) (3, 18)

Jelle Zijlstra

2017-10-05 20:24:46 -0700

[diff] [blame]

933

NAME 'async' (4, 2) (4, 7)

Yury Selivanov

2015-07-23 15:01:58 +0300

[diff] [blame]

934

NAME 'def' (4, 8) (4, 11)

935

NAME 'bar' (4, 12) (4, 15)

936

OP '(' (4, 15) (4, 16)

937

OP ')' (4, 16) (4, 17)

938

OP ':' (4, 17) (4, 18)

939

NAME 'pass' (4, 19) (4, 23)

940

NEWLINE '\\n' (4, 23) (4, 24)

941

NL '\\n' (5, 0) (5, 1)

Jelle Zijlstra

2017-10-05 20:24:46 -0700

[diff] [blame]

942

NAME 'await' (6, 2) (6, 7)

Yury Selivanov

2015-07-23 15:01:58 +0300

[diff] [blame]

943

OP '=' (6, 8) (6, 9)

944

NUMBER '2' (6, 10) (6, 11)

945

DEDENT '' (7, 0) (7, 0)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

946

""")

Thomas Wouters

2006-12-13 04:49:30 +0000

[diff] [blame]

947

Thomas Kluyver

2018-06-05 19:26:39 +0200

[diff] [blame]

948

class GenerateTokensTest(TokenizeTest):

949

def check_tokenize(self, s, expected):

950

# Format the tokens in s in a table format.

Ammar Askar

2018-07-06 03:19:08 -0400

[diff] [blame]

951

# The ENDMARKER and final NEWLINE are omitted.

Thomas Kluyver

2018-06-05 19:26:39 +0200

[diff] [blame]

952

f = StringIO(s)

Ammar Askar

2018-07-06 03:19:08 -0400

[diff] [blame]

953

result = stringify_tokens_from_source(generate_tokens(f.readline), s)

Thomas Kluyver

2018-06-05 19:26:39 +0200

[diff] [blame]

954

self.assertEqual(result, expected.rstrip().splitlines())

955

Raymond Hettinger

2005-06-10 11:05:19 +0000

[diff] [blame]

956

Raymond Hettinger

2005-06-10 11:05:19 +0000

[diff] [blame]

957

def decistmt(s):

Raymond Hettinger

2005-06-10 11:05:19 +0000

[diff] [blame]

958

result = []

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

959

g = tokenize(BytesIO(s.encode('utf-8')).readline) # tokenize the string

Raymond Hettinger

2005-06-10 11:05:19 +0000

[diff] [blame]

960

for toknum, tokval, _, _, _ in g:

961

if toknum == NUMBER and '.' in tokval: # replace NUMBER tokens

result.extend([

(NAME, 'Decimal'),

(OP, '('),

(STRING, repr(tokval)),

(OP, ')')

])

else:

result.append((toknum, tokval))

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

970

return untokenize(result).decode('utf-8')

971

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

972

class TestMisc(TestCase):

973

974

def test_decistmt(self):

975

# Substitute Decimals for floats in a string of statements.

976

# This is an example from the docs.

977

978

from decimal import Decimal

979

s = '+21.3e-5*-.1234/81.7'

980

self.assertEqual(decistmt(s),

981

"+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')")

982

983

# The format of the exponent is inherited from the platform C library.

984

# Known cases are "e-007" (Windows) and "e-07" (not Windows). Since

985

# we're only showing 11 digits, and the 12th isn't close to 5, the

986

# rest of the output should be platform-independent.

987

self.assertRegex(repr(eval(s)), '-3.2171603427[0-9]*e-0+7')

988

989

# Output from calculations with Decimal should be identical across all

990

# platforms.

991

self.assertEqual(eval(decistmt(s)),

992

Decimal('-3.217160342717258261933904529E-7'))

993

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

994

995

class TestTokenizerAdheresToPep0263(TestCase):

996

"""

997

Test that tokenizer adheres to the coding behaviour stipulated in PEP 0263.

998

"""

999

1000

def _testFile(self, filename):

1001

path = os.path.join(os.path.dirname(__file__), filename)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

1002

TestRoundtrip.check_roundtrip(self, open(path, 'rb'))

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1003

1004

def test_utf8_coding_cookie_and_no_utf8_bom(self):

Ned Deily

2ea6fcc

2011-07-19 16:15:27 -0700

[diff] [blame]

1005

f = 'tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt'

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

1006

self._testFile(f)

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1007

1008

def test_latin1_coding_cookie_and_utf8_bom(self):

1009

"""

1010

As per PEP 0263, if a file starts with a utf-8 BOM signature, the only

1011

allowed encoding for the comment is 'utf-8'. The text file used in

1012

this test starts with a BOM signature, but specifies latin1 as the

1013

coding, so verify that a SyntaxError is raised, which matches the

1014

behaviour of the interpreter when it encounters a similar condition.

1015

"""

1016

f = 'tokenize_tests-latin1-coding-cookie-and-utf8-bom-sig.txt'

Benjamin Peterson

c9c0f20

2009-06-30 23:06:06 +0000

[diff] [blame]

1017

self.assertRaises(SyntaxError, self._testFile, f)

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1018

1019

def test_no_coding_cookie_and_utf8_bom(self):

1020

f = 'tokenize_tests-no-coding-cookie-and-utf8-bom-sig-only.txt'

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

1021

self._testFile(f)

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1022

1023

def test_utf8_coding_cookie_and_utf8_bom(self):

1024

f = 'tokenize_tests-utf8-coding-cookie-and-utf8-bom-sig.txt'

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

1025

self._testFile(f)

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1026

Florent Xicluna

11f0b41

2012-07-07 12:13:35 +0200

[diff] [blame]

1027

def test_bad_coding_cookie(self):

1028

self.assertRaises(SyntaxError, self._testFile, 'bad_coding.py')

1029

self.assertRaises(SyntaxError, self._testFile, 'bad_coding2.py')

1030

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1031

1032

class Test_Tokenize(TestCase):

1033

1034

def test__tokenize_decodes_with_specified_encoding(self):

1035

literal = '"ЉЊЈЁЂ"'

1036

line = literal.encode('utf-8')

first = False

def readline():

nonlocal first

if not first:

first = True

return line

else:

return b''

Ammar Askar

2018-07-06 03:19:08 -0400

[diff] [blame]

1046

# skip the initial encoding token and the end tokens

1047

tokens = list(_tokenize(readline, encoding='utf-8'))[1:-2]

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1048

expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1049

self.assertEqual(tokens, expected_tokens,

1050

"bytes not decoded with encoding")

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1051

1052

def test__tokenize_does_not_decode_with_encoding_none(self):

literal = '"ЉЊЈЁЂ"'

first = False

def readline():

nonlocal first

if not first:

first = True

return literal

else:

return b''

Ammar Askar

2018-07-06 03:19:08 -0400

[diff] [blame]

1063

# skip the end tokens

1064

tokens = list(_tokenize(readline, encoding=None))[:-2]

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1065

expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1066

self.assertEqual(tokens, expected_tokens,

1067

"string not tokenized when encoding is None")

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1068

1069

1070

class TestDetectEncoding(TestCase):

1071

1072

def get_readline(self, lines):

index = 0

def readline():

nonlocal index

if index == len(lines):

raise StopIteration

line = lines[index]

index += 1

return line

return readline

def test_no_bom_no_encoding_cookie(self):

1084

lines = (

1085

b'# something\n',

1086

b'print(something)\n',

1087

b'do_something(else)\n'

1088

)

1089

encoding, consumed_lines = detect_encoding(self.get_readline(lines))

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1090

self.assertEqual(encoding, 'utf-8')

1091

self.assertEqual(consumed_lines, list(lines[:2]))

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1092

1093

def test_bom_no_cookie(self):

1094

lines = (

1095

b'\xef\xbb\xbf# something\n',

1096

b'print(something)\n',

1097

b'do_something(else)\n'

1098

)

1099

encoding, consumed_lines = detect_encoding(self.get_readline(lines))

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1100

self.assertEqual(encoding, 'utf-8-sig')

1101

self.assertEqual(consumed_lines,

1102

[b'# something\n', b'print(something)\n'])

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1103

1104

def test_cookie_first_line_no_bom(self):

1105

lines = (

1106

b'# -*- coding: latin-1 -*-\n',

1107

b'print(something)\n',

1108

b'do_something(else)\n'

1109

)

1110

encoding, consumed_lines = detect_encoding(self.get_readline(lines))

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1111

self.assertEqual(encoding, 'iso-8859-1')

1112

self.assertEqual(consumed_lines, [b'# -*- coding: latin-1 -*-\n'])

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1113

1114

def test_matched_bom_and_cookie_first_line(self):

1115

lines = (

1116

b'\xef\xbb\xbf# coding=utf-8\n',

1117

b'print(something)\n',

1118

b'do_something(else)\n'

1119

)

1120

encoding, consumed_lines = detect_encoding(self.get_readline(lines))

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1121

self.assertEqual(encoding, 'utf-8-sig')

1122

self.assertEqual(consumed_lines, [b'# coding=utf-8\n'])

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1123

1124

def test_mismatched_bom_and_cookie_first_line_raises_syntaxerror(self):

1125

lines = (

1126

b'\xef\xbb\xbf# vim: set fileencoding=ascii :\n',

1127

b'print(something)\n',

1128

b'do_something(else)\n'

1129

)

1130

readline = self.get_readline(lines)

1131

self.assertRaises(SyntaxError, detect_encoding, readline)

1132

1133

def test_cookie_second_line_no_bom(self):

1134

lines = (

1135

b'#! something\n',

1136

b'# vim: set fileencoding=ascii :\n',

1137

b'print(something)\n',

1138

b'do_something(else)\n'

1139

)

1140

encoding, consumed_lines = detect_encoding(self.get_readline(lines))

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1141

self.assertEqual(encoding, 'ascii')

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1142

expected = [b'#! something\n', b'# vim: set fileencoding=ascii :\n']

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1143

self.assertEqual(consumed_lines, expected)

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1144

1145

def test_matched_bom_and_cookie_second_line(self):

1146

lines = (

1147

b'\xef\xbb\xbf#! something\n',

1148

b'f# coding=utf-8\n',

1149

b'print(something)\n',

1150

b'do_something(else)\n'

1151

)

1152

encoding, consumed_lines = detect_encoding(self.get_readline(lines))

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1153

self.assertEqual(encoding, 'utf-8-sig')

1154

self.assertEqual(consumed_lines,

1155

[b'#! something\n', b'f# coding=utf-8\n'])

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1156

1157

def test_mismatched_bom_and_cookie_second_line_raises_syntaxerror(self):

1158

lines = (

1159

b'\xef\xbb\xbf#! something\n',

1160

b'# vim: set fileencoding=ascii :\n',

1161

b'print(something)\n',

1162

b'do_something(else)\n'

1163

)

1164

readline = self.get_readline(lines)

1165

self.assertRaises(SyntaxError, detect_encoding, readline)

1166

Serhiy Storchaka

768c16c

2014-01-09 18:36:09 +0200

[diff] [blame]

1167

def test_cookie_second_line_noncommented_first_line(self):

1168

lines = (

1169

b"print('\xc2\xa3')\n",

1170

b'# vim: set fileencoding=iso8859-15 :\n',

1171

b"print('\xe2\x82\xac')\n"

1172

)

1173

encoding, consumed_lines = detect_encoding(self.get_readline(lines))

1174

self.assertEqual(encoding, 'utf-8')

1175

expected = [b"print('\xc2\xa3')\n"]

1176

self.assertEqual(consumed_lines, expected)

1177

1178

def test_cookie_second_line_commented_first_line(self):

1179

lines = (

1180

b"#print('\xc2\xa3')\n",

1181

b'# vim: set fileencoding=iso8859-15 :\n',

1182

b"print('\xe2\x82\xac')\n"

1183

)

1184

encoding, consumed_lines = detect_encoding(self.get_readline(lines))

1185

self.assertEqual(encoding, 'iso8859-15')

1186

expected = [b"#print('\xc2\xa3')\n", b'# vim: set fileencoding=iso8859-15 :\n']

1187

self.assertEqual(consumed_lines, expected)

1188

1189

def test_cookie_second_line_empty_first_line(self):

1190

lines = (

1191

b'\n',

1192

b'# vim: set fileencoding=iso8859-15 :\n',

1193

b"print('\xe2\x82\xac')\n"

1194

)

1195

encoding, consumed_lines = detect_encoding(self.get_readline(lines))

1196

self.assertEqual(encoding, 'iso8859-15')

1197

expected = [b'\n', b'# vim: set fileencoding=iso8859-15 :\n']

1198

self.assertEqual(consumed_lines, expected)

1199

Benjamin Peterson

2009-10-09 21:43:09 +0000

[diff] [blame]

1200

def test_latin1_normalization(self):

1201

# See get_normal_name() in tokenizer.c.

1202

encodings = ("latin-1", "iso-8859-1", "iso-latin-1", "latin-1-unix",

1203

"iso-8859-1-unix", "iso-latin-1-mac")

1204

for encoding in encodings:

1205

for rep in ("-", "_"):

1206

enc = encoding.replace("-", rep)

1207

lines = (b"#!/usr/bin/python\n",

1208

b"# coding: " + enc.encode("ascii") + b"\n",

1209

b"print(things)\n",

1210

b"do_something += 4\n")

1211

rl = self.get_readline(lines)

1212

found, consumed_lines = detect_encoding(rl)

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1213

self.assertEqual(found, "iso-8859-1")

Benjamin Peterson

2009-10-09 21:43:09 +0000

[diff] [blame]

1214

Martin v. Löwis

63674f4

2012-04-20 14:36:47 +0200

[diff] [blame]

1215

def test_syntaxerror_latin1(self):

1216

# Issue 14629: need to raise SyntaxError if the first

1217

# line(s) have non-UTF-8 characters

1218

lines = (

1219

b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S

1220

)

1221

readline = self.get_readline(lines)

1222

self.assertRaises(SyntaxError, detect_encoding, readline)

1223

1224

Benjamin Peterson

2009-10-09 21:43:09 +0000

[diff] [blame]

1225

def test_utf8_normalization(self):

1226

# See get_normal_name() in tokenizer.c.

1227

encodings = ("utf-8", "utf-8-mac", "utf-8-unix")

1228

for encoding in encodings:

1229

for rep in ("-", "_"):

1230

enc = encoding.replace("-", rep)

1231

lines = (b"#!/usr/bin/python\n",

1232

b"# coding: " + enc.encode("ascii") + b"\n",

1233

b"1 + 3\n")

1234

rl = self.get_readline(lines)

1235

found, consumed_lines = detect_encoding(rl)

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1236

self.assertEqual(found, "utf-8")

Benjamin Peterson

2009-10-09 21:43:09 +0000

[diff] [blame]

1237

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1238

def test_short_files(self):

1239

readline = self.get_readline((b'print(something)\n',))

1240

encoding, consumed_lines = detect_encoding(readline)

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1241

self.assertEqual(encoding, 'utf-8')

1242

self.assertEqual(consumed_lines, [b'print(something)\n'])

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1243

1244

encoding, consumed_lines = detect_encoding(self.get_readline(()))

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1245

self.assertEqual(encoding, 'utf-8')

1246

self.assertEqual(consumed_lines, [])

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1247

1248

readline = self.get_readline((b'\xef\xbb\xbfprint(something)\n',))

1249

encoding, consumed_lines = detect_encoding(readline)

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1250

self.assertEqual(encoding, 'utf-8-sig')

1251

self.assertEqual(consumed_lines, [b'print(something)\n'])

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1252

1253

readline = self.get_readline((b'\xef\xbb\xbf',))

1254

encoding, consumed_lines = detect_encoding(readline)

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1255

self.assertEqual(encoding, 'utf-8-sig')

1256

self.assertEqual(consumed_lines, [])

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1257

Benjamin Peterson

433f32c

2008-12-12 01:25:05 +0000

[diff] [blame]

1258

readline = self.get_readline((b'# coding: bad\n',))

1259

self.assertRaises(SyntaxError, detect_encoding, readline)

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1260

Serhiy Storchaka

dafea85

2013-09-16 23:51:56 +0300

[diff] [blame]

1261

def test_false_encoding(self):

1262

# Issue 18873: "Encoding" detected in non-comment lines

1263

readline = self.get_readline((b'print("#coding=fake")',))

1264

encoding, consumed_lines = detect_encoding(readline)

1265

self.assertEqual(encoding, 'utf-8')

1266

self.assertEqual(consumed_lines, [b'print("#coding=fake")'])

1267

Victor Stinner

2010-11-09 01:08:59 +0000

[diff] [blame]

1268

def test_open(self):

Hai Shi

4660597

2020-08-04 00:49:18 +0800

[diff] [blame^]

1269

filename = os_helper.TESTFN + '.py'

1270

self.addCleanup(os_helper.unlink, filename)

Victor Stinner

2010-11-09 01:08:59 +0000

[diff] [blame]

1271

1272

# test coding cookie

1273

for encoding in ('iso-8859-15', 'utf-8'):

1274

with open(filename, 'w', encoding=encoding) as fp:

1275

print("# coding: %s" % encoding, file=fp)

1276

print("print('euro:\u20ac')", file=fp)

1277

with tokenize_open(filename) as fp:

Victor Stinner

92665ab

2010-11-09 01:11:31 +0000

[diff] [blame]

1278

self.assertEqual(fp.encoding, encoding)

1279

self.assertEqual(fp.mode, 'r')

Victor Stinner

2010-11-09 01:08:59 +0000

[diff] [blame]

1280

1281

# test BOM (no coding cookie)

1282

with open(filename, 'w', encoding='utf-8-sig') as fp:

1283

print("print('euro:\u20ac')", file=fp)

1284

with tokenize_open(filename) as fp:

Victor Stinner

92665ab

2010-11-09 01:11:31 +0000

[diff] [blame]

1285

self.assertEqual(fp.encoding, 'utf-8-sig')

1286

self.assertEqual(fp.mode, 'r')

Victor Stinner

2010-11-09 01:08:59 +0000

[diff] [blame]

1287

Brett Cannon

c33f3f2

2012-04-20 13:23:54 -0400

[diff] [blame]

1288

def test_filename_in_exception(self):

1289

# When possible, include the file name in the exception.

1290

path = 'some_file_path'

1291

lines = (

1292

b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S

1293

)

1294

class Bunk:

1295

def __init__(self, lines, path):

self.name = path

self._lines = lines

self._index = 0

def readline(self):

if self._index == len(lines):

1302

raise StopIteration

1303

line = lines[self._index]

self._index += 1

return line

with self.assertRaises(SyntaxError):

1308

ins = Bunk(lines, path)

1309

# Make sure lacking a name isn't an issue.

1310

del ins.name

1311

detect_encoding(ins.readline)

1312

with self.assertRaisesRegex(SyntaxError, '.*{}'.format(path)):

1313

ins = Bunk(lines, path)

1314

detect_encoding(ins.readline)

1315

Victor Stinner

387729e

2015-05-26 00:43:58 +0200

[diff] [blame]

1316

def test_open_error(self):

1317

# Issue #23840: open() must close the binary file on error

1318

m = BytesIO(b'#coding:xxx')

1319

with mock.patch('tokenize._builtin_open', return_value=m):

1320

self.assertRaises(SyntaxError, tokenize_open, 'foobar')

1321

self.assertTrue(m.closed)

1322

1323

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1324

class TestTokenize(TestCase):

1325

1326

def test_tokenize(self):

1327

import tokenize as tokenize_module

1328

encoding = object()

1329

encoding_used = None

1330

def mock_detect_encoding(readline):

Serhiy Storchaka

74a49ac

2015-03-20 16:46:19 +0200

[diff] [blame]

1331

return encoding, [b'first', b'second']

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1332

1333

def mock__tokenize(readline, encoding):

1334

nonlocal encoding_used

1335

encoding_used = encoding

1336

out = []

1337

while True:

1338

next_line = readline()

1339

if next_line:

1340

out.append(next_line)

continue

return out

counter = 0

def mock_readline():

nonlocal counter

counter += 1

if counter == 5:

return b''

Serhiy Storchaka

74a49ac

2015-03-20 16:46:19 +0200

[diff] [blame]

1350

return str(counter).encode()

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1351

1352

orig_detect_encoding = tokenize_module.detect_encoding

1353

orig__tokenize = tokenize_module._tokenize

1354

tokenize_module.detect_encoding = mock_detect_encoding

1355

tokenize_module._tokenize = mock__tokenize

1356

try:

1357

results = tokenize(mock_readline)

Serhiy Storchaka

74a49ac

2015-03-20 16:46:19 +0200

[diff] [blame]

1358

self.assertEqual(list(results),

1359

[b'first', b'second', b'1', b'2', b'3', b'4'])

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1360

finally:

1361

tokenize_module.detect_encoding = orig_detect_encoding

1362

tokenize_module._tokenize = orig__tokenize

1363

Sergey Fedoseev

b796e7d

2018-07-09 20:25:55 +0500

[diff] [blame]

1364

self.assertEqual(encoding_used, encoding)

Raymond Hettinger

2005-06-10 11:05:19 +0000

[diff] [blame]

1365

Yury Selivanov

8085b80

2015-05-18 12:50:52 -0400

[diff] [blame]

1366

def test_oneline_defs(self):

1367

buf = []

1368

for i in range(500):

1369

buf.append('def i{i}(): return {i}'.format(i=i))

buf.append('OK')

buf = '\n'.join(buf)

# Test that 500 consequent, one-line defs is OK

1374

toks = list(tokenize(BytesIO(buf.encode('utf-8')).readline))

Ammar Askar

2018-07-06 03:19:08 -0400

[diff] [blame]

1375

self.assertEqual(toks[-3].string, 'OK') # [-1] is always ENDMARKER

1376

# [-2] is always NEWLINE

Yury Selivanov

8085b80

2015-05-18 12:50:52 -0400

[diff] [blame]

1377

Meador Inge

2012-01-19 00:44:45 -0600

[diff] [blame]

1378

def assertExactTypeEqual(self, opstr, *optypes):

1379

tokens = list(tokenize(BytesIO(opstr.encode('utf-8')).readline))

1380

num_optypes = len(optypes)

Ammar Askar

2018-07-06 03:19:08 -0400

[diff] [blame]

1381

self.assertEqual(len(tokens), 3 + num_optypes)

Albert-Jan Nijburg

fc354f0

2017-05-31 15:00:21 +0100

[diff] [blame]

1382

self.assertEqual(tok_name[tokens[0].exact_type],

1383

tok_name[ENCODING])

Meador Inge

2012-01-19 00:44:45 -0600

[diff] [blame]

1384

for i in range(num_optypes):

Albert-Jan Nijburg

fc354f0

2017-05-31 15:00:21 +0100

[diff] [blame]

1385

self.assertEqual(tok_name[tokens[i + 1].exact_type],

1386

tok_name[optypes[i]])

1387

self.assertEqual(tok_name[tokens[1 + num_optypes].exact_type],

Ammar Askar

2018-07-06 03:19:08 -0400

[diff] [blame]

1388

tok_name[token.NEWLINE])

1389

self.assertEqual(tok_name[tokens[2 + num_optypes].exact_type],

Albert-Jan Nijburg

fc354f0

2017-05-31 15:00:21 +0100

[diff] [blame]

1390

tok_name[token.ENDMARKER])

Meador Inge

2012-01-19 00:44:45 -0600

[diff] [blame]

1391

1392

def test_exact_type(self):

1393

self.assertExactTypeEqual('()', token.LPAR, token.RPAR)

1394

self.assertExactTypeEqual('[]', token.LSQB, token.RSQB)

1395

self.assertExactTypeEqual(':', token.COLON)

1396

self.assertExactTypeEqual(',', token.COMMA)

1397

self.assertExactTypeEqual(';', token.SEMI)

1398

self.assertExactTypeEqual('+', token.PLUS)

1399

self.assertExactTypeEqual('-', token.MINUS)

1400

self.assertExactTypeEqual('*', token.STAR)

1401

self.assertExactTypeEqual('/', token.SLASH)

1402

self.assertExactTypeEqual('|', token.VBAR)

1403

self.assertExactTypeEqual('&', token.AMPER)

1404

self.assertExactTypeEqual('<', token.LESS)

1405

self.assertExactTypeEqual('>', token.GREATER)

1406

self.assertExactTypeEqual('=', token.EQUAL)

1407

self.assertExactTypeEqual('.', token.DOT)

1408

self.assertExactTypeEqual('%', token.PERCENT)

1409

self.assertExactTypeEqual('{}', token.LBRACE, token.RBRACE)

1410

self.assertExactTypeEqual('==', token.EQEQUAL)

1411

self.assertExactTypeEqual('!=', token.NOTEQUAL)

1412

self.assertExactTypeEqual('<=', token.LESSEQUAL)

1413

self.assertExactTypeEqual('>=', token.GREATEREQUAL)

1414

self.assertExactTypeEqual('~', token.TILDE)

1415

self.assertExactTypeEqual('^', token.CIRCUMFLEX)

1416

self.assertExactTypeEqual('<<', token.LEFTSHIFT)

1417

self.assertExactTypeEqual('>>', token.RIGHTSHIFT)

1418

self.assertExactTypeEqual('**', token.DOUBLESTAR)

1419

self.assertExactTypeEqual('+=', token.PLUSEQUAL)

1420

self.assertExactTypeEqual('-=', token.MINEQUAL)

1421

self.assertExactTypeEqual('*=', token.STAREQUAL)

1422

self.assertExactTypeEqual('/=', token.SLASHEQUAL)

1423

self.assertExactTypeEqual('%=', token.PERCENTEQUAL)

1424

self.assertExactTypeEqual('&=', token.AMPEREQUAL)

1425

self.assertExactTypeEqual('|=', token.VBAREQUAL)

1426

self.assertExactTypeEqual('^=', token.CIRCUMFLEXEQUAL)

1427

self.assertExactTypeEqual('^=', token.CIRCUMFLEXEQUAL)

1428

self.assertExactTypeEqual('<<=', token.LEFTSHIFTEQUAL)

1429

self.assertExactTypeEqual('>>=', token.RIGHTSHIFTEQUAL)

1430

self.assertExactTypeEqual('**=', token.DOUBLESTAREQUAL)

1431

self.assertExactTypeEqual('//', token.DOUBLESLASH)

1432

self.assertExactTypeEqual('//=', token.DOUBLESLASHEQUAL)

Emily Morehouse

8f59ee0

2019-01-24 16:49:56 -0700

[diff] [blame]

1433

self.assertExactTypeEqual(':=', token.COLONEQUAL)

Jim Fasarakis-Hilliard

d4914e9

2017-03-14 22:16:15 +0200

[diff] [blame]

1434

self.assertExactTypeEqual('...', token.ELLIPSIS)

1435

self.assertExactTypeEqual('->', token.RARROW)

Meador Inge

2012-01-19 00:44:45 -0600

[diff] [blame]

1436

self.assertExactTypeEqual('@', token.AT)

Benjamin Peterson

d51374e

2014-04-09 23:55:56 -0400

[diff] [blame]

1437

self.assertExactTypeEqual('@=', token.ATEQUAL)

Meador Inge

2012-01-19 00:44:45 -0600

[diff] [blame]

1438

1439

self.assertExactTypeEqual('a**2+b**2==c**2',

1440

NAME, token.DOUBLESTAR, NUMBER,

1441

token.PLUS,

1442

NAME, token.DOUBLESTAR, NUMBER,

1443

token.EQEQUAL,

1444

NAME, token.DOUBLESTAR, NUMBER)

1445

self.assertExactTypeEqual('{1, 2, 3}',

1446

token.LBRACE,

1447

token.NUMBER, token.COMMA,

1448

token.NUMBER, token.COMMA,

1449

token.NUMBER,

1450

token.RBRACE)

1451

self.assertExactTypeEqual('^(x & 0x1)',

1452

token.CIRCUMFLEX,

1453

token.LPAR,

1454

token.NAME, token.AMPER, token.NUMBER,

1455

token.RPAR)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

1456

Ezio Melotti

fafa8b7

2012-11-03 17:46:51 +0200

[diff] [blame]

1457

def test_pathological_trailing_whitespace(self):

1458

# See http://bugs.python.org/issue16152

1459

self.assertExactTypeEqual('@ ', token.AT)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

1460

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

1461

Terry Jan Reedy

2014-02-17 16:45:48 -0500

[diff] [blame]

1462

class UntokenizeTest(TestCase):

Terry Jan Reedy

58edfd9

2014-02-17 16:49:06 -0500

[diff] [blame]

1463

Terry Jan Reedy

2014-02-17 16:45:48 -0500

[diff] [blame]

1464

def test_bad_input_order(self):

Terry Jan Reedy

2014-02-23 23:33:08 -0500

[diff] [blame]

1465

# raise if previous row

Terry Jan Reedy

2014-02-17 16:45:48 -0500

[diff] [blame]

u = Untokenizer()

u.prev_row = 2

u.prev_col = 2

with self.assertRaises(ValueError) as cm:

1470

u.add_whitespace((1,3))

Terry Jan Reedy

58edfd9

2014-02-17 16:49:06 -0500

[diff] [blame]

1471

self.assertEqual(cm.exception.args[0],

Terry Jan Reedy

2014-02-17 16:45:48 -0500

[diff] [blame]

1472

'start (1,3) precedes previous end (2,2)')

Terry Jan Reedy

2014-02-23 23:33:08 -0500

[diff] [blame]

1473

# raise if previous column in row

Terry Jan Reedy

2014-02-17 16:45:48 -0500

[diff] [blame]

1474

self.assertRaises(ValueError, u.add_whitespace, (2,1))

1475

Terry Jan Reedy

2014-02-23 23:33:08 -0500

[diff] [blame]

1476

def test_backslash_continuation(self):

1477

# The problem is that <whitespace>\<newline> leaves no token

u = Untokenizer()

u.prev_row = 1

u.prev_col = 1

u.tokens = []

u.add_whitespace((2, 0))

1483

self.assertEqual(u.tokens, ['\\\n'])

1484

u.prev_row = 2

1485

u.add_whitespace((4, 4))

1486

self.assertEqual(u.tokens, ['\\\n', '\\\n\\\n', ' '])

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

1487

TestRoundtrip.check_roundtrip(self, 'a\n b\n c\n \\\n c\n')

Terry Jan Reedy

2014-02-23 23:33:08 -0500

[diff] [blame]

1488

Terry Jan Reedy

5b8d2c3

2014-02-17 23:12:16 -0500

[diff] [blame]

1489

def test_iter_compat(self):

1490

u = Untokenizer()

1491

token = (NAME, 'Hello')

1492

tokens = [(ENCODING, 'utf-8'), token]

1493

u.compat(token, iter([]))

1494

self.assertEqual(u.tokens, ["Hello "])

1495

u = Untokenizer()

1496

self.assertEqual(u.untokenize(iter([token])), 'Hello ')

1497

u = Untokenizer()

1498

self.assertEqual(u.untokenize(iter(tokens)), 'Hello ')

1499

self.assertEqual(u.encoding, 'utf-8')

1500

self.assertEqual(untokenize(iter(tokens)), b'Hello ')

1501

Terry Jan Reedy

2014-02-17 16:45:48 -0500

[diff] [blame]

1502

Jason R. Coombs

2015-06-20 19:52:22 -0400

[diff] [blame]

1503

class TestRoundtrip(TestCase):

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

1504

1505

def check_roundtrip(self, f):

1506

"""

1507

Test roundtrip for `untokenize`. `f` is an open file or a string.

1508

The source code in f is tokenized to both 5- and 2-tuples.

1509

Both sequences are converted back to source code via

1510

tokenize.untokenize(), and the latter tokenized again to 2-tuples.

1511

The test fails if the 3 pair tokenizations do not match.

1512

1513

When untokenize bugs are fixed, untokenize with 5-tuples should

1514

reproduce code that does not contain a backslash continuation

1515

following spaces. A proper test should test this.

1516

"""

1517

# Get source code and original tokenizations

1518

if isinstance(f, str):

1519

code = f.encode('utf-8')

else:

code = f.read()

f.close()

readline = iter(code.splitlines(keepends=True)).__next__

1524

tokens5 = list(tokenize(readline))

1525

tokens2 = [tok[:2] for tok in tokens5]

1526

# Reproduce tokens2 from pairs

1527

bytes_from2 = untokenize(tokens2)

1528

readline2 = iter(bytes_from2.splitlines(keepends=True)).__next__

1529

tokens2_from2 = [tok[:2] for tok in tokenize(readline2)]

1530

self.assertEqual(tokens2_from2, tokens2)

1531

# Reproduce tokens2 from 5-tuples

1532

bytes_from5 = untokenize(tokens5)

1533

readline5 = iter(bytes_from5.splitlines(keepends=True)).__next__

1534

tokens2_from5 = [tok[:2] for tok in tokenize(readline5)]

1535

self.assertEqual(tokens2_from5, tokens2)

1536

1537

def test_roundtrip(self):

1538

# There are some standard formatting practices that are easy to get right.

1539

1540

self.check_roundtrip("if x == 1:\n"

1541

" print(x)\n")

1542

self.check_roundtrip("# This is a comment\n"

Ammar Askar

2018-07-06 03:19:08 -0400

[diff] [blame]

1543

"# This also\n")

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

1544

1545

# Some people use different formatting conventions, which makes

1546

# untokenize a little trickier. Note that this test involves trailing

1547

# whitespace after the colon. Note that we use hex escapes to make the

1548

# two trailing blanks apparent in the expected output.

1549

1550

self.check_roundtrip("if x == 1 : \n"

1551

" print(x)\n")

1552

fn = support.findfile("tokenize_tests.txt")

1553

with open(fn, 'rb') as f:

1554

self.check_roundtrip(f)

1555

self.check_roundtrip("if x == 1:\n"

1556

" # A comment by itself.\n"

1557

" print(x) # Comment here, too.\n"

1558

" # Another comment.\n"

1559

"after_if = True\n")

1560

self.check_roundtrip("if (x # The comments need to go in the right place\n"

1561

" == 1):\n"

1562

" print('x==1')\n")

1563

self.check_roundtrip("class Test: # A comment here\n"

1564

" # A comment with weird indent\n"

1565

" after_com = 5\n"

1566

" def x(m): return m*5 # a one liner\n"

1567

" def y(m): # A whitespace after the colon\n"

1568

" return y*4 # 3-space indent\n")

1569

1570

# Some error-handling code

1571

self.check_roundtrip("try: import somemodule\n"

1572

"except ImportError: # comment\n"

1573

" print('Can not import' # comment2\n)"

1574

"else: print('Loaded')\n")

1575

1576

def test_continuation(self):

1577

# Balancing continuation

1578

self.check_roundtrip("a = (3,4, \n"

"5,6)\n"

"y = [3, 4,\n"

"5]\n"

"z = {'a': 5,\n"

"'b':15, 'c':True}\n"

1584

"x = len(y) + 5 - a[\n"

"3] - a[2]\n"

"+ len(z) - z[\n"

"'b']\n")

def test_backslash_continuation(self):

1590

# Backslash means line continuation, except for comments

1591

self.check_roundtrip("x=1+\\\n"

1592

"1\n"

1593

"# This is a comment\\\n"

1594

"# This also\n")

1595

self.check_roundtrip("# Comment \\\n"

1596

"x = 0")

1597

1598

def test_string_concatenation(self):

1599

# Two string literals on the same line

1600

self.check_roundtrip("'' ''")

1601

1602

def test_random_files(self):

1603

# Test roundtrip on random python modules.

1604

# pass the '-ucpu' option to process the full directory.

1605

1606

import glob, random

1607

fn = support.findfile("tokenize_tests.txt")

1608

tempdir = os.path.dirname(fn) or os.curdir

Serhiy Storchaka

9355868

2020-06-20 11:10:31 +0300

[diff] [blame]

1609

testfiles = glob.glob(os.path.join(glob.escape(tempdir), "test*.py"))

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

1610

Brett Cannon

2016-09-09 14:57:09 -0700

[diff] [blame]

1611

# Tokenize is broken on test_pep3131.py because regular expressions are

1612

# broken on the obscure unicode identifiers in it. *sigh*

1613

# With roundtrip extended to test the 5-tuple mode of untokenize,

1614

# 7 more testfiles fail. Remove them also until the failure is diagnosed.

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

1615

Zachary Ware

724f6a6

2016-09-09 12:55:37 -0700

[diff] [blame]

1616

testfiles.remove(os.path.join(tempdir, "test_unicode_identifiers.py"))

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

1617

for f in ('buffer', 'builtin', 'fileio', 'inspect', 'os', 'platform', 'sys'):

1618

testfiles.remove(os.path.join(tempdir, "test_%s.py") % f)

1619

1620

if not support.is_resource_enabled("cpu"):

1621

testfiles = random.sample(testfiles, 10)

1622

1623

for testfile in testfiles:

Serhiy Storchaka

8ac6581

2018-12-22 11:18:40 +0200

[diff] [blame]

1624

if support.verbose >= 2:

1625

print('tokenize', testfile)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

1626

with open(testfile, 'rb') as f:

1627

with self.subTest(file=testfile):

1628

self.check_roundtrip(f)

1629

1630

Jason R. Coombs

2015-06-20 19:52:22 -0400

[diff] [blame]

1631

def roundtrip(self, code):

1632

if isinstance(code, str):

1633

code = code.encode('utf-8')

Jason R. Coombs

b6d1cdd

2015-06-25 22:42:24 -0400

[diff] [blame]

1634

return untokenize(tokenize(BytesIO(code).readline)).decode('utf-8')

Jason R. Coombs

2015-06-20 19:52:22 -0400

[diff] [blame]

1635

1636

def test_indentation_semantics_retained(self):

1637

"""

1638

Ensure that although whitespace might be mutated in a roundtrip,

1639

the semantic meaning of the indentation remains consistent.

1640

"""

1641

code = "if False:\n\tx=3\n\tx=3\n"

Jason R. Coombs

b6d1cdd

2015-06-25 22:42:24 -0400

[diff] [blame]

1642

codelines = self.roundtrip(code).split('\n')

Jason R. Coombs

2015-06-20 19:52:22 -0400

[diff] [blame]

1643

self.assertEqual(codelines[1], codelines[2])

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

1644

self.check_roundtrip(code)

Jason R. Coombs

2015-06-20 19:52:22 -0400

[diff] [blame]

1645

1646

Thomas Wouters

49fd7fa

2006-04-21 10:40:58 +0000

[diff] [blame]

1647

if __name__ == "__main__":

Brett Cannon