from test import support
from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
                      STRING, ENDMARKER, ENCODING, tok_name, detect_encoding,
                      open as tokenize_open, Untokenizer, generate_tokens,
                      NEWLINE)
from io import BytesIO, StringIO
import unittest
from unittest import TestCase, mock
from test.test_grammar import (VALID_UNDERSCORE_LITERALS,
                               INVALID_UNDERSCORE_LITERALS)
import os
import token


# Converts a source string into a list of textual representations of
# the tokens, such as:
# `    NAME       'if'          (1, 0) (1, 2)`
# to make writing tests easier.
def stringify_tokens_from_source(token_generator, source_string):
    result = []
    num_lines = len(source_string.splitlines())
    missing_trailing_nl = source_string[-1] not in '\r\n'

    for type, token, start, end, line in token_generator:
        if type == ENDMARKER:
            break
        # Ignore the NEWLINE on the last line if the input lacks a
        # trailing newline.
        if missing_trailing_nl and type == NEWLINE and end[0] == num_lines:
            continue
        type = tok_name[type]
        result.append(f"    {type:10} {token!r:13} {start} {end}")

    return result

class TokenizeTest(TestCase):
    # Tests for the tokenize module.

    # The tests can be really simple. Given a small fragment of source
    # code, print out a table with tokens. The ENDMARKER, ENCODING and
    # final NEWLINE are omitted for brevity.

    def check_tokenize(self, s, expected):
        # Format the tokens from s into a table.
        # The ENDMARKER and final NEWLINE are omitted.
        f = BytesIO(s.encode('utf-8'))
        result = stringify_tokens_from_source(tokenize(f.readline), s)

        self.assertEqual(result,
                         ["    ENCODING   'utf-8'       (0, 0) (0, 0)"] +
                         expected.rstrip().splitlines())

    def test_implicit_newline(self):
        # Make sure that the tokenizer puts in an implicit NEWLINE
        # when the input lacks a trailing newline.
        f = BytesIO("x".encode('utf-8'))
        tokens = list(tokenize(f.readline))
        self.assertEqual(tokens[-2].type, NEWLINE)
        self.assertEqual(tokens[-1].type, ENDMARKER)

    def test_basic(self):
        self.check_tokenize("1 + 1", """\
    NUMBER     '1'           (1, 0) (1, 1)
    OP         '+'           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    """)
        self.check_tokenize("if False:\n"
                            "    # NL\n"
                            "    \n"
                            "    True = False # NEWLINE\n", """\
    NAME       'if'          (1, 0) (1, 2)
    NAME       'False'       (1, 3) (1, 8)
    OP         ':'           (1, 8) (1, 9)
    NEWLINE    '\\n'          (1, 9) (1, 10)
    COMMENT    '# NL'        (2, 4) (2, 8)
    NL         '\\n'          (2, 8) (2, 9)
    NL         '\\n'          (3, 4) (3, 5)
    INDENT     '    '        (4, 0) (4, 4)
    NAME       'True'        (4, 4) (4, 8)
    OP         '='           (4, 9) (4, 10)
    NAME       'False'       (4, 11) (4, 16)
    COMMENT    '# NEWLINE'   (4, 17) (4, 26)
    NEWLINE    '\\n'          (4, 26) (4, 27)
    DEDENT     ''            (5, 0) (5, 0)
    """)
        indent_error_file = b"""\
def k(x):
    x += 2
  x += 5
"""
        readline = BytesIO(indent_error_file).readline
        with self.assertRaisesRegex(IndentationError,
                                    "unindent does not match any "
                                    "outer indentation level"):
            for tok in tokenize(readline):
                pass

    def test_int(self):
        # Ordinary integers and binary operators
        self.check_tokenize("0xff <= 255", """\
    NUMBER     '0xff'        (1, 0) (1, 4)
    OP         '<='          (1, 5) (1, 7)
    NUMBER     '255'         (1, 8) (1, 11)
    """)
        self.check_tokenize("0b10 <= 255", """\
    NUMBER     '0b10'        (1, 0) (1, 4)
    OP         '<='          (1, 5) (1, 7)
    NUMBER     '255'         (1, 8) (1, 11)
    """)
        self.check_tokenize("0o123 <= 0O123", """\
    NUMBER     '0o123'       (1, 0) (1, 5)
    OP         '<='          (1, 6) (1, 8)
    NUMBER     '0O123'       (1, 9) (1, 14)
    """)
        self.check_tokenize("1234567 > ~0x15", """\
    NUMBER     '1234567'     (1, 0) (1, 7)
    OP         '>'           (1, 8) (1, 9)
    OP         '~'           (1, 10) (1, 11)
    NUMBER     '0x15'        (1, 11) (1, 15)
    """)
        self.check_tokenize("2134568 != 1231515", """\
    NUMBER     '2134568'     (1, 0) (1, 7)
    OP         '!='          (1, 8) (1, 10)
    NUMBER     '1231515'     (1, 11) (1, 18)
    """)
        self.check_tokenize("(-124561-1) & 200000000", """\
    OP         '('           (1, 0) (1, 1)
    OP         '-'           (1, 1) (1, 2)
    NUMBER     '124561'      (1, 2) (1, 8)
    OP         '-'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         ')'           (1, 10) (1, 11)
    OP         '&'           (1, 12) (1, 13)
    NUMBER     '200000000'   (1, 14) (1, 23)
    """)
        self.check_tokenize("0xdeadbeef != -1", """\
    NUMBER     '0xdeadbeef'  (1, 0) (1, 10)
    OP         '!='          (1, 11) (1, 13)
    OP         '-'           (1, 14) (1, 15)
    NUMBER     '1'           (1, 15) (1, 16)
    """)
        self.check_tokenize("0xdeadc0de & 12345", """\
    NUMBER     '0xdeadc0de'  (1, 0) (1, 10)
    OP         '&'           (1, 11) (1, 12)
    NUMBER     '12345'       (1, 13) (1, 18)
    """)
        self.check_tokenize("0xFF & 0x15 | 1234", """\
    NUMBER     '0xFF'        (1, 0) (1, 4)
    OP         '&'           (1, 5) (1, 6)
    NUMBER     '0x15'        (1, 7) (1, 11)
    OP         '|'           (1, 12) (1, 13)
    NUMBER     '1234'        (1, 14) (1, 18)
    """)

    def test_long(self):
        # Long integers
        self.check_tokenize("x = 0", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '0'           (1, 4) (1, 5)
    """)
        self.check_tokenize("x = 0xfffffffffff", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '0xfffffffffff' (1, 4) (1, 17)
    """)
        self.check_tokenize("x = 123141242151251616110", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '123141242151251616110' (1, 4) (1, 25)
    """)
        self.check_tokenize("x = -15921590215012591", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    OP         '-'           (1, 4) (1, 5)
    NUMBER     '15921590215012591' (1, 5) (1, 22)
    """)

    def test_float(self):
        # Floating point numbers
        self.check_tokenize("x = 3.14159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3.14159'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = 314159.", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '314159.'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = .314159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '.314159'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = 3e14159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3e14159'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = 3E123", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3E123'       (1, 4) (1, 9)
    """)
        self.check_tokenize("x+y = 3e-1230", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '+'           (1, 1) (1, 2)
    NAME       'y'           (1, 2) (1, 3)
    OP         '='           (1, 4) (1, 5)
    NUMBER     '3e-1230'     (1, 6) (1, 13)
    """)
        self.check_tokenize("x = 3.14e159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3.14e159'    (1, 4) (1, 12)
    """)

    def test_underscore_literals(self):
        def number_token(s):
            f = BytesIO(s.encode('utf-8'))
            for toktype, token, start, end, line in tokenize(f.readline):
                if toktype == NUMBER:
                    return token
            return 'invalid token'
        for lit in VALID_UNDERSCORE_LITERALS:
            if '(' in lit:
                # this won't work with compound complex inputs
                continue
            self.assertEqual(number_token(lit), lit)
        for lit in INVALID_UNDERSCORE_LITERALS:
            self.assertNotEqual(number_token(lit), lit)

    def test_string(self):
        # String literals
        self.check_tokenize("x = ''; y = \"\"", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "''"          (1, 4) (1, 6)
    OP         ';'           (1, 6) (1, 7)
    NAME       'y'           (1, 8) (1, 9)
    OP         '='           (1, 10) (1, 11)
    STRING     '""'          (1, 12) (1, 14)
    """)
        self.check_tokenize("x = '\"'; y = \"'\"", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '\\'"\\''       (1, 4) (1, 7)
    OP         ';'           (1, 7) (1, 8)
    NAME       'y'           (1, 9) (1, 10)
    OP         '='           (1, 11) (1, 12)
    STRING     '"\\'"'        (1, 13) (1, 16)
    """)
        self.check_tokenize("x = \"doesn't \"shrink\", does it\"", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '"doesn\\'t "' (1, 4) (1, 14)
    NAME       'shrink'      (1, 14) (1, 20)
    STRING     '", does it"' (1, 20) (1, 31)
    """)
        self.check_tokenize("x = 'abc' + 'ABC'", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "'abc'"       (1, 4) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    STRING     "'ABC'"       (1, 12) (1, 17)
    """)
        self.check_tokenize('y = "ABC" + "ABC"', """\
    NAME       'y'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '"ABC"'       (1, 4) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    STRING     '"ABC"'       (1, 12) (1, 17)
    """)
        self.check_tokenize("x = r'abc' + r'ABC' + R'ABC' + R'ABC'", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "r'abc'"      (1, 4) (1, 10)
    OP         '+'           (1, 11) (1, 12)
    STRING     "r'ABC'"      (1, 13) (1, 19)
    OP         '+'           (1, 20) (1, 21)
    STRING     "R'ABC'"      (1, 22) (1, 28)
    OP         '+'           (1, 29) (1, 30)
    STRING     "R'ABC'"      (1, 31) (1, 37)
    """)
        self.check_tokenize('y = r"abc" + r"ABC" + R"ABC" + R"ABC"', """\
    NAME       'y'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     'r"abc"'      (1, 4) (1, 10)
    OP         '+'           (1, 11) (1, 12)
    STRING     'r"ABC"'      (1, 13) (1, 19)
    OP         '+'           (1, 20) (1, 21)
    STRING     'R"ABC"'      (1, 22) (1, 28)
    OP         '+'           (1, 29) (1, 30)
    STRING     'R"ABC"'      (1, 31) (1, 37)
    """)

        self.check_tokenize("u'abc' + U'abc'", """\
    STRING     "u'abc'"      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     "U'abc'"      (1, 9) (1, 15)
    """)
        self.check_tokenize('u"abc" + U"abc"', """\
    STRING     'u"abc"'      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     'U"abc"'      (1, 9) (1, 15)
    """)

        self.check_tokenize("b'abc' + B'abc'", """\
    STRING     "b'abc'"      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     "B'abc'"      (1, 9) (1, 15)
    """)
        self.check_tokenize('b"abc" + B"abc"', """\
    STRING     'b"abc"'      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     'B"abc"'      (1, 9) (1, 15)
    """)
        self.check_tokenize("br'abc' + bR'abc' + Br'abc' + BR'abc'", """\
    STRING     "br'abc'"     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     "bR'abc'"     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     "Br'abc'"     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     "BR'abc'"     (1, 30) (1, 37)
    """)
        self.check_tokenize('br"abc" + bR"abc" + Br"abc" + BR"abc"', """\
    STRING     'br"abc"'     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     'bR"abc"'     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     'Br"abc"'     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     'BR"abc"'     (1, 30) (1, 37)
    """)
        self.check_tokenize("rb'abc' + rB'abc' + Rb'abc' + RB'abc'", """\
    STRING     "rb'abc'"     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     "rB'abc'"     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     "Rb'abc'"     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     "RB'abc'"     (1, 30) (1, 37)
    """)
        self.check_tokenize('rb"abc" + rB"abc" + Rb"abc" + RB"abc"', """\
    STRING     'rb"abc"'     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     'rB"abc"'     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     'Rb"abc"'     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     'RB"abc"'     (1, 30) (1, 37)
    """)
        # Check 0, 1, and 2 character string prefixes.
        self.check_tokenize(r'"a\
de\
fg"', """\
    STRING     '"a\\\\\\nde\\\\\\nfg"\' (1, 0) (3, 3)
    """)
        self.check_tokenize(r'u"a\
de"', """\
    STRING     'u"a\\\\\\nde"\'  (1, 0) (2, 3)
    """)
        self.check_tokenize(r'rb"a\
d"', """\
    STRING     'rb"a\\\\\\nd"\'  (1, 0) (2, 2)
    """)
        self.check_tokenize(r'"""a\
b"""', """\
    STRING     '\"\""a\\\\\\nb\"\""' (1, 0) (2, 4)
    """)
        self.check_tokenize(r'u"""a\
b"""', """\
    STRING     'u\"\""a\\\\\\nb\"\""' (1, 0) (2, 4)
    """)
        self.check_tokenize(r'rb"""a\
b\
c"""', """\
    STRING     'rb"\""a\\\\\\nb\\\\\\nc"\""' (1, 0) (3, 4)
    """)
        self.check_tokenize('f"abc"', """\
    STRING     'f"abc"'      (1, 0) (1, 6)
    """)
        self.check_tokenize('fR"a{b}c"', """\
    STRING     'fR"a{b}c"'   (1, 0) (1, 9)
    """)
        self.check_tokenize('f"""abc"""', """\
    STRING     'f\"\"\"abc\"\"\"'  (1, 0) (1, 10)
    """)
        self.check_tokenize(r'f"abc\
def"', """\
    STRING     'f"abc\\\\\\ndef"' (1, 0) (2, 4)
    """)
        self.check_tokenize(r'Rf"abc\
def"', """\
    STRING     'Rf"abc\\\\\\ndef"' (1, 0) (2, 4)
    """)

    def test_function(self):
        self.check_tokenize("def d22(a, b, c=2, d=2, *k): pass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'd22'         (1, 4) (1, 7)
    OP         '('           (1, 7) (1, 8)
    NAME       'a'           (1, 8) (1, 9)
    OP         ','           (1, 9) (1, 10)
    NAME       'b'           (1, 11) (1, 12)
    OP         ','           (1, 12) (1, 13)
    NAME       'c'           (1, 14) (1, 15)
    OP         '='           (1, 15) (1, 16)
    NUMBER     '2'           (1, 16) (1, 17)
    OP         ','           (1, 17) (1, 18)
    NAME       'd'           (1, 19) (1, 20)
    OP         '='           (1, 20) (1, 21)
    NUMBER     '2'           (1, 21) (1, 22)
    OP         ','           (1, 22) (1, 23)
    OP         '*'           (1, 24) (1, 25)
    NAME       'k'           (1, 25) (1, 26)
    OP         ')'           (1, 26) (1, 27)
    OP         ':'           (1, 27) (1, 28)
    NAME       'pass'        (1, 29) (1, 33)
    """)
        self.check_tokenize("def d01v_(a=1, *k, **w): pass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'd01v_'       (1, 4) (1, 9)
    OP         '('           (1, 9) (1, 10)
    NAME       'a'           (1, 10) (1, 11)
    OP         '='           (1, 11) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         ','           (1, 13) (1, 14)
    OP         '*'           (1, 15) (1, 16)
    NAME       'k'           (1, 16) (1, 17)
    OP         ','           (1, 17) (1, 18)
    OP         '**'          (1, 19) (1, 21)
    NAME       'w'           (1, 21) (1, 22)
    OP         ')'           (1, 22) (1, 23)
    OP         ':'           (1, 23) (1, 24)
    NAME       'pass'        (1, 25) (1, 29)
    """)
        self.check_tokenize("def d23(a: str, b: int=3) -> int: pass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'd23'         (1, 4) (1, 7)
    OP         '('           (1, 7) (1, 8)
    NAME       'a'           (1, 8) (1, 9)
    OP         ':'           (1, 9) (1, 10)
    NAME       'str'         (1, 11) (1, 14)
    OP         ','           (1, 14) (1, 15)
    NAME       'b'           (1, 16) (1, 17)
    OP         ':'           (1, 17) (1, 18)
    NAME       'int'         (1, 19) (1, 22)
    OP         '='           (1, 22) (1, 23)
    NUMBER     '3'           (1, 23) (1, 24)
    OP         ')'           (1, 24) (1, 25)
    OP         '->'          (1, 26) (1, 28)
    NAME       'int'         (1, 29) (1, 32)
    OP         ':'           (1, 32) (1, 33)
    NAME       'pass'        (1, 34) (1, 38)
    """)

    def test_comparison(self):
        # Comparison
        self.check_tokenize("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != "
                            "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass", """\
    NAME       'if'          (1, 0) (1, 2)
    NUMBER     '1'           (1, 3) (1, 4)
    OP         '<'           (1, 5) (1, 6)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '>'           (1, 9) (1, 10)
    NUMBER     '1'           (1, 11) (1, 12)
    OP         '=='          (1, 13) (1, 15)
    NUMBER     '1'           (1, 16) (1, 17)
    OP         '>='          (1, 18) (1, 20)
    NUMBER     '5'           (1, 21) (1, 22)
    OP         '<='          (1, 23) (1, 25)
    NUMBER     '0x15'        (1, 26) (1, 30)
    OP         '<='          (1, 31) (1, 33)
    NUMBER     '0x12'        (1, 34) (1, 38)
    OP         '!='          (1, 39) (1, 41)
    NUMBER     '1'           (1, 42) (1, 43)
    NAME       'and'         (1, 44) (1, 47)
    NUMBER     '5'           (1, 48) (1, 49)
    NAME       'in'          (1, 50) (1, 52)
    NUMBER     '1'           (1, 53) (1, 54)
    NAME       'not'         (1, 55) (1, 58)
    NAME       'in'          (1, 59) (1, 61)
    NUMBER     '1'           (1, 62) (1, 63)
    NAME       'is'          (1, 64) (1, 66)
    NUMBER     '1'           (1, 67) (1, 68)
    NAME       'or'          (1, 69) (1, 71)
    NUMBER     '5'           (1, 72) (1, 73)
    NAME       'is'          (1, 74) (1, 76)
    NAME       'not'         (1, 77) (1, 80)
    NUMBER     '1'           (1, 81) (1, 82)
    OP         ':'           (1, 82) (1, 83)
    NAME       'pass'        (1, 84) (1, 88)
    """)

    def test_shift(self):
        # Shift
        self.check_tokenize("x = 1 << 1 >> 5", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '<<'          (1, 6) (1, 8)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '>>'          (1, 11) (1, 13)
    NUMBER     '5'           (1, 14) (1, 15)
    """)

    def test_additive(self):
        # Additive
        self.check_tokenize("x = 1 - y + 15 - 1 + 0x124 + z + a[5]", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '-'           (1, 6) (1, 7)
    NAME       'y'           (1, 8) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    NUMBER     '15'          (1, 12) (1, 14)
    OP         '-'           (1, 15) (1, 16)
    NUMBER     '1'           (1, 17) (1, 18)
    OP         '+'           (1, 19) (1, 20)
    NUMBER     '0x124'       (1, 21) (1, 26)
    OP         '+'           (1, 27) (1, 28)
    NAME       'z'           (1, 29) (1, 30)
    OP         '+'           (1, 31) (1, 32)
    NAME       'a'           (1, 33) (1, 34)
    OP         '['           (1, 34) (1, 35)
    NUMBER     '5'           (1, 35) (1, 36)
    OP         ']'           (1, 36) (1, 37)
    """)

    def test_multiplicative(self):
        # Multiplicative
        self.check_tokenize("x = 1//1*1/5*12%0x12@42", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '//'          (1, 5) (1, 7)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '*'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '/'           (1, 10) (1, 11)
    NUMBER     '5'           (1, 11) (1, 12)
    OP         '*'           (1, 12) (1, 13)
    NUMBER     '12'          (1, 13) (1, 15)
    OP         '%'           (1, 15) (1, 16)
    NUMBER     '0x12'        (1, 16) (1, 20)
    OP         '@'           (1, 20) (1, 21)
    NUMBER     '42'          (1, 21) (1, 23)
    """)

    def test_unary(self):
        # Unary
        self.check_tokenize("~1 ^ 1 & 1 |1 ^ -1", """\
    OP         '~'           (1, 0) (1, 1)
    NUMBER     '1'           (1, 1) (1, 2)
    OP         '^'           (1, 3) (1, 4)
    NUMBER     '1'           (1, 5) (1, 6)
    OP         '&'           (1, 7) (1, 8)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '|'           (1, 11) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         '^'           (1, 14) (1, 15)
    OP         '-'           (1, 16) (1, 17)
    NUMBER     '1'           (1, 17) (1, 18)
    """)
        self.check_tokenize("-1*1/1+1*1//1 - ---1**1", """\
    OP         '-'           (1, 0) (1, 1)
    NUMBER     '1'           (1, 1) (1, 2)
    OP         '*'           (1, 2) (1, 3)
    NUMBER     '1'           (1, 3) (1, 4)
    OP         '/'           (1, 4) (1, 5)
    NUMBER     '1'           (1, 5) (1, 6)
    OP         '+'           (1, 6) (1, 7)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '*'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '//'          (1, 10) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         '-'           (1, 14) (1, 15)
    OP         '-'           (1, 16) (1, 17)
    OP         '-'           (1, 17) (1, 18)
    OP         '-'           (1, 18) (1, 19)
    NUMBER     '1'           (1, 19) (1, 20)
    OP         '**'          (1, 20) (1, 22)
    NUMBER     '1'           (1, 22) (1, 23)
    """)

    def test_selector(self):
        # Selector
        self.check_tokenize("import sys, time\nx = sys.modules['time'].time()", """\
    NAME       'import'      (1, 0) (1, 6)
    NAME       'sys'         (1, 7) (1, 10)
    OP         ','           (1, 10) (1, 11)
    NAME       'time'        (1, 12) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    NAME       'x'           (2, 0) (2, 1)
    OP         '='           (2, 2) (2, 3)
    NAME       'sys'         (2, 4) (2, 7)
    OP         '.'           (2, 7) (2, 8)
    NAME       'modules'     (2, 8) (2, 15)
    OP         '['           (2, 15) (2, 16)
    STRING     "'time'"      (2, 16) (2, 22)
    OP         ']'           (2, 22) (2, 23)
    OP         '.'           (2, 23) (2, 24)
    NAME       'time'        (2, 24) (2, 28)
    OP         '('           (2, 28) (2, 29)
    OP         ')'           (2, 29) (2, 30)
    """)

    def test_method(self):
        # Methods
        self.check_tokenize("@staticmethod\ndef foo(x,y): pass", """\
    OP         '@'           (1, 0) (1, 1)
    NAME       'staticmethod' (1, 1) (1, 13)
    NEWLINE    '\\n'          (1, 13) (1, 14)
    NAME       'def'         (2, 0) (2, 3)
    NAME       'foo'         (2, 4) (2, 7)
    OP         '('           (2, 7) (2, 8)
    NAME       'x'           (2, 8) (2, 9)
    OP         ','           (2, 9) (2, 10)
    NAME       'y'           (2, 10) (2, 11)
    OP         ')'           (2, 11) (2, 12)
    OP         ':'           (2, 12) (2, 13)
    NAME       'pass'        (2, 14) (2, 18)
    """)

    def test_tabs(self):
        # Evil tabs
        self.check_tokenize("def f():\n"
                            "\tif x\n"
                            "        \tpass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'f'           (1, 4) (1, 5)
    OP         '('           (1, 5) (1, 6)
    OP         ')'           (1, 6) (1, 7)
    OP         ':'           (1, 7) (1, 8)
    NEWLINE    '\\n'          (1, 8) (1, 9)
    INDENT     '\\t'          (2, 0) (2, 1)
    NAME       'if'          (2, 1) (2, 3)
    NAME       'x'           (2, 4) (2, 5)
    NEWLINE    '\\n'          (2, 5) (2, 6)
    INDENT     '        \\t'  (3, 0) (3, 9)
    NAME       'pass'        (3, 9) (3, 13)
    DEDENT     ''            (4, 0) (4, 0)
    DEDENT     ''            (4, 0) (4, 0)
    """)

    def test_non_ascii_identifiers(self):
        # Non-ascii identifiers
        self.check_tokenize("Örter = 'places'\ngrün = 'green'", """\
    NAME       'Örter'       (1, 0) (1, 5)
    OP         '='           (1, 6) (1, 7)
    STRING     "'places'"    (1, 8) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    NAME       'grün'        (2, 0) (2, 4)
    OP         '='           (2, 5) (2, 6)
    STRING     "'green'"     (2, 7) (2, 14)
    """)

    def test_unicode(self):
        # Legacy unicode literals:
        self.check_tokenize("Örter = u'places'\ngrün = U'green'", """\
    NAME       'Örter'       (1, 0) (1, 5)
    OP         '='           (1, 6) (1, 7)
    STRING     "u'places'"   (1, 8) (1, 17)
    NEWLINE    '\\n'          (1, 17) (1, 18)
    NAME       'grün'        (2, 0) (2, 4)
    OP         '='           (2, 5) (2, 6)
    STRING     "U'green'"    (2, 7) (2, 15)
    """)

    def test_async(self):
        # Async/await extension:
        self.check_tokenize("async = 1", """\
    NAME       'async'       (1, 0) (1, 5)
    OP         '='           (1, 6) (1, 7)
    NUMBER     '1'           (1, 8) (1, 9)
    """)

        self.check_tokenize("a = (async = 1)", """\
    NAME       'a'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    OP         '('           (1, 4) (1, 5)
    NAME       'async'       (1, 5) (1, 10)
    OP         '='           (1, 11) (1, 12)
    NUMBER     '1'           (1, 13) (1, 14)
    OP         ')'           (1, 14) (1, 15)
    """)

        self.check_tokenize("async()", """\
    NAME       'async'       (1, 0) (1, 5)
    OP         '('           (1, 5) (1, 6)
    OP         ')'           (1, 6) (1, 7)
    """)

        self.check_tokenize("class async(Bar):pass", """\
    NAME       'class'       (1, 0) (1, 5)
    NAME       'async'       (1, 6) (1, 11)
    OP         '('           (1, 11) (1, 12)
    NAME       'Bar'         (1, 12) (1, 15)
    OP         ')'           (1, 15) (1, 16)
    OP         ':'           (1, 16) (1, 17)
    NAME       'pass'        (1, 17) (1, 21)
    """)

        self.check_tokenize("class async:pass", """\
    NAME       'class'       (1, 0) (1, 5)
    NAME       'async'       (1, 6) (1, 11)
    OP         ':'           (1, 11) (1, 12)
    NAME       'pass'        (1, 12) (1, 16)
    """)

        self.check_tokenize("await = 1", """\
    NAME       'await'       (1, 0) (1, 5)
    OP         '='           (1, 6) (1, 7)
    NUMBER     '1'           (1, 8) (1, 9)
    """)

        self.check_tokenize("foo.async", """\
    NAME       'foo'         (1, 0) (1, 3)
    OP         '.'           (1, 3) (1, 4)
    NAME       'async'       (1, 4) (1, 9)
    """)

        self.check_tokenize("async for a in b: pass", """\
    NAME       'async'       (1, 0) (1, 5)
    NAME       'for'         (1, 6) (1, 9)
    NAME       'a'           (1, 10) (1, 11)
    NAME       'in'          (1, 12) (1, 14)
    NAME       'b'           (1, 15) (1, 16)
    OP         ':'           (1, 16) (1, 17)
    NAME       'pass'        (1, 18) (1, 22)
    """)

        self.check_tokenize("async with a as b: pass", """\
    NAME       'async'       (1, 0) (1, 5)
    NAME       'with'        (1, 6) (1, 10)
    NAME       'a'           (1, 11) (1, 12)
    NAME       'as'          (1, 13) (1, 15)
    NAME       'b'           (1, 16) (1, 17)
    OP         ':'           (1, 17) (1, 18)
    NAME       'pass'        (1, 19) (1, 23)
    """)

        self.check_tokenize("async.foo", """\
    NAME       'async'       (1, 0) (1, 5)
    OP         '.'           (1, 5) (1, 6)
    NAME       'foo'         (1, 6) (1, 9)
    """)

        self.check_tokenize("async", """\
    NAME       'async'       (1, 0) (1, 5)
    """)

        self.check_tokenize("async\n#comment\nawait", """\
    NAME       'async'       (1, 0) (1, 5)
    NEWLINE    '\\n'          (1, 5) (1, 6)
    COMMENT    '#comment'    (2, 0) (2, 8)
    NL         '\\n'          (2, 8) (2, 9)
    NAME       'await'       (3, 0) (3, 5)
    """)

        self.check_tokenize("async\n...\nawait", """\
    NAME       'async'       (1, 0) (1, 5)
    NEWLINE    '\\n'          (1, 5) (1, 6)
    OP         '...'         (2, 0) (2, 3)
    NEWLINE    '\\n'          (2, 3) (2, 4)
    NAME       'await'       (3, 0) (3, 5)
    """)

        self.check_tokenize("async\nawait", """\
    NAME       'async'       (1, 0) (1, 5)
    NEWLINE    '\\n'          (1, 5) (1, 6)
    NAME       'await'       (2, 0) (2, 5)
    """)

        self.check_tokenize("foo.async + 1", """\
    NAME       'foo'         (1, 0) (1, 3)
    OP         '.'           (1, 3) (1, 4)
    NAME       'async'       (1, 4) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    NUMBER     '1'           (1, 12) (1, 13)
    """)

        self.check_tokenize("async def foo(): pass", """\
    NAME       'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'foo'         (1, 10) (1, 13)
    OP         '('           (1, 13) (1, 14)
    OP         ')'           (1, 14) (1, 15)
    OP         ':'           (1, 15) (1, 16)
    NAME       'pass'        (1, 17) (1, 21)
    """)

        self.check_tokenize('''\
async def foo():
  def foo(await):
    await = 1
  if 1:
    await
async += 1
''', """\
    NAME       'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'foo'         (1, 10) (1, 13)
    OP         '('           (1, 13) (1, 14)
    OP         ')'           (1, 14) (1, 15)
    OP         ':'           (1, 15) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    INDENT     '  '          (2, 0) (2, 2)
    NAME       'def'         (2, 2) (2, 5)
    NAME       'foo'         (2, 6) (2, 9)
    OP         '('           (2, 9) (2, 10)
    NAME       'await'       (2, 10) (2, 15)
    OP         ')'           (2, 15) (2, 16)
    OP         ':'           (2, 16) (2, 17)
    NEWLINE    '\\n'          (2, 17) (2, 18)
    INDENT     '    '        (3, 0) (3, 4)
    NAME       'await'       (3, 4) (3, 9)
    OP         '='           (3, 10) (3, 11)
    NUMBER     '1'           (3, 12) (3, 13)
    NEWLINE    '\\n'          (3, 13) (3, 14)
    DEDENT     ''            (4, 2) (4, 2)
    NAME       'if'          (4, 2) (4, 4)
    NUMBER     '1'           (4, 5) (4, 6)
    OP         ':'           (4, 6) (4, 7)
    NEWLINE    '\\n'          (4, 7) (4, 8)
    INDENT     '    '        (5, 0) (5, 4)
    NAME       'await'       (5, 4) (5, 9)
    NEWLINE    '\\n'          (5, 9) (5, 10)
    DEDENT     ''            (6, 0) (6, 0)
    DEDENT     ''            (6, 0) (6, 0)
    NAME       'async'       (6, 0) (6, 5)
    OP         '+='          (6, 6) (6, 8)
    NUMBER     '1'           (6, 9) (6, 10)
    NEWLINE    '\\n'          (6, 10) (6, 11)
    """)

        self.check_tokenize('''\
async def foo():
  async for i in 1: pass''', """\
    NAME       'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'foo'         (1, 10) (1, 13)
    OP         '('           (1, 13) (1, 14)
    OP         ')'           (1, 14) (1, 15)
    OP         ':'           (1, 15) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    INDENT     '  '          (2, 0) (2, 2)
    NAME       'async'       (2, 2) (2, 7)
    NAME       'for'         (2, 8) (2, 11)
    NAME       'i'           (2, 12) (2, 13)
    NAME       'in'          (2, 14) (2, 16)
    NUMBER     '1'           (2, 17) (2, 18)
    OP         ':'           (2, 18) (2, 19)
    NAME       'pass'        (2, 20) (2, 24)
    DEDENT     ''            (3, 0) (3, 0)
    """)

        self.check_tokenize('''async def foo(async): await''', """\
    NAME       'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'foo'         (1, 10) (1, 13)
    OP         '('           (1, 13) (1, 14)
    NAME       'async'       (1, 14) (1, 19)
    OP         ')'           (1, 19) (1, 20)
    OP         ':'           (1, 20) (1, 21)
    NAME       'await'       (1, 22) (1, 27)
    """)

        self.check_tokenize('''\
def f():

  def baz(): pass
  async def bar(): pass

  await = 2''', """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'f'           (1, 4) (1, 5)
    OP         '('           (1, 5) (1, 6)
    OP         ')'           (1, 6) (1, 7)
    OP         ':'           (1, 7) (1, 8)
    NEWLINE    '\\n'          (1, 8) (1, 9)
    NL         '\\n'          (2, 0) (2, 1)
    INDENT     '  '          (3, 0) (3, 2)
    NAME       'def'         (3, 2) (3, 5)
    NAME       'baz'         (3, 6) (3, 9)
    OP         '('           (3, 9) (3, 10)
    OP         ')'           (3, 10) (3, 11)
    OP         ':'           (3, 11) (3, 12)
    NAME       'pass'        (3, 13) (3, 17)
    NEWLINE    '\\n'          (3, 17) (3, 18)
    NAME       'async'       (4, 2) (4, 7)
    NAME       'def'         (4, 8) (4, 11)
    NAME       'bar'         (4, 12) (4, 15)
    OP         '('           (4, 15) (4, 16)
    OP         ')'           (4, 16) (4, 17)
    OP         ':'           (4, 17) (4, 18)
    NAME       'pass'        (4, 19) (4, 23)
    NEWLINE    '\\n'          (4, 23) (4, 24)
    NL         '\\n'          (5, 0) (5, 1)
    NAME       'await'       (6, 2) (6, 7)
    OP         '='           (6, 8) (6, 9)
    NUMBER     '2'           (6, 10) (6, 11)
    DEDENT     ''            (7, 0) (7, 0)
    """)

        self.check_tokenize('''\
async def f():

  def baz(): pass
  async def bar(): pass

  await = 2''', """\
    NAME       'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'f'           (1, 10) (1, 11)
    OP         '('           (1, 11) (1, 12)
    OP         ')'           (1, 12) (1, 13)
    OP         ':'           (1, 13) (1, 14)
    NEWLINE    '\\n'          (1, 14) (1, 15)
    NL         '\\n'          (2, 0) (2, 1)
    INDENT     '  '          (3, 0) (3, 2)
    NAME       'def'         (3, 2) (3, 5)
    NAME       'baz'         (3, 6) (3, 9)
    OP         '('           (3, 9) (3, 10)
    OP         ')'           (3, 10) (3, 11)
    OP         ':'           (3, 11) (3, 12)
    NAME       'pass'        (3, 13) (3, 17)
    NEWLINE    '\\n'          (3, 17) (3, 18)
    NAME       'async'       (4, 2) (4, 7)
    NAME       'def'         (4, 8) (4, 11)
    NAME       'bar'         (4, 12) (4, 15)
    OP         '('           (4, 15) (4, 16)
    OP         ')'           (4, 16) (4, 17)
    OP         ':'           (4, 17) (4, 18)
    NAME       'pass'        (4, 19) (4, 23)
    NEWLINE    '\\n'          (4, 23) (4, 24)
    NL         '\\n'          (5, 0) (5, 1)
    NAME       'await'       (6, 2) (6, 7)
    OP         '='           (6, 8) (6, 9)
    NUMBER     '2'           (6, 10) (6, 11)
    DEDENT     ''            (7, 0) (7, 0)
    """)

class GenerateTokensTest(TokenizeTest):
    def check_tokenize(self, s, expected):
        # Format the tokens from s into a table.
        # The ENDMARKER and final NEWLINE are omitted.
        f = StringIO(s)
        result = stringify_tokens_from_source(generate_tokens(f.readline), s)
        self.assertEqual(result, expected.rstrip().splitlines())


def decistmt(s):
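    """Substitute Decimal('x') for each float literal x in the statements s.

    Tokenizes s, replaces every NUMBER token that contains a '.' with the
    token sequence for Decimal('...'), and untokenizes the result.
    """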
    result = []
    g = tokenize(BytesIO(s.encode('utf-8')).readline)  # tokenize the string
    for toknum, tokval, _, _, _ in g:
        if toknum == NUMBER and '.' in tokval:  # replace NUMBER tokens
            result.extend([
                (NAME, 'Decimal'),
                (OP, '('),
                (STRING, repr(tokval)),
                (OP, ')')
            ])
        else:
            result.append((toknum, tokval))
    return untokenize(result).decode('utf-8')

class TestMisc(TestCase):

    def test_decistmt(self):
        # Substitute Decimals for floats in a string of statements.
        # This is an example from the docs.

        from decimal import Decimal
        s = '+21.3e-5*-.1234/81.7'
        self.assertEqual(decistmt(s),
                         "+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')")

        # The format of the exponent is inherited from the platform C library.
        # Known cases are "e-007" (Windows) and "e-07" (not Windows).  Since
        # we're only showing 11 digits, and the 12th isn't close to 5, the
        # rest of the output should be platform-independent.
        self.assertRegex(repr(eval(s)), '-3.2171603427[0-9]*e-0+7')

        # Output from calculations with Decimal should be identical across all
        # platforms.
        self.assertEqual(eval(decistmt(s)),
                         Decimal('-3.217160342717258261933904529E-7'))


class TestTokenizerAdheresToPep0263(TestCase):
    """
    Test that the tokenizer adheres to the coding behaviour stipulated in PEP 0263.
    """

    def _testFile(self, filename):
        path = os.path.join(os.path.dirname(__file__), filename)
        TestRoundtrip.check_roundtrip(self, open(path, 'rb'))

    def test_utf8_coding_cookie_and_no_utf8_bom(self):
        f = 'tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt'
        self._testFile(f)

    def test_latin1_coding_cookie_and_utf8_bom(self):
        """
        As per PEP 0263, if a file starts with a utf-8 BOM signature, the only
        allowed encoding for the comment is 'utf-8'.  The text file used in
        this test starts with a BOM signature, but specifies latin1 as the
        coding, so verify that a SyntaxError is raised, which matches the
        behaviour of the interpreter when it encounters a similar condition.
        """
        f = 'tokenize_tests-latin1-coding-cookie-and-utf8-bom-sig.txt'
        self.assertRaises(SyntaxError, self._testFile, f)

    def test_no_coding_cookie_and_utf8_bom(self):
        f = 'tokenize_tests-no-coding-cookie-and-utf8-bom-sig-only.txt'
        self._testFile(f)

    def test_utf8_coding_cookie_and_utf8_bom(self):
        f = 'tokenize_tests-utf8-coding-cookie-and-utf8-bom-sig.txt'
        self._testFile(f)

    def test_bad_coding_cookie(self):
        self.assertRaises(SyntaxError, self._testFile, 'bad_coding.py')
        self.assertRaises(SyntaxError, self._testFile, 'bad_coding2.py')


class Test_Tokenize(TestCase):

    def test__tokenize_decodes_with_specified_encoding(self):
        literal = '"ЉЊЈЁЂ"'
        line = literal.encode('utf-8')
        first = False
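        # readline() emulates a one-line file: the first call returns the
        # encoded line, and every call after that returns b'' to signal EOF.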
        def readline():
            nonlocal first
            if not first:
                first = True
                return line
            else:
                return b''

        # skip the initial encoding token and the end tokens
        tokens = list(_tokenize(readline, encoding='utf-8'))[1:-2]
        expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
        self.assertEqual(tokens, expected_tokens,
                         "bytes not decoded with encoding")

    def test__tokenize_does_not_decode_with_encoding_none(self):
        literal = '"ЉЊЈЁЂ"'
        first = False
        def readline():
            nonlocal first
            if not first:
                first = True
                return literal
            else:
                return b''

        # skip the end tokens
        tokens = list(_tokenize(readline, encoding=None))[:-2]
        expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
        self.assertEqual(tokens, expected_tokens,
                         "string not tokenized when encoding is None")


class TestDetectEncoding(TestCase):

    def get_readline(self, lines):
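        # Returns a readline() that serves the given lines one at a time;
        # once they are exhausted it raises StopIteration, which
        # detect_encoding() treats the same as end of input.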
        index = 0
        def readline():
            nonlocal index
            if index == len(lines):
                raise StopIteration
            line = lines[index]
            index += 1
            return line
        return readline

    def test_no_bom_no_encoding_cookie(self):
        lines = (
            b'# something\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, list(lines[:2]))

    def test_bom_no_cookie(self):
        lines = (
            b'\xef\xbb\xbf# something\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines,
                         [b'# something\n', b'print(something)\n'])

    def test_cookie_first_line_no_bom(self):
        lines = (
            b'# -*- coding: latin-1 -*-\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'iso-8859-1')
        self.assertEqual(consumed_lines, [b'# -*- coding: latin-1 -*-\n'])

    def test_matched_bom_and_cookie_first_line(self):
        lines = (
            b'\xef\xbb\xbf# coding=utf-8\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines, [b'# coding=utf-8\n'])

    def test_mismatched_bom_and_cookie_first_line_raises_syntaxerror(self):
        lines = (
            b'\xef\xbb\xbf# vim: set fileencoding=ascii :\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        readline = self.get_readline(lines)
        self.assertRaises(SyntaxError, detect_encoding, readline)

    def test_cookie_second_line_no_bom(self):
        lines = (
            b'#! something\n',
            b'# vim: set fileencoding=ascii :\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'ascii')
        expected = [b'#! something\n', b'# vim: set fileencoding=ascii :\n']
        self.assertEqual(consumed_lines, expected)

    def test_matched_bom_and_cookie_second_line(self):
        lines = (
            b'\xef\xbb\xbf#! something\n',
            b'f# coding=utf-8\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines,
                         [b'#! something\n', b'f# coding=utf-8\n'])

    def test_mismatched_bom_and_cookie_second_line_raises_syntaxerror(self):
        lines = (
            b'\xef\xbb\xbf#! something\n',
            b'# vim: set fileencoding=ascii :\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        readline = self.get_readline(lines)
        self.assertRaises(SyntaxError, detect_encoding, readline)

    def test_cookie_second_line_noncommented_first_line(self):
        lines = (
            b"print('\xc2\xa3')\n",
            b'# vim: set fileencoding=iso8859-15 :\n',
            b"print('\xe2\x82\xac')\n"
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8')
        expected = [b"print('\xc2\xa3')\n"]
        self.assertEqual(consumed_lines, expected)

    def test_cookie_second_line_commented_first_line(self):
        lines = (
            b"#print('\xc2\xa3')\n",
            b'# vim: set fileencoding=iso8859-15 :\n',
            b"print('\xe2\x82\xac')\n"
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'iso8859-15')
        expected = [b"#print('\xc2\xa3')\n", b'# vim: set fileencoding=iso8859-15 :\n']
        self.assertEqual(consumed_lines, expected)

    def test_cookie_second_line_empty_first_line(self):
        lines = (
            b'\n',
            b'# vim: set fileencoding=iso8859-15 :\n',
            b"print('\xe2\x82\xac')\n"
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'iso8859-15')
        expected = [b'\n', b'# vim: set fileencoding=iso8859-15 :\n']
        self.assertEqual(consumed_lines, expected)

    def test_latin1_normalization(self):
        # See get_normal_name() in tokenizer.c.
        encodings = ("latin-1", "iso-8859-1", "iso-latin-1", "latin-1-unix",
                     "iso-8859-1-unix", "iso-latin-1-mac")
        for encoding in encodings:
            for rep in ("-", "_"):
                enc = encoding.replace("-", rep)
                lines = (b"#!/usr/bin/python\n",
                         b"# coding: " + enc.encode("ascii") + b"\n",
                         b"print(things)\n",
                         b"do_something += 4\n")
                rl = self.get_readline(lines)
                found, consumed_lines = detect_encoding(rl)
                self.assertEqual(found, "iso-8859-1")

    def test_syntaxerror_latin1(self):
        # Issue 14629: need to raise SyntaxError if the first
        # line(s) have non-UTF-8 characters
        lines = (
            b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S
        )
        readline = self.get_readline(lines)
        self.assertRaises(SyntaxError, detect_encoding, readline)


    def test_utf8_normalization(self):
        # See get_normal_name() in tokenizer.c.
        encodings = ("utf-8", "utf-8-mac", "utf-8-unix")
        for encoding in encodings:
            for rep in ("-", "_"):
                enc = encoding.replace("-", rep)
                lines = (b"#!/usr/bin/python\n",
                         b"# coding: " + enc.encode("ascii") + b"\n",
                         b"1 + 3\n")
                rl = self.get_readline(lines)
                found, consumed_lines = detect_encoding(rl)
                self.assertEqual(found, "utf-8")

    def test_short_files(self):
        readline = self.get_readline((b'print(something)\n',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, [b'print(something)\n'])

        encoding, consumed_lines = detect_encoding(self.get_readline(()))
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, [])

        readline = self.get_readline((b'\xef\xbb\xbfprint(something)\n',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines, [b'print(something)\n'])

        readline = self.get_readline((b'\xef\xbb\xbf',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines, [])

        readline = self.get_readline((b'# coding: bad\n',))
        self.assertRaises(SyntaxError, detect_encoding, readline)

    def test_false_encoding(self):
        # Issue 18873: "Encoding" detected in non-comment lines
        readline = self.get_readline((b'print("#coding=fake")',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, [b'print("#coding=fake")'])

    def test_open(self):
        filename = support.TESTFN + '.py'
        self.addCleanup(support.unlink, filename)

        # test coding cookie
        for encoding in ('iso-8859-15', 'utf-8'):
            with open(filename, 'w', encoding=encoding) as fp:
                print("# coding: %s" % encoding, file=fp)
                print("print('euro:\u20ac')", file=fp)
            with tokenize_open(filename) as fp:
                self.assertEqual(fp.encoding, encoding)
                self.assertEqual(fp.mode, 'r')

        # test BOM (no coding cookie)
        with open(filename, 'w', encoding='utf-8-sig') as fp:
            print("print('euro:\u20ac')", file=fp)
        with tokenize_open(filename) as fp:
            self.assertEqual(fp.encoding, 'utf-8-sig')
            self.assertEqual(fp.mode, 'r')

    def test_filename_in_exception(self):
        # When possible, include the file name in the exception.
        path = 'some_file_path'
        lines = (
            b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S
        )
        class Bunk:
            def __init__(self, lines, path):
                self.name = path
                self._lines = lines
                self._index = 0

            def readline(self):
                if self._index == len(lines):
                    raise StopIteration
                line = lines[self._index]
                self._index += 1
                return line

        with self.assertRaises(SyntaxError):
            ins = Bunk(lines, path)
            # Make sure lacking a name isn't an issue.
            del ins.name
            detect_encoding(ins.readline)
        with self.assertRaisesRegex(SyntaxError, '.*{}'.format(path)):
            ins = Bunk(lines, path)
            detect_encoding(ins.readline)

    def test_open_error(self):
        # Issue #23840: open() must close the binary file on error
        m = BytesIO(b'#coding:xxx')
        with mock.patch('tokenize._builtin_open', return_value=m):
            self.assertRaises(SyntaxError, tokenize_open, 'foobar')
        self.assertTrue(m.closed)


class TestTokenize(TestCase):

    def test_tokenize(self):
        import tokenize as tokenize_module
        encoding = object()
        encoding_used = None
1329 def mock_detect_encoding(readline):
Serhiy Storchaka74a49ac2015-03-20 16:46:19 +02001330 return encoding, [b'first', b'second']
Trent Nelson428de652008-03-18 22:41:35 +00001331
1332 def mock__tokenize(readline, encoding):
1333 nonlocal encoding_used
1334 encoding_used = encoding
1335 out = []
1336 while True:
1337 next_line = readline()
1338 if next_line:
1339 out.append(next_line)
1340 continue
1341 return out
1342
1343 counter = 0
1344 def mock_readline():
1345 nonlocal counter
1346 counter += 1
1347 if counter == 5:
1348 return b''
Serhiy Storchaka74a49ac2015-03-20 16:46:19 +02001349 return str(counter).encode()
Trent Nelson428de652008-03-18 22:41:35 +00001350
1351 orig_detect_encoding = tokenize_module.detect_encoding
1352 orig__tokenize = tokenize_module._tokenize
1353 tokenize_module.detect_encoding = mock_detect_encoding
1354 tokenize_module._tokenize = mock__tokenize
1355 try:
1356 results = tokenize(mock_readline)
Serhiy Storchaka74a49ac2015-03-20 16:46:19 +02001357 self.assertEqual(list(results),
1358 [b'first', b'second', b'1', b'2', b'3', b'4'])
Trent Nelson428de652008-03-18 22:41:35 +00001359 finally:
1360 tokenize_module.detect_encoding = orig_detect_encoding
1361 tokenize_module._tokenize = orig__tokenize
1362
        # Check that the mocked encoding was threaded through to _tokenize
        # (assertTrue would pass vacuously here, since the object is truthy).
        self.assertEqual(encoding_used, encoding)

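    # For orientation, a hedged sketch (an addition) of real, unmocked
    # tokenize() output: the first item is always an ENCODING token, and
    # each TokenInfo is a namedtuple with type, string, start, end and
    # line fields.
    def test_tokenize_first_token_sketch(self):
        first = next(tokenize(BytesIO(b'pass\n').readline))
        self.assertEqual(first.type, ENCODING)
        self.assertEqual(first.string, 'utf-8')
        self.assertEqual(first.start, (0, 0))
        self.assertEqual(first.end, (0, 0))
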
    def test_oneline_defs(self):
        buf = []
        for i in range(500):
            buf.append('def i{i}(): return {i}'.format(i=i))
        buf.append('OK')
        buf = '\n'.join(buf)

        # Test that 500 consecutive one-line defs tokenize without error
        toks = list(tokenize(BytesIO(buf.encode('utf-8')).readline))
        self.assertEqual(toks[-3].string, 'OK') # [-1] is always ENDMARKER
                                                # [-2] is always NEWLINE

    def assertExactTypeEqual(self, opstr, *optypes):
        tokens = list(tokenize(BytesIO(opstr.encode('utf-8')).readline))
        num_optypes = len(optypes)
        self.assertEqual(len(tokens), 3 + num_optypes)
        self.assertEqual(tok_name[tokens[0].exact_type],
                         tok_name[ENCODING])
        for i in range(num_optypes):
            self.assertEqual(tok_name[tokens[i + 1].exact_type],
                             tok_name[optypes[i]])
        self.assertEqual(tok_name[tokens[1 + num_optypes].exact_type],
                         tok_name[token.NEWLINE])
        self.assertEqual(tok_name[tokens[2 + num_optypes].exact_type],
                         tok_name[token.ENDMARKER])

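    # A short direct sketch (an addition) of the exact_type property the
    # helper above relies on: operators tokenize with the generic OP type,
    # while exact_type still reports the precise operator.
    def test_exact_type_property_sketch(self):
        toks = list(tokenize(BytesIO(b'1 + 2\n').readline))
        plus = toks[2]              # [ENCODING, NUMBER, OP, NUMBER, ...]
        self.assertEqual(plus.type, OP)
        self.assertEqual(plus.exact_type, token.PLUS)
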
    def test_exact_type(self):
        self.assertExactTypeEqual('()', token.LPAR, token.RPAR)
        self.assertExactTypeEqual('[]', token.LSQB, token.RSQB)
        self.assertExactTypeEqual(':', token.COLON)
        self.assertExactTypeEqual(',', token.COMMA)
        self.assertExactTypeEqual(';', token.SEMI)
        self.assertExactTypeEqual('+', token.PLUS)
        self.assertExactTypeEqual('-', token.MINUS)
        self.assertExactTypeEqual('*', token.STAR)
        self.assertExactTypeEqual('/', token.SLASH)
        self.assertExactTypeEqual('|', token.VBAR)
        self.assertExactTypeEqual('&', token.AMPER)
        self.assertExactTypeEqual('<', token.LESS)
        self.assertExactTypeEqual('>', token.GREATER)
        self.assertExactTypeEqual('=', token.EQUAL)
        self.assertExactTypeEqual('.', token.DOT)
        self.assertExactTypeEqual('%', token.PERCENT)
        self.assertExactTypeEqual('{}', token.LBRACE, token.RBRACE)
        self.assertExactTypeEqual('==', token.EQEQUAL)
        self.assertExactTypeEqual('!=', token.NOTEQUAL)
        self.assertExactTypeEqual('<=', token.LESSEQUAL)
        self.assertExactTypeEqual('>=', token.GREATEREQUAL)
        self.assertExactTypeEqual('~', token.TILDE)
        self.assertExactTypeEqual('^', token.CIRCUMFLEX)
        self.assertExactTypeEqual('<<', token.LEFTSHIFT)
        self.assertExactTypeEqual('>>', token.RIGHTSHIFT)
        self.assertExactTypeEqual('**', token.DOUBLESTAR)
        self.assertExactTypeEqual('+=', token.PLUSEQUAL)
        self.assertExactTypeEqual('-=', token.MINEQUAL)
        self.assertExactTypeEqual('*=', token.STAREQUAL)
        self.assertExactTypeEqual('/=', token.SLASHEQUAL)
        self.assertExactTypeEqual('%=', token.PERCENTEQUAL)
        self.assertExactTypeEqual('&=', token.AMPEREQUAL)
        self.assertExactTypeEqual('|=', token.VBAREQUAL)
        self.assertExactTypeEqual('^=', token.CIRCUMFLEXEQUAL)
        self.assertExactTypeEqual('<<=', token.LEFTSHIFTEQUAL)
        self.assertExactTypeEqual('>>=', token.RIGHTSHIFTEQUAL)
        self.assertExactTypeEqual('**=', token.DOUBLESTAREQUAL)
        self.assertExactTypeEqual('//', token.DOUBLESLASH)
        self.assertExactTypeEqual('//=', token.DOUBLESLASHEQUAL)
        self.assertExactTypeEqual('...', token.ELLIPSIS)
        self.assertExactTypeEqual('->', token.RARROW)
        self.assertExactTypeEqual('@', token.AT)
        self.assertExactTypeEqual('@=', token.ATEQUAL)

        self.assertExactTypeEqual('a**2+b**2==c**2',
                                  NAME, token.DOUBLESTAR, NUMBER,
                                  token.PLUS,
                                  NAME, token.DOUBLESTAR, NUMBER,
                                  token.EQEQUAL,
                                  NAME, token.DOUBLESTAR, NUMBER)
        self.assertExactTypeEqual('{1, 2, 3}',
                                  token.LBRACE,
                                  token.NUMBER, token.COMMA,
                                  token.NUMBER, token.COMMA,
                                  token.NUMBER,
                                  token.RBRACE)
        self.assertExactTypeEqual('^(x & 0x1)',
                                  token.CIRCUMFLEX,
                                  token.LPAR,
                                  token.NAME, token.AMPER, token.NUMBER,
                                  token.RPAR)

    def test_pathological_trailing_whitespace(self):
        # See http://bugs.python.org/issue16152
        self.assertExactTypeEqual('@ ', token.AT)


class UntokenizeTest(TestCase):

    def test_bad_input_order(self):
        # raise if the start row precedes the previous end row
        u = Untokenizer()
        u.prev_row = 2
        u.prev_col = 2
        with self.assertRaises(ValueError) as cm:
            u.add_whitespace((1,3))
        self.assertEqual(cm.exception.args[0],
                         'start (1,3) precedes previous end (2,2)')
        # raise if the start column precedes the previous end column
        # within the same row
        self.assertRaises(ValueError, u.add_whitespace, (2,1))

    def test_backslash_continuation(self):
        # The problem is that <whitespace>\<newline> leaves no token
        u = Untokenizer()
        u.prev_row = 1
        u.prev_col = 1
        u.tokens = []
        u.add_whitespace((2, 0))
        self.assertEqual(u.tokens, ['\\\n'])
        u.prev_row = 2
        u.add_whitespace((4, 4))
        self.assertEqual(u.tokens, ['\\\n', '\\\n\\\n', '    '])
        TestRoundtrip.check_roundtrip(self, 'a\n  b\n    c\n  \\\n  c\n')

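    # A hedged end-to-end sketch (added here) of the same issue: a
    # backslash continuation preceded by whitespace survives a full
    # untokenize round trip even though no token covers the backslash
    # and newline themselves.
    def test_backslash_continuation_roundtrip_sketch(self):
        TestRoundtrip.check_roundtrip(self, 'x = 1 + \\\n    2\n')
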
    def test_iter_compat(self):
        u = Untokenizer()
        token = (NAME, 'Hello')
        tokens = [(ENCODING, 'utf-8'), token]
        u.compat(token, iter([]))
        self.assertEqual(u.tokens, ["Hello "])
        u = Untokenizer()
        self.assertEqual(u.untokenize(iter([token])), 'Hello ')
        u = Untokenizer()
        self.assertEqual(u.untokenize(iter(tokens)), 'Hello ')
        self.assertEqual(u.encoding, 'utf-8')
        self.assertEqual(untokenize(iter(tokens)), b'Hello ')

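    # A brief supplementary sketch (an assumption, not from the original
    # suite): in compat (2-tuple) mode, untokenize only guarantees a
    # token-level match, so spacing may differ from the original source.
    def test_compat_mode_spacing_sketch(self):
        self.assertEqual(untokenize([(NAME, 'x'), (OP, '='), (NUMBER, '1')]),
                         'x =1 ')
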

class TestRoundtrip(TestCase):

    def check_roundtrip(self, f):
        """
        Test roundtrip for `untokenize`. `f` is an open file or a string.
        The source code in f is tokenized to both 5- and 2-tuples.
        Both sequences are converted back to source code via
        tokenize.untokenize(), and each result is tokenized again to
        2-tuples. The test fails if any of the three 2-tuple
        tokenizations differ.

        When untokenize bugs are fixed, untokenize with 5-tuples should
        reproduce code that does not contain a backslash continuation
        following spaces.  A proper test should test this.
        """
        # Get source code and original tokenizations
        if isinstance(f, str):
            code = f.encode('utf-8')
        else:
            code = f.read()
            f.close()
        readline = iter(code.splitlines(keepends=True)).__next__
        tokens5 = list(tokenize(readline))
        tokens2 = [tok[:2] for tok in tokens5]
        # Reproduce tokens2 from pairs
        bytes_from2 = untokenize(tokens2)
        readline2 = iter(bytes_from2.splitlines(keepends=True)).__next__
        tokens2_from2 = [tok[:2] for tok in tokenize(readline2)]
        self.assertEqual(tokens2_from2, tokens2)
        # Reproduce tokens2 from 5-tuples
        bytes_from5 = untokenize(tokens5)
        readline5 = iter(bytes_from5.splitlines(keepends=True)).__next__
        tokens2_from5 = [tok[:2] for tok in tokenize(readline5)]
        self.assertEqual(tokens2_from5, tokens2)

    def test_roundtrip(self):
        # There are some standard formatting practices that are easy to get right.

        self.check_roundtrip("if x == 1:\n"
                             "    print(x)\n")
        self.check_roundtrip("# This is a comment\n"
                             "# This also\n")

        # Some people use different formatting conventions, which makes
        # untokenize a little trickier. Note that this test involves trailing
        # whitespace after the colon: there are two trailing blanks, which
        # the roundtrip has to carry through.

        self.check_roundtrip("if x == 1 :  \n"
                             "  print(x)\n")
        fn = support.findfile("tokenize_tests.txt")
        with open(fn, 'rb') as f:
            self.check_roundtrip(f)
        self.check_roundtrip("if x == 1:\n"
                             "    # A comment by itself.\n"
                             "    print(x) # Comment here, too.\n"
                             "    # Another comment.\n"
                             "after_if = True\n")
        self.check_roundtrip("if (x # The comments need to go in the right place\n"
                             "    == 1):\n"
                             "    print('x==1')\n")
        self.check_roundtrip("class Test: # A comment here\n"
                             "  # A comment with weird indent\n"
                             "  after_com = 5\n"
                             "  def x(m): return m*5 # a one liner\n"
                             "  def y(m): # A whitespace after the colon\n"
                             "     return y*4 # 3-space indent\n")

        # Some error-handling code
        self.check_roundtrip("try: import somemodule\n"
                             "except ImportError: # comment\n"
                             "    print('Can not import' # comment2\n)"
                             "else:   print('Loaded')\n")

    def test_continuation(self):
        # Balancing continuation
        self.check_roundtrip("a = (3,4, \n"
                             "5,6)\n"
                             "y = [3, 4,\n"
                             "5]\n"
                             "z = {'a': 5,\n"
                             "'b':15, 'c':True}\n"
                             "x = len(y) + 5 - a[\n"
                             "3] - a[2]\n"
                             "+ len(z) - z[\n"
                             "'b']\n")

    def test_backslash_continuation(self):
        # Backslash means line continuation, except for comments
        self.check_roundtrip("x=1+\\\n"
                             "1\n"
                             "# This is a comment\\\n"
                             "# This also\n")
        self.check_roundtrip("# Comment \\\n"
                             "x = 0")

    def test_string_concatenation(self):
        # Two string literals on the same line
        self.check_roundtrip("'' ''")

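    # A hedged companion sketch (new here): compat-mode untokenize keeps
    # adjacent string literals apart by inserting a space, so the two
    # literals above cannot merge into a single token.
    def test_string_concatenation_compat_sketch(self):
        self.assertEqual(untokenize([(STRING, "''"), (STRING, "''")]),
                         "'' ''")
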
    def test_random_files(self):
        # Test roundtrip on random python modules.
        # Pass the '-ucpu' option to process the full directory.

        import glob, random
        fn = support.findfile("tokenize_tests.txt")
        tempdir = os.path.dirname(fn) or os.curdir
        testfiles = glob.glob(os.path.join(tempdir, "test*.py"))

        # Tokenize is broken on test_unicode_identifiers.py because regular
        # expressions are broken on the obscure unicode identifiers in it. *sigh*
        # With roundtrip extended to test the 5-tuple mode of untokenize,
        # 7 more testfiles fail.  Remove them also until the failure is diagnosed.

        testfiles.remove(os.path.join(tempdir, "test_unicode_identifiers.py"))
        for f in ('buffer', 'builtin', 'fileio', 'inspect', 'os', 'platform', 'sys'):
            testfiles.remove(os.path.join(tempdir, "test_%s.py" % f))

        if not support.is_resource_enabled("cpu"):
            testfiles = random.sample(testfiles, 10)

        for testfile in testfiles:
            with open(testfile, 'rb') as f:
                with self.subTest(file=testfile):
                    self.check_roundtrip(f)


    def roundtrip(self, code):
        if isinstance(code, str):
            code = code.encode('utf-8')
        return untokenize(tokenize(BytesIO(code).readline)).decode('utf-8')

    def test_indentation_semantics_retained(self):
        """
        Ensure that although whitespace might be mutated in a roundtrip,
        the semantic meaning of the indentation remains consistent.
        """
        code = "if False:\n\tx=3\n\tx=3\n"
        codelines = self.roundtrip(code).split('\n')
        self.assertEqual(codelines[1], codelines[2])
        self.check_roundtrip(code)


if __name__ == "__main__":
    unittest.main()