Blame - Lib/test/test_tokenize.py - platform/external/python/cpython3

2015-10-06 18:16:28 +0300

[diff] [blame]

1

from test import support

2

from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,

3

STRING, ENDMARKER, ENCODING, tok_name, detect_encoding,

4

open as tokenize_open, Untokenizer)

5

from io import BytesIO

Stéphane Wirtel

90addd6

2017-07-25 15:33:53 +0200

[diff] [blame]

6

import unittest

Brett Cannon

2016-09-09 14:57:09 -0700

[diff] [blame]

7

from unittest import TestCase, mock

8

from test.test_grammar import (VALID_UNDERSCORE_LITERALS,

9

INVALID_UNDERSCORE_LITERALS)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

10

import os

11

import token

Thomas Wouters

2006-12-13 04:49:30 +0000

[diff] [blame]

12

Thomas Wouters

2006-12-13 04:49:30 +0000

[diff] [blame]

13

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

14

class TokenizeTest(TestCase):

15

# Tests for the tokenize module.

Jason R. Coombs

7cf3638

2015-06-20 19:13:50 -0400

[diff] [blame]

16

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

17

# The tests can be really simple. Given a small fragment of source

18

# code, print out a table with tokens. The ENDMARKER is omitted for

19

# brevity.

20

21

def check_tokenize(self, s, expected):
    # Render every token of `s` (encoded as UTF-8) as one table row and
    # compare the rows with `expected`, a newline-separated table.
    # Rendering stops at the ENDMARKER, which therefore never appears.
    # NOTE(review): whitespace runs inside the literals below look
    # collapsed in this copy (rows are padded by :10 / :13 but the header
    # row is single-spaced) -- confirm against the upstream file.
    readline = BytesIO(s.encode('utf-8')).readline
    rows = []
    for kind, text, begin, finish, _line in tokenize(readline):
        if kind == ENDMARKER:
            break
        rows.append(f" {tok_name[kind]:10} {text!r:13} {begin} {finish}")
    # The ENCODING row is supplied here so callers can leave it out of
    # every expected table.
    wanted = [" ENCODING 'utf-8' (0, 0) (0, 0)"]
    wanted += expected.rstrip().splitlines()
    self.assertEqual(rows, wanted)

34

35

def test_basic(self):
    # Two fragments are rendered as token tables: a bare expression, then
    # an `if` block mixing a comment-only line, a blank line (both NL) and
    # a statement with a trailing comment (NEWLINE).
    fragments = (
        ("1 + 1", """\
NUMBER '1' (1, 0) (1, 1)
OP '+' (1, 2) (1, 3)
NUMBER '1' (1, 4) (1, 5)
"""),
        ("if False:\n"
         " # NL\n"
         " \n"
         " True = False # NEWLINE\n", """\
NAME 'if' (1, 0) (1, 2)
NAME 'False' (1, 3) (1, 8)
OP ':' (1, 8) (1, 9)
NEWLINE '\\n' (1, 9) (1, 10)
COMMENT '# NL' (2, 4) (2, 8)
NL '\\n' (2, 8) (2, 9)
NL '\\n' (3, 4) (3, 5)
INDENT ' ' (4, 0) (4, 4)
NAME 'True' (4, 4) (4, 8)
OP '=' (4, 9) (4, 10)
NAME 'False' (4, 11) (4, 16)
COMMENT '# NEWLINE' (4, 17) (4, 26)
NEWLINE '\\n' (4, 26) (4, 27)
DEDENT '' (5, 0) (5, 0)
"""),
    )
    for source, expected in fragments:
        self.check_tokenize(source, expected)

    # Consuming the token stream of a badly dedented function body must
    # raise IndentationError with the message matched below.
    # NOTE(review): the fixture's leading whitespace looks stripped in this
    # copy; the asserted error needs the last statement dedented to a
    # column matching no outer block -- confirm against the upstream file.
    bad_dedent = b"""\
def k(x):
x += 2
x += 5
"""
    readline = BytesIO(bad_dedent).readline
    with self.assertRaisesRegex(IndentationError,
                                "unindent does not match any "
                                "outer indentation level"):
        for _ in tokenize(readline):
            pass

Thomas Wouters

2006-12-13 04:49:30 +0000

[diff] [blame]

71

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

72

def test_int(self):
    # Integer literals in hex, binary, octal and decimal notation, mixed
    # with comparison, bitwise and unary operators.  A sign or `~` in
    # front of a literal is its own OP token, never part of the NUMBER.
    cases = (
        ("0xff <= 255", """\
NUMBER '0xff' (1, 0) (1, 4)
OP '<=' (1, 5) (1, 7)
NUMBER '255' (1, 8) (1, 11)
"""),
        ("0b10 <= 255", """\
NUMBER '0b10' (1, 0) (1, 4)
OP '<=' (1, 5) (1, 7)
NUMBER '255' (1, 8) (1, 11)
"""),
        ("0o123 <= 0O123", """\
NUMBER '0o123' (1, 0) (1, 5)
OP '<=' (1, 6) (1, 8)
NUMBER '0O123' (1, 9) (1, 14)
"""),
        ("1234567 > ~0x15", """\
NUMBER '1234567' (1, 0) (1, 7)
OP '>' (1, 8) (1, 9)
OP '~' (1, 10) (1, 11)
NUMBER '0x15' (1, 11) (1, 15)
"""),
        ("2134568 != 1231515", """\
NUMBER '2134568' (1, 0) (1, 7)
OP '!=' (1, 8) (1, 10)
NUMBER '1231515' (1, 11) (1, 18)
"""),
        ("(-124561-1) & 200000000", """\
OP '(' (1, 0) (1, 1)
OP '-' (1, 1) (1, 2)
NUMBER '124561' (1, 2) (1, 8)
OP '-' (1, 8) (1, 9)
NUMBER '1' (1, 9) (1, 10)
OP ')' (1, 10) (1, 11)
OP '&' (1, 12) (1, 13)
NUMBER '200000000' (1, 14) (1, 23)
"""),
        ("0xdeadbeef != -1", """\
NUMBER '0xdeadbeef' (1, 0) (1, 10)
OP '!=' (1, 11) (1, 13)
OP '-' (1, 14) (1, 15)
NUMBER '1' (1, 15) (1, 16)
"""),
        ("0xdeadc0de & 12345", """\
NUMBER '0xdeadc0de' (1, 0) (1, 10)
OP '&' (1, 11) (1, 12)
NUMBER '12345' (1, 13) (1, 18)
"""),
        ("0xFF & 0x15 | 1234", """\
NUMBER '0xFF' (1, 0) (1, 4)
OP '&' (1, 5) (1, 6)
NUMBER '0x15' (1, 7) (1, 11)
OP '|' (1, 12) (1, 13)
NUMBER '1234' (1, 14) (1, 18)
"""),
    )
    for source, expected in cases:
        self.check_tokenize(source, expected)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

128

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

129

def test_long(self):
    # Arbitrarily long integer literals stay one NUMBER token each; a
    # leading minus sign is reported as a separate OP.
    cases = (
        ("x = 0", """\
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
NUMBER '0' (1, 4) (1, 5)
"""),
        ("x = 0xfffffffffff", """\
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
NUMBER '0xfffffffffff' (1, 4) (1, 17)
"""),
        ("x = 123141242151251616110", """\
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
NUMBER '123141242151251616110' (1, 4) (1, 25)
"""),
        ("x = -15921590215012591", """\
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
OP '-' (1, 4) (1, 5)
NUMBER '15921590215012591' (1, 5) (1, 22)
"""),
    )
    for source, expected in cases:
        self.check_tokenize(source, expected)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

152

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

153

def test_float(self):
    # Float literals: digits on either side of the point may be missing,
    # and an exponent (either case, optionally signed) belongs to the same
    # NUMBER token.
    cases = (
        ("x = 3.14159", """\
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
NUMBER '3.14159' (1, 4) (1, 11)
"""),
        ("x = 314159.", """\
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
NUMBER '314159.' (1, 4) (1, 11)
"""),
        ("x = .314159", """\
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
NUMBER '.314159' (1, 4) (1, 11)
"""),
        ("x = 3e14159", """\
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
NUMBER '3e14159' (1, 4) (1, 11)
"""),
        ("x = 3E123", """\
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
NUMBER '3E123' (1, 4) (1, 9)
"""),
        ("x+y = 3e-1230", """\
NAME 'x' (1, 0) (1, 1)
OP '+' (1, 1) (1, 2)
NAME 'y' (1, 2) (1, 3)
OP '=' (1, 4) (1, 5)
NUMBER '3e-1230' (1, 6) (1, 13)
"""),
        ("x = 3.14e159", """\
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
NUMBER '3.14e159' (1, 4) (1, 12)
"""),
    )
    for source, expected in cases:
        self.check_tokenize(source, expected)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

192

Brett Cannon

2016-09-09 14:57:09 -0700

[diff] [blame]

193

def test_underscore_literals(self):
    # Every valid underscore literal must come back as one NUMBER token
    # spelled exactly like the input; no invalid one may do so.
    def number_token(s):
        # Text of the first NUMBER token in `s`, or a sentinel that can
        # never equal a literal when the stream has no NUMBER at all.
        stream = tokenize(BytesIO(s.encode('utf-8')).readline)
        numbers = (text for kind, text, _start, _end, _line in stream
                   if kind == NUMBER)
        return next(numbers, 'invalid token')

    # this won't work with compound complex inputs, so literals that
    # contain a parenthesis are left out of the positive check
    simple_valid = (lit for lit in VALID_UNDERSCORE_LITERALS
                    if '(' not in lit)
    for lit in simple_valid:
        self.assertEqual(number_token(lit), lit)
    for lit in INVALID_UNDERSCORE_LITERALS:
        self.assertNotEqual(number_token(lit), lit)

207

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

208

def test_string(self):
    # String literals in every quoting and prefix flavour: each literal
    # must come back as one STRING token carrying its source text.
    check = self.check_tokenize

    # Empty strings, and each quote character nested inside the other.
    check("x = ''; y = \"\"", """\
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
STRING "''" (1, 4) (1, 6)
OP ';' (1, 6) (1, 7)
NAME 'y' (1, 8) (1, 9)
OP '=' (1, 10) (1, 11)
STRING '""' (1, 12) (1, 14)
""")
    check("x = '\"'; y = \"'\"", """\
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
STRING '\\'"\\'' (1, 4) (1, 7)
OP ';' (1, 7) (1, 8)
NAME 'y' (1, 9) (1, 10)
OP '=' (1, 11) (1, 12)
STRING '"\\'"' (1, 13) (1, 16)
""")
    # An unescaped inner double quote ends the literal early, leaving a
    # NAME between two shorter strings.
    check("x = \"doesn't \"shrink\", does it\"", """\
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
STRING '"doesn\\'t "' (1, 4) (1, 14)
NAME 'shrink' (1, 14) (1, 20)
STRING '", does it"' (1, 20) (1, 31)
""")

    # Plain and raw strings joined with `+`, in both quote styles.
    check("x = 'abc' + 'ABC'", """\
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
STRING "'abc'" (1, 4) (1, 9)
OP '+' (1, 10) (1, 11)
STRING "'ABC'" (1, 12) (1, 17)
""")
    check('y = "ABC" + "ABC"', """\
NAME 'y' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
STRING '"ABC"' (1, 4) (1, 9)
OP '+' (1, 10) (1, 11)
STRING '"ABC"' (1, 12) (1, 17)
""")
    check("x = r'abc' + r'ABC' + R'ABC' + R'ABC'", """\
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
STRING "r'abc'" (1, 4) (1, 10)
OP '+' (1, 11) (1, 12)
STRING "r'ABC'" (1, 13) (1, 19)
OP '+' (1, 20) (1, 21)
STRING "R'ABC'" (1, 22) (1, 28)
OP '+' (1, 29) (1, 30)
STRING "R'ABC'" (1, 31) (1, 37)
""")
    check('y = r"abc" + r"ABC" + R"ABC" + R"ABC"', """\
NAME 'y' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
STRING 'r"abc"' (1, 4) (1, 10)
OP '+' (1, 11) (1, 12)
STRING 'r"ABC"' (1, 13) (1, 19)
OP '+' (1, 20) (1, 21)
STRING 'R"ABC"' (1, 22) (1, 28)
OP '+' (1, 29) (1, 30)
STRING 'R"ABC"' (1, 31) (1, 37)
""")

    # The u/U prefix.
    check("u'abc' + U'abc'", """\
STRING "u'abc'" (1, 0) (1, 6)
OP '+' (1, 7) (1, 8)
STRING "U'abc'" (1, 9) (1, 15)
""")
    check('u"abc" + U"abc"', """\
STRING 'u"abc"' (1, 0) (1, 6)
OP '+' (1, 7) (1, 8)
STRING 'U"abc"' (1, 9) (1, 15)
""")

    # Bytes, then raw bytes with the two prefix letters in either order
    # and either case.
    check("b'abc' + B'abc'", """\
STRING "b'abc'" (1, 0) (1, 6)
OP '+' (1, 7) (1, 8)
STRING "B'abc'" (1, 9) (1, 15)
""")
    check('b"abc" + B"abc"', """\
STRING 'b"abc"' (1, 0) (1, 6)
OP '+' (1, 7) (1, 8)
STRING 'B"abc"' (1, 9) (1, 15)
""")
    check("br'abc' + bR'abc' + Br'abc' + BR'abc'", """\
STRING "br'abc'" (1, 0) (1, 7)
OP '+' (1, 8) (1, 9)
STRING "bR'abc'" (1, 10) (1, 17)
OP '+' (1, 18) (1, 19)
STRING "Br'abc'" (1, 20) (1, 27)
OP '+' (1, 28) (1, 29)
STRING "BR'abc'" (1, 30) (1, 37)
""")
    check('br"abc" + bR"abc" + Br"abc" + BR"abc"', """\
STRING 'br"abc"' (1, 0) (1, 7)
OP '+' (1, 8) (1, 9)
STRING 'bR"abc"' (1, 10) (1, 17)
OP '+' (1, 18) (1, 19)
STRING 'Br"abc"' (1, 20) (1, 27)
OP '+' (1, 28) (1, 29)
STRING 'BR"abc"' (1, 30) (1, 37)
""")
    check("rb'abc' + rB'abc' + Rb'abc' + RB'abc'", """\
STRING "rb'abc'" (1, 0) (1, 7)
OP '+' (1, 8) (1, 9)
STRING "rB'abc'" (1, 10) (1, 17)
OP '+' (1, 18) (1, 19)
STRING "Rb'abc'" (1, 20) (1, 27)
OP '+' (1, 28) (1, 29)
STRING "RB'abc'" (1, 30) (1, 37)
""")
    check('rb"abc" + rB"abc" + Rb"abc" + RB"abc"', """\
STRING 'rb"abc"' (1, 0) (1, 7)
OP '+' (1, 8) (1, 9)
STRING 'rB"abc"' (1, 10) (1, 17)
OP '+' (1, 18) (1, 19)
STRING 'Rb"abc"' (1, 20) (1, 27)
OP '+' (1, 28) (1, 29)
STRING 'RB"abc"' (1, 30) (1, 37)
""")

    # Check 0, 1, and 2 character string prefixes.  Each literal is
    # continued across physical lines with a trailing backslash, so the
    # one STRING token ends on a later row than it starts.
    check(r'"a\
de\
fg"', """\
STRING '"a\\\\\\nde\\\\\\nfg"\' (1, 0) (3, 3)
""")
    check(r'u"a\
de"', """\
STRING 'u"a\\\\\\nde"\' (1, 0) (2, 3)
""")
    check(r'rb"a\
d"', """\
STRING 'rb"a\\\\\\nd"\' (1, 0) (2, 2)
""")
    check(r'"""a\
b"""', """\
STRING '\"\""a\\\\\\nb\"\""' (1, 0) (2, 4)
""")
    check(r'u"""a\
b"""', """\
STRING 'u\"\""a\\\\\\nb\"\""' (1, 0) (2, 4)
""")
    check(r'rb"""a\
b\
c"""', """\
STRING 'rb"\""a\\\\\\nb\\\\\\nc"\""' (1, 0) (3, 4)
""")

    # f-strings: the whole literal, replacement fields included, is one
    # STRING token.
    check('f"abc"', """\
STRING 'f"abc"' (1, 0) (1, 6)
""")
    check('fR"a{b}c"', """\
STRING 'fR"a{b}c"' (1, 0) (1, 9)
""")
    check('f"""abc"""', """\
STRING 'f\"\"\"abc\"\"\"' (1, 0) (1, 10)
""")
    check(r'f"abc\
def"', """\
STRING 'f"abc\\\\\\ndef"' (1, 0) (2, 4)
""")
    check(r'Rf"abc\
def"', """\
STRING 'Rf"abc\\\\\\ndef"' (1, 0) (2, 4)
""")

Meador Inge

2012-06-16 21:49:08 -0500

[diff] [blame]

373

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

374

def test_function(self):
    # Function headers: defaults, `*` / `**` parameters, and annotations
    # with a `->` return arrow.  `**` and `->` are each a single OP.
    cases = (
        ("def d22(a, b, c=2, d=2, *k): pass", """\
NAME 'def' (1, 0) (1, 3)
NAME 'd22' (1, 4) (1, 7)
OP '(' (1, 7) (1, 8)
NAME 'a' (1, 8) (1, 9)
OP ',' (1, 9) (1, 10)
NAME 'b' (1, 11) (1, 12)
OP ',' (1, 12) (1, 13)
NAME 'c' (1, 14) (1, 15)
OP '=' (1, 15) (1, 16)
NUMBER '2' (1, 16) (1, 17)
OP ',' (1, 17) (1, 18)
NAME 'd' (1, 19) (1, 20)
OP '=' (1, 20) (1, 21)
NUMBER '2' (1, 21) (1, 22)
OP ',' (1, 22) (1, 23)
OP '*' (1, 24) (1, 25)
NAME 'k' (1, 25) (1, 26)
OP ')' (1, 26) (1, 27)
OP ':' (1, 27) (1, 28)
NAME 'pass' (1, 29) (1, 33)
"""),
        ("def d01v_(a=1, *k, **w): pass", """\
NAME 'def' (1, 0) (1, 3)
NAME 'd01v_' (1, 4) (1, 9)
OP '(' (1, 9) (1, 10)
NAME 'a' (1, 10) (1, 11)
OP '=' (1, 11) (1, 12)
NUMBER '1' (1, 12) (1, 13)
OP ',' (1, 13) (1, 14)
OP '*' (1, 15) (1, 16)
NAME 'k' (1, 16) (1, 17)
OP ',' (1, 17) (1, 18)
OP '**' (1, 19) (1, 21)
NAME 'w' (1, 21) (1, 22)
OP ')' (1, 22) (1, 23)
OP ':' (1, 23) (1, 24)
NAME 'pass' (1, 25) (1, 29)
"""),
        ("def d23(a: str, b: int=3) -> int: pass", """\
NAME 'def' (1, 0) (1, 3)
NAME 'd23' (1, 4) (1, 7)
OP '(' (1, 7) (1, 8)
NAME 'a' (1, 8) (1, 9)
OP ':' (1, 9) (1, 10)
NAME 'str' (1, 11) (1, 14)
OP ',' (1, 14) (1, 15)
NAME 'b' (1, 16) (1, 17)
OP ':' (1, 17) (1, 18)
NAME 'int' (1, 19) (1, 22)
OP '=' (1, 22) (1, 23)
NUMBER '3' (1, 23) (1, 24)
OP ')' (1, 24) (1, 25)
OP '->' (1, 26) (1, 28)
NAME 'int' (1, 29) (1, 32)
OP ':' (1, 32) (1, 33)
NAME 'pass' (1, 34) (1, 38)
"""),
    )
    for source, expected in cases:
        self.check_tokenize(source, expected)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

433

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

434

def test_comparison(self):
    # One long chained condition covering every comparison operator.
    # Symbolic comparisons are OP tokens; the word operators (and, or, in,
    # not, is) are plain NAME tokens.
    source = ("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != "
              "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass")
    expected = """\
NAME 'if' (1, 0) (1, 2)
NUMBER '1' (1, 3) (1, 4)
OP '<' (1, 5) (1, 6)
NUMBER '1' (1, 7) (1, 8)
OP '>' (1, 9) (1, 10)
NUMBER '1' (1, 11) (1, 12)
OP '==' (1, 13) (1, 15)
NUMBER '1' (1, 16) (1, 17)
OP '>=' (1, 18) (1, 20)
NUMBER '5' (1, 21) (1, 22)
OP '<=' (1, 23) (1, 25)
NUMBER '0x15' (1, 26) (1, 30)
OP '<=' (1, 31) (1, 33)
NUMBER '0x12' (1, 34) (1, 38)
OP '!=' (1, 39) (1, 41)
NUMBER '1' (1, 42) (1, 43)
NAME 'and' (1, 44) (1, 47)
NUMBER '5' (1, 48) (1, 49)
NAME 'in' (1, 50) (1, 52)
NUMBER '1' (1, 53) (1, 54)
NAME 'not' (1, 55) (1, 58)
NAME 'in' (1, 59) (1, 61)
NUMBER '1' (1, 62) (1, 63)
NAME 'is' (1, 64) (1, 66)
NUMBER '1' (1, 67) (1, 68)
NAME 'or' (1, 69) (1, 71)
NUMBER '5' (1, 72) (1, 73)
NAME 'is' (1, 74) (1, 76)
NAME 'not' (1, 77) (1, 80)
NUMBER '1' (1, 81) (1, 82)
OP ':' (1, 82) (1, 83)
NAME 'pass' (1, 84) (1, 88)
"""
    self.check_tokenize(source, expected)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

471

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

472

def test_shift(self):
    # Shift operators: `<<` and `>>` are each one two-character OP token.
    source = "x = 1 << 1 >> 5"
    expected = """\
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
NUMBER '1' (1, 4) (1, 5)
OP '<<' (1, 6) (1, 8)
NUMBER '1' (1, 9) (1, 10)
OP '>>' (1, 11) (1, 13)
NUMBER '5' (1, 14) (1, 15)
"""
    self.check_tokenize(source, expected)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

483

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

484

def test_additive(self):
    # Additive operators between names, decimal and hex numbers, ending
    # with a subscript whose brackets are separate OP tokens.
    source = "x = 1 - y + 15 - 1 + 0x124 + z + a[5]"
    expected = """\
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
NUMBER '1' (1, 4) (1, 5)
OP '-' (1, 6) (1, 7)
NAME 'y' (1, 8) (1, 9)
OP '+' (1, 10) (1, 11)
NUMBER '15' (1, 12) (1, 14)
OP '-' (1, 15) (1, 16)
NUMBER '1' (1, 17) (1, 18)
OP '+' (1, 19) (1, 20)
NUMBER '0x124' (1, 21) (1, 26)
OP '+' (1, 27) (1, 28)
NAME 'z' (1, 29) (1, 30)
OP '+' (1, 31) (1, 32)
NAME 'a' (1, 33) (1, 34)
OP '[' (1, 34) (1, 35)
NUMBER '5' (1, 35) (1, 36)
OP ']' (1, 36) (1, 37)
"""
    self.check_tokenize(source, expected)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

506

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

507

def test_multiplicative(self):
    # Multiplicative operators written with no spaces between operands:
    # `//` must be one OP, and `@` is tokenized as an operator too.
    source = "x = 1//1*1/5*12%0x12@42"
    expected = """\
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
NUMBER '1' (1, 4) (1, 5)
OP '//' (1, 5) (1, 7)
NUMBER '1' (1, 7) (1, 8)
OP '*' (1, 8) (1, 9)
NUMBER '1' (1, 9) (1, 10)
OP '/' (1, 10) (1, 11)
NUMBER '5' (1, 11) (1, 12)
OP '*' (1, 12) (1, 13)
NUMBER '12' (1, 13) (1, 15)
OP '%' (1, 15) (1, 16)
NUMBER '0x12' (1, 16) (1, 20)
OP '@' (1, 20) (1, 21)
NUMBER '42' (1, 21) (1, 23)
"""
    self.check_tokenize(source, expected)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

526

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

527

def test_unary(self):
    # Unary operators next to binary ones.  Consecutive minus signs are
    # reported one OP at a time, while `**` and `//` stay single tokens.
    cases = (
        ("~1 ^ 1 & 1 |1 ^ -1", """\
OP '~' (1, 0) (1, 1)
NUMBER '1' (1, 1) (1, 2)
OP '^' (1, 3) (1, 4)
NUMBER '1' (1, 5) (1, 6)
OP '&' (1, 7) (1, 8)
NUMBER '1' (1, 9) (1, 10)
OP '|' (1, 11) (1, 12)
NUMBER '1' (1, 12) (1, 13)
OP '^' (1, 14) (1, 15)
OP '-' (1, 16) (1, 17)
NUMBER '1' (1, 17) (1, 18)
"""),
        ("-1*1/1+1*1//1 - ---1**1", """\
OP '-' (1, 0) (1, 1)
NUMBER '1' (1, 1) (1, 2)
OP '*' (1, 2) (1, 3)
NUMBER '1' (1, 3) (1, 4)
OP '/' (1, 4) (1, 5)
NUMBER '1' (1, 5) (1, 6)
OP '+' (1, 6) (1, 7)
NUMBER '1' (1, 7) (1, 8)
OP '*' (1, 8) (1, 9)
NUMBER '1' (1, 9) (1, 10)
OP '//' (1, 10) (1, 12)
NUMBER '1' (1, 12) (1, 13)
OP '-' (1, 14) (1, 15)
OP '-' (1, 16) (1, 17)
OP '-' (1, 17) (1, 18)
OP '-' (1, 18) (1, 19)
NUMBER '1' (1, 19) (1, 20)
OP '**' (1, 20) (1, 22)
NUMBER '1' (1, 22) (1, 23)
"""),
    )
    for source, expected in cases:
        self.check_tokenize(source, expected)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

563

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

564

def test_selector(self):
    # Attribute access, subscription and a call chained on one line, after
    # an import line that ends in a NEWLINE token.
    source = "import sys, time\nx = sys.modules['time'].time()"
    expected = """\
NAME 'import' (1, 0) (1, 6)
NAME 'sys' (1, 7) (1, 10)
OP ',' (1, 10) (1, 11)
NAME 'time' (1, 12) (1, 16)
NEWLINE '\\n' (1, 16) (1, 17)
NAME 'x' (2, 0) (2, 1)
OP '=' (2, 2) (2, 3)
NAME 'sys' (2, 4) (2, 7)
OP '.' (2, 7) (2, 8)
NAME 'modules' (2, 8) (2, 15)
OP '[' (2, 15) (2, 16)
STRING "'time'" (2, 16) (2, 22)
OP ']' (2, 22) (2, 23)
OP '.' (2, 23) (2, 24)
NAME 'time' (2, 24) (2, 28)
OP '(' (2, 28) (2, 29)
OP ')' (2, 29) (2, 30)
"""
    self.check_tokenize(source, expected)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

585

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

586

def test_method(self):
    # A decorated definition: the decorator's `@` is an OP followed by a
    # NAME, and the decorator line ends with NEWLINE.
    source = "@staticmethod\ndef foo(x,y): pass"
    expected = """\
OP '@' (1, 0) (1, 1)
NAME 'staticmethod' (1, 1) (1, 13)
NEWLINE '\\n' (1, 13) (1, 14)
NAME 'def' (2, 0) (2, 3)
NAME 'foo' (2, 4) (2, 7)
OP '(' (2, 7) (2, 8)
NAME 'x' (2, 8) (2, 9)
OP ',' (2, 9) (2, 10)
NAME 'y' (2, 10) (2, 11)
OP ')' (2, 11) (2, 12)
OP ':' (2, 12) (2, 13)
NAME 'pass' (2, 14) (2, 18)
"""
    self.check_tokenize(source, expected)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

602

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

603

def test_tabs(self):
    # Evil tabs: two nested blocks indented with tab characters, each
    # producing an INDENT and, at end of input, a matching DEDENT.
    # NOTE(review): the run of spaces before the second tab looks
    # collapsed in this copy (the expected INDENT ends at column 9) --
    # confirm against the upstream file.
    source = ("def f():\n"
              "\tif x\n"
              " \tpass")
    expected = """\
NAME 'def' (1, 0) (1, 3)
NAME 'f' (1, 4) (1, 5)
OP '(' (1, 5) (1, 6)
OP ')' (1, 6) (1, 7)
OP ':' (1, 7) (1, 8)
NEWLINE '\\n' (1, 8) (1, 9)
INDENT '\\t' (2, 0) (2, 1)
NAME 'if' (2, 1) (2, 3)
NAME 'x' (2, 4) (2, 5)
NEWLINE '\\n' (2, 5) (2, 6)
INDENT ' \\t' (3, 0) (3, 9)
NAME 'pass' (3, 9) (3, 13)
DEDENT '' (4, 0) (4, 0)
DEDENT '' (4, 0) (4, 0)
"""
    self.check_tokenize(source, expected)

Benjamin Peterson

33856de

2010-08-30 14:41:20 +0000

[diff] [blame]

623

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

624

def test_non_ascii_identifiers(self):
    # Identifiers containing non-ASCII letters are ordinary NAME tokens;
    # the expected columns count characters (5 for 'Örter', 4 for 'grün').
    source = "Örter = 'places'\ngrün = 'green'"
    expected = """\
NAME 'Örter' (1, 0) (1, 5)
OP '=' (1, 6) (1, 7)
STRING "'places'" (1, 8) (1, 16)
NEWLINE '\\n' (1, 16) (1, 17)
NAME 'grün' (2, 0) (2, 4)
OP '=' (2, 5) (2, 6)
STRING "'green'" (2, 7) (2, 14)
"""
    self.check_tokenize(source, expected)

Armin Ronacher

c0eaeca

2012-03-04 13:07:57 +0000

[diff] [blame]

635

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

636

def test_unicode(self):
    # Legacy unicode literals: the u/U prefix stays inside the STRING
    # token, here assigned to non-ASCII identifiers.
    source = "Örter = u'places'\ngrün = U'green'"
    expected = """\
NAME 'Örter' (1, 0) (1, 5)
OP '=' (1, 6) (1, 7)
STRING "u'places'" (1, 8) (1, 17)
NEWLINE '\\n' (1, 17) (1, 18)
NAME 'grün' (2, 0) (2, 4)
OP '=' (2, 5) (2, 6)
STRING "U'green'" (2, 7) (2, 15)
"""
    self.check_tokenize(source, expected)

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

647

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

648

def test_async(self):

649

# Async/await extension:

650

self.check_tokenize("async = 1", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

651

NAME 'async' (1, 0) (1, 5)

652

OP '=' (1, 6) (1, 7)

653

NUMBER '1' (1, 8) (1, 9)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

654

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

655

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

656

self.check_tokenize("a = (async = 1)", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

657

NAME 'a' (1, 0) (1, 1)

658

OP '=' (1, 2) (1, 3)

659

OP '(' (1, 4) (1, 5)

660

NAME 'async' (1, 5) (1, 10)

661

OP '=' (1, 11) (1, 12)

662

NUMBER '1' (1, 13) (1, 14)

663

OP ')' (1, 14) (1, 15)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

664

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

665

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

666

self.check_tokenize("async()", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

667

NAME 'async' (1, 0) (1, 5)

668

OP '(' (1, 5) (1, 6)

669

OP ')' (1, 6) (1, 7)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

670

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

671

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

672

self.check_tokenize("class async(Bar):pass", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

673

NAME 'class' (1, 0) (1, 5)

674

NAME 'async' (1, 6) (1, 11)

675

OP '(' (1, 11) (1, 12)

676

NAME 'Bar' (1, 12) (1, 15)

677

OP ')' (1, 15) (1, 16)

678

OP ':' (1, 16) (1, 17)

679

NAME 'pass' (1, 17) (1, 21)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

680

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

681

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

682

self.check_tokenize("class async:pass", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

683

NAME 'class' (1, 0) (1, 5)

684

NAME 'async' (1, 6) (1, 11)

685

OP ':' (1, 11) (1, 12)

686

NAME 'pass' (1, 12) (1, 16)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

687

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

688

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

689

self.check_tokenize("await = 1", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

690

NAME 'await' (1, 0) (1, 5)

691

OP '=' (1, 6) (1, 7)

692

NUMBER '1' (1, 8) (1, 9)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

693

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

694

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

695

self.check_tokenize("foo.async", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

696

NAME 'foo' (1, 0) (1, 3)

697

OP '.' (1, 3) (1, 4)

698

NAME 'async' (1, 4) (1, 9)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

699

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

700

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

701

self.check_tokenize("async for a in b: pass", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

702

NAME 'async' (1, 0) (1, 5)

703

NAME 'for' (1, 6) (1, 9)

704

NAME 'a' (1, 10) (1, 11)

705

NAME 'in' (1, 12) (1, 14)

706

NAME 'b' (1, 15) (1, 16)

707

OP ':' (1, 16) (1, 17)

708

NAME 'pass' (1, 18) (1, 22)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

709

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

710

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

711

self.check_tokenize("async with a as b: pass", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

712

NAME 'async' (1, 0) (1, 5)

713

NAME 'with' (1, 6) (1, 10)

714

NAME 'a' (1, 11) (1, 12)

715

NAME 'as' (1, 13) (1, 15)

716

NAME 'b' (1, 16) (1, 17)

717

OP ':' (1, 17) (1, 18)

718

NAME 'pass' (1, 19) (1, 23)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

719

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

720

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

721

self.check_tokenize("async.foo", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

722

NAME 'async' (1, 0) (1, 5)

723

OP '.' (1, 5) (1, 6)

724

NAME 'foo' (1, 6) (1, 9)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

725

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

726

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

727

self.check_tokenize("async", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

728

NAME 'async' (1, 0) (1, 5)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

729

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

730

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

731

self.check_tokenize("async\n#comment\nawait", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

732

NAME 'async' (1, 0) (1, 5)

733

NEWLINE '\\n' (1, 5) (1, 6)

734

COMMENT '#comment' (2, 0) (2, 8)

735

NL '\\n' (2, 8) (2, 9)

736

NAME 'await' (3, 0) (3, 5)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

737

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

738

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

739

self.check_tokenize("async\n...\nawait", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

740

NAME 'async' (1, 0) (1, 5)

741

NEWLINE '\\n' (1, 5) (1, 6)

742

OP '...' (2, 0) (2, 3)

743

NEWLINE '\\n' (2, 3) (2, 4)

744

NAME 'await' (3, 0) (3, 5)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

745

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

746

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

747

self.check_tokenize("async\nawait", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

748

NAME 'async' (1, 0) (1, 5)

749

NEWLINE '\\n' (1, 5) (1, 6)

750

NAME 'await' (2, 0) (2, 5)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

751

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

752

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

753

self.check_tokenize("foo.async + 1", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

754

NAME 'foo' (1, 0) (1, 3)

755

OP '.' (1, 3) (1, 4)

756

NAME 'async' (1, 4) (1, 9)

757

OP '+' (1, 10) (1, 11)

758

NUMBER '1' (1, 12) (1, 13)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

759

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

760

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

761

self.check_tokenize("async def foo(): pass", """\

Jelle Zijlstra

2017-10-05 20:24:46 -0700

[diff] [blame]

762

NAME 'async' (1, 0) (1, 5)

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

763

NAME 'def' (1, 6) (1, 9)

764

NAME 'foo' (1, 10) (1, 13)

765

OP '(' (1, 13) (1, 14)

766

OP ')' (1, 14) (1, 15)

767

OP ':' (1, 15) (1, 16)

768

NAME 'pass' (1, 17) (1, 21)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

769

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

770

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

771

self.check_tokenize('''\

async def foo():

def foo(await):

await = 1

if 1:

await

async += 1

''', """\

Jelle Zijlstra

2017-10-05 20:24:46 -0700

[diff] [blame]

779

NAME 'async' (1, 0) (1, 5)

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

780

NAME 'def' (1, 6) (1, 9)

781

NAME 'foo' (1, 10) (1, 13)

782

OP '(' (1, 13) (1, 14)

783

OP ')' (1, 14) (1, 15)

784

OP ':' (1, 15) (1, 16)

785

NEWLINE '\\n' (1, 16) (1, 17)

786

INDENT ' ' (2, 0) (2, 2)

787

NAME 'def' (2, 2) (2, 5)

788

NAME 'foo' (2, 6) (2, 9)

789

OP '(' (2, 9) (2, 10)

Jelle Zijlstra

2017-10-05 20:24:46 -0700

[diff] [blame]

790

NAME 'await' (2, 10) (2, 15)

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

791

OP ')' (2, 15) (2, 16)

792

OP ':' (2, 16) (2, 17)

793

NEWLINE '\\n' (2, 17) (2, 18)

794

INDENT ' ' (3, 0) (3, 4)

Jelle Zijlstra

2017-10-05 20:24:46 -0700

[diff] [blame]

795

NAME 'await' (3, 4) (3, 9)

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

796

OP '=' (3, 10) (3, 11)

797

NUMBER '1' (3, 12) (3, 13)

798

NEWLINE '\\n' (3, 13) (3, 14)

799

DEDENT '' (4, 2) (4, 2)

800

NAME 'if' (4, 2) (4, 4)

801

NUMBER '1' (4, 5) (4, 6)

802

OP ':' (4, 6) (4, 7)

803

NEWLINE '\\n' (4, 7) (4, 8)

804

INDENT ' ' (5, 0) (5, 4)

Jelle Zijlstra

2017-10-05 20:24:46 -0700

[diff] [blame]

805

NAME 'await' (5, 4) (5, 9)

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

806

NEWLINE '\\n' (5, 9) (5, 10)

807

DEDENT '' (6, 0) (6, 0)

808

DEDENT '' (6, 0) (6, 0)

809

NAME 'async' (6, 0) (6, 5)

810

OP '+=' (6, 6) (6, 8)

811

NUMBER '1' (6, 9) (6, 10)

812

NEWLINE '\\n' (6, 10) (6, 11)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

813

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

814

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

815

self.check_tokenize('''\

816

async def foo():

817

async for i in 1: pass''', """\

Jelle Zijlstra

2017-10-05 20:24:46 -0700

[diff] [blame]

818

NAME 'async' (1, 0) (1, 5)

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

819

NAME 'def' (1, 6) (1, 9)

820

NAME 'foo' (1, 10) (1, 13)

821

OP '(' (1, 13) (1, 14)

822

OP ')' (1, 14) (1, 15)

823

OP ':' (1, 15) (1, 16)

824

NEWLINE '\\n' (1, 16) (1, 17)

825

INDENT ' ' (2, 0) (2, 2)

Jelle Zijlstra

2017-10-05 20:24:46 -0700

[diff] [blame]

826

NAME 'async' (2, 2) (2, 7)

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

827

NAME 'for' (2, 8) (2, 11)

828

NAME 'i' (2, 12) (2, 13)

829

NAME 'in' (2, 14) (2, 16)

830

NUMBER '1' (2, 17) (2, 18)

831

OP ':' (2, 18) (2, 19)

832

NAME 'pass' (2, 20) (2, 24)

833

DEDENT '' (3, 0) (3, 0)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

834

""")

Yury Selivanov

8fb307c

2015-07-22 13:33:45 +0300

[diff] [blame]

835

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

836

self.check_tokenize('''async def foo(async): await''', """\

Jelle Zijlstra

2017-10-05 20:24:46 -0700

[diff] [blame]

837

NAME 'async' (1, 0) (1, 5)

Yury Selivanov

8fb307c

2015-07-22 13:33:45 +0300

[diff] [blame]

838

NAME 'def' (1, 6) (1, 9)

839

NAME 'foo' (1, 10) (1, 13)

840

OP '(' (1, 13) (1, 14)

Jelle Zijlstra

2017-10-05 20:24:46 -0700

[diff] [blame]

841

NAME 'async' (1, 14) (1, 19)

Yury Selivanov

8fb307c

2015-07-22 13:33:45 +0300

[diff] [blame]

842

OP ')' (1, 19) (1, 20)

843

OP ':' (1, 20) (1, 21)

Jelle Zijlstra

2017-10-05 20:24:46 -0700

[diff] [blame]

844

NAME 'await' (1, 22) (1, 27)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

845

""")

Yury Selivanov

2015-07-23 15:01:58 +0300

[diff] [blame]

846

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

847

self.check_tokenize('''\

def f():

def baz(): pass

async def bar(): pass

852

853

await = 2''', """\

Yury Selivanov

2015-07-23 15:01:58 +0300

[diff] [blame]

854

NAME 'def' (1, 0) (1, 3)

855

NAME 'f' (1, 4) (1, 5)

OP '(' (1, 5) (1, 6)

OP ')' (1, 6) (1, 7)

OP ':' (1, 7) (1, 8)

NEWLINE '\\n' (1, 8) (1, 9)

860

NL '\\n' (2, 0) (2, 1)

861

INDENT ' ' (3, 0) (3, 2)

862

NAME 'def' (3, 2) (3, 5)

863

NAME 'baz' (3, 6) (3, 9)

864

OP '(' (3, 9) (3, 10)

865

OP ')' (3, 10) (3, 11)

866

OP ':' (3, 11) (3, 12)

867

NAME 'pass' (3, 13) (3, 17)

868

NEWLINE '\\n' (3, 17) (3, 18)

Jelle Zijlstra

2017-10-05 20:24:46 -0700

[diff] [blame]

869

NAME 'async' (4, 2) (4, 7)

Yury Selivanov

2015-07-23 15:01:58 +0300

[diff] [blame]

870

NAME 'def' (4, 8) (4, 11)

871

NAME 'bar' (4, 12) (4, 15)

872

OP '(' (4, 15) (4, 16)

873

OP ')' (4, 16) (4, 17)

874

OP ':' (4, 17) (4, 18)

875

NAME 'pass' (4, 19) (4, 23)

876

NEWLINE '\\n' (4, 23) (4, 24)

877

NL '\\n' (5, 0) (5, 1)

878

NAME 'await' (6, 2) (6, 7)

879

OP '=' (6, 8) (6, 9)

880

NUMBER '2' (6, 10) (6, 11)

881

DEDENT '' (7, 0) (7, 0)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

882

""")

Yury Selivanov

2015-07-23 15:01:58 +0300

[diff] [blame]

883

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

884

self.check_tokenize('''\

async def f():

def baz(): pass

async def bar(): pass

889

890

await = 2''', """\

Jelle Zijlstra

2017-10-05 20:24:46 -0700

[diff] [blame]

891

NAME 'async' (1, 0) (1, 5)

Yury Selivanov

2015-07-23 15:01:58 +0300

[diff] [blame]

892

NAME 'def' (1, 6) (1, 9)

893

NAME 'f' (1, 10) (1, 11)

894

OP '(' (1, 11) (1, 12)

895

OP ')' (1, 12) (1, 13)

896

OP ':' (1, 13) (1, 14)

897

NEWLINE '\\n' (1, 14) (1, 15)

898

NL '\\n' (2, 0) (2, 1)

899

INDENT ' ' (3, 0) (3, 2)

900

NAME 'def' (3, 2) (3, 5)

901

NAME 'baz' (3, 6) (3, 9)

902

OP '(' (3, 9) (3, 10)

903

OP ')' (3, 10) (3, 11)

904

OP ':' (3, 11) (3, 12)

905

NAME 'pass' (3, 13) (3, 17)

906

NEWLINE '\\n' (3, 17) (3, 18)

Jelle Zijlstra

2017-10-05 20:24:46 -0700

[diff] [blame]

907

NAME 'async' (4, 2) (4, 7)

Yury Selivanov

2015-07-23 15:01:58 +0300

[diff] [blame]

908

NAME 'def' (4, 8) (4, 11)

909

NAME 'bar' (4, 12) (4, 15)

910

OP '(' (4, 15) (4, 16)

911

OP ')' (4, 16) (4, 17)

912

OP ':' (4, 17) (4, 18)

913

NAME 'pass' (4, 19) (4, 23)

914

NEWLINE '\\n' (4, 23) (4, 24)

915

NL '\\n' (5, 0) (5, 1)

Jelle Zijlstra

2017-10-05 20:24:46 -0700

[diff] [blame]

916

NAME 'await' (6, 2) (6, 7)

Yury Selivanov

2015-07-23 15:01:58 +0300

[diff] [blame]

917

OP '=' (6, 8) (6, 9)

918

NUMBER '2' (6, 10) (6, 11)

919

DEDENT '' (7, 0) (7, 0)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

920

""")

Thomas Wouters

2006-12-13 04:49:30 +0000

[diff] [blame]

921

Raymond Hettinger

2005-06-10 11:05:19 +0000

[diff] [blame]

922

Raymond Hettinger

2005-06-10 11:05:19 +0000

[diff] [blame]

923

def decistmt(s):
    """Return *s* with every float literal wrapped in a ``Decimal`` call.

    The statement string is tokenized; each NUMBER token whose text
    contains a ``.`` is replaced by the four tokens ``Decimal ( '<text>' )``
    and every other token is passed through as a (type, string) pair.  The
    resulting pairs are untokenized and decoded back to ``str``.
    """
    stream = BytesIO(s.encode('utf-8'))
    rewritten = []
    for tok in tokenize(stream.readline):  # tokenize the string
        kind, text = tok[0], tok[1]
        is_float = kind == NUMBER and '.' in text
        if not is_float:
            rewritten.append((kind, text))
            continue
        # replace NUMBER tokens
        rewritten += [
            (NAME, 'Decimal'),
            (OP, '('),
            (STRING, repr(text)),
            (OP, ')'),
        ]
    return untokenize(rewritten).decode('utf-8')

937

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

938

class TestMisc(TestCase):
    """Tests built around the decistmt() example."""

    def test_decistmt(self):
        """decistmt() swaps floats for Decimals and the result evaluates exactly."""
        # Substitute Decimals for floats in a string of statements.
        # This is an example from the docs.

        # Local import: eval() below resolves ``Decimal`` from this scope.
        from decimal import Decimal

        expr = '+21.3e-5*-.1234/81.7'
        expected_source = "+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')"
        self.assertEqual(decistmt(expr), expected_source)

        # The format of the exponent is inherited from the platform C library.
        # Known cases are "e-007" (Windows) and "e-07" (not Windows).  Since
        # we're only showing 11 digits, and the 12th isn't close to 5, the
        # rest of the output should be platform-independent.
        float_text = repr(eval(expr))
        self.assertRegex(float_text, '-3.2171603427[0-9]*e-0+7')

        # Output from calculations with Decimal should be identical across all
        # platforms.
        decimal_value = eval(decistmt(expr))
        self.assertEqual(decimal_value,
                         Decimal('-3.217160342717258261933904529E-7'))

959

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

960

961

class TestTokenizerAdheresToPep0263(TestCase):
    """
    Test that tokenizer adheres to the coding behaviour stipulated in PEP 0263.
    """

    def _testFile(self, filename):
        """Round-trip the data file *filename* that sits next to this module.

        The file is opened in binary mode and passed to
        TestRoundtrip.check_roundtrip(); whatever that call raises
        propagates to the caller.
        """
        path = os.path.join(os.path.dirname(__file__), filename)
        # Own the handle here so it is released even when check_roundtrip()
        # raises; closing a file that is already closed is a no-op.
        with open(path, 'rb') as f:
            TestRoundtrip.check_roundtrip(self, f)

    def test_utf8_coding_cookie_and_no_utf8_bom(self):
        f = 'tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt'
        self._testFile(f)

    def test_latin1_coding_cookie_and_utf8_bom(self):
        """
        As per PEP 0263, if a file starts with a utf-8 BOM signature, the only
        allowed encoding for the comment is 'utf-8'.  The text file used in
        this test starts with a BOM signature, but specifies latin1 as the
        coding, so verify that a SyntaxError is raised, which matches the
        behaviour of the interpreter when it encounters a similar condition.
        """
        f = 'tokenize_tests-latin1-coding-cookie-and-utf8-bom-sig.txt'
        self.assertRaises(SyntaxError, self._testFile, f)

    def test_no_coding_cookie_and_utf8_bom(self):
        f = 'tokenize_tests-no-coding-cookie-and-utf8-bom-sig-only.txt'
        self._testFile(f)

    def test_utf8_coding_cookie_and_utf8_bom(self):
        f = 'tokenize_tests-utf8-coding-cookie-and-utf8-bom-sig.txt'
        self._testFile(f)

    def test_bad_coding_cookie(self):
        self.assertRaises(SyntaxError, self._testFile, 'bad_coding.py')
        self.assertRaises(SyntaxError, self._testFile, 'bad_coding2.py')

996

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

997

998

class Test_Tokenize(TestCase):

999

1000

def test__tokenize_decodes_with_specified_encoding(self):

1001

literal = '"ЉЊЈЁЂ"'

1002

line = literal.encode('utf-8')

first = False

def readline():

nonlocal first

if not first:

first = True

return line

else:

return b''

# skip the initial encoding token and the end token

1013

tokens = list(_tokenize(readline, encoding='utf-8'))[1:-1]

1014

expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1015

self.assertEqual(tokens, expected_tokens,

1016

"bytes not decoded with encoding")

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1017

1018

def test__tokenize_does_not_decode_with_encoding_none(self):

literal = '"ЉЊЈЁЂ"'

first = False

def readline():

nonlocal first

if not first:

first = True

return literal

else:

return b''

# skip the end token

tokens = list(_tokenize(readline, encoding=None))[:-1]

1031

expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1032

self.assertEqual(tokens, expected_tokens,

1033

"string not tokenized when encoding is None")

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1034

1035

1036

class TestDetectEncoding(TestCase):

1037

1038

def get_readline(self, lines):

index = 0

def readline():

nonlocal index

if index == len(lines):

raise StopIteration

line = lines[index]

index += 1

return line

return readline

def test_no_bom_no_encoding_cookie(self):

1050

lines = (

1051

b'# something\n',

1052

b'print(something)\n',

1053

b'do_something(else)\n'

1054

)

1055

encoding, consumed_lines = detect_encoding(self.get_readline(lines))

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1056

self.assertEqual(encoding, 'utf-8')

1057

self.assertEqual(consumed_lines, list(lines[:2]))

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1058

1059

def test_bom_no_cookie(self):

1060

lines = (

1061

b'\xef\xbb\xbf# something\n',

1062

b'print(something)\n',

1063

b'do_something(else)\n'

1064

)

1065

encoding, consumed_lines = detect_encoding(self.get_readline(lines))

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1066

self.assertEqual(encoding, 'utf-8-sig')

1067

self.assertEqual(consumed_lines,

1068

[b'# something\n', b'print(something)\n'])

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1069

1070

def test_cookie_first_line_no_bom(self):

1071

lines = (

1072

b'# -*- coding: latin-1 -*-\n',

1073

b'print(something)\n',

1074

b'do_something(else)\n'

1075

)

1076

encoding, consumed_lines = detect_encoding(self.get_readline(lines))

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1077

self.assertEqual(encoding, 'iso-8859-1')

1078

self.assertEqual(consumed_lines, [b'# -*- coding: latin-1 -*-\n'])

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1079

1080

def test_matched_bom_and_cookie_first_line(self):

1081

lines = (

1082

b'\xef\xbb\xbf# coding=utf-8\n',

1083

b'print(something)\n',

1084

b'do_something(else)\n'

1085

)

1086

encoding, consumed_lines = detect_encoding(self.get_readline(lines))

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1087

self.assertEqual(encoding, 'utf-8-sig')

1088

self.assertEqual(consumed_lines, [b'# coding=utf-8\n'])

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1089

1090

def test_mismatched_bom_and_cookie_first_line_raises_syntaxerror(self):

1091

lines = (

1092

b'\xef\xbb\xbf# vim: set fileencoding=ascii :\n',

1093

b'print(something)\n',

1094

b'do_something(else)\n'

1095

)

1096

readline = self.get_readline(lines)

1097

self.assertRaises(SyntaxError, detect_encoding, readline)

1098

1099

def test_cookie_second_line_no_bom(self):

1100

lines = (

1101

b'#! something\n',

1102

b'# vim: set fileencoding=ascii :\n',

1103

b'print(something)\n',

1104

b'do_something(else)\n'

1105

)

1106

encoding, consumed_lines = detect_encoding(self.get_readline(lines))

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1107

self.assertEqual(encoding, 'ascii')

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1108

expected = [b'#! something\n', b'# vim: set fileencoding=ascii :\n']

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1109

self.assertEqual(consumed_lines, expected)

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1110

1111

def test_matched_bom_and_cookie_second_line(self):

1112

lines = (

1113

b'\xef\xbb\xbf#! something\n',

1114

b'f# coding=utf-8\n',

1115

b'print(something)\n',

1116

b'do_something(else)\n'

1117

)

1118

encoding, consumed_lines = detect_encoding(self.get_readline(lines))

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1119

self.assertEqual(encoding, 'utf-8-sig')

1120

self.assertEqual(consumed_lines,

1121

[b'#! something\n', b'f# coding=utf-8\n'])

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1122

1123

def test_mismatched_bom_and_cookie_second_line_raises_syntaxerror(self):

1124

lines = (

1125

b'\xef\xbb\xbf#! something\n',

1126

b'# vim: set fileencoding=ascii :\n',

1127

b'print(something)\n',

1128

b'do_something(else)\n'

1129

)

1130

readline = self.get_readline(lines)

1131

self.assertRaises(SyntaxError, detect_encoding, readline)

1132

Serhiy Storchaka

768c16c

2014-01-09 18:36:09 +0200

[diff] [blame]

1133

def test_cookie_second_line_noncommented_first_line(self):

1134

lines = (

1135

b"print('\xc2\xa3')\n",

1136

b'# vim: set fileencoding=iso8859-15 :\n',

1137

b"print('\xe2\x82\xac')\n"

1138

)

1139

encoding, consumed_lines = detect_encoding(self.get_readline(lines))

1140

self.assertEqual(encoding, 'utf-8')

1141

expected = [b"print('\xc2\xa3')\n"]

1142

self.assertEqual(consumed_lines, expected)

1143

1144

def test_cookie_second_line_commented_first_line(self):

1145

lines = (

1146

b"#print('\xc2\xa3')\n",

1147

b'# vim: set fileencoding=iso8859-15 :\n',

1148

b"print('\xe2\x82\xac')\n"

1149

)

1150

encoding, consumed_lines = detect_encoding(self.get_readline(lines))

1151

self.assertEqual(encoding, 'iso8859-15')

1152

expected = [b"#print('\xc2\xa3')\n", b'# vim: set fileencoding=iso8859-15 :\n']

1153

self.assertEqual(consumed_lines, expected)

1154

1155

def test_cookie_second_line_empty_first_line(self):

1156

lines = (

1157

b'\n',

1158

b'# vim: set fileencoding=iso8859-15 :\n',

1159

b"print('\xe2\x82\xac')\n"

1160

)

1161

encoding, consumed_lines = detect_encoding(self.get_readline(lines))

1162

self.assertEqual(encoding, 'iso8859-15')

1163

expected = [b'\n', b'# vim: set fileencoding=iso8859-15 :\n']

1164

self.assertEqual(consumed_lines, expected)

1165

Benjamin Peterson

2009-10-09 21:43:09 +0000

[diff] [blame]

1166

def test_latin1_normalization(self):

1167

# See get_normal_name() in tokenizer.c.

1168

encodings = ("latin-1", "iso-8859-1", "iso-latin-1", "latin-1-unix",

1169

"iso-8859-1-unix", "iso-latin-1-mac")

1170

for encoding in encodings:

1171

for rep in ("-", "_"):

1172

enc = encoding.replace("-", rep)

1173

lines = (b"#!/usr/bin/python\n",

1174

b"# coding: " + enc.encode("ascii") + b"\n",

1175

b"print(things)\n",

1176

b"do_something += 4\n")

1177

rl = self.get_readline(lines)

1178

found, consumed_lines = detect_encoding(rl)

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1179

self.assertEqual(found, "iso-8859-1")

Benjamin Peterson

2009-10-09 21:43:09 +0000

[diff] [blame]

1180

Martin v. Löwis

63674f4

2012-04-20 14:36:47 +0200

[diff] [blame]

1181

def test_syntaxerror_latin1(self):

1182

# Issue 14629: need to raise SyntaxError if the first

1183

# line(s) have non-UTF-8 characters

1184

lines = (

1185

b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S

1186

)

1187

readline = self.get_readline(lines)

1188

self.assertRaises(SyntaxError, detect_encoding, readline)

1189

1190

Benjamin Peterson

2009-10-09 21:43:09 +0000

[diff] [blame]

1191

def test_utf8_normalization(self):

1192

# See get_normal_name() in tokenizer.c.

1193

encodings = ("utf-8", "utf-8-mac", "utf-8-unix")

1194

for encoding in encodings:

1195

for rep in ("-", "_"):

1196

enc = encoding.replace("-", rep)

1197

lines = (b"#!/usr/bin/python\n",

1198

b"# coding: " + enc.encode("ascii") + b"\n",

1199

b"1 + 3\n")

1200

rl = self.get_readline(lines)

1201

found, consumed_lines = detect_encoding(rl)

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1202

self.assertEqual(found, "utf-8")

Benjamin Peterson

2009-10-09 21:43:09 +0000

[diff] [blame]

1203

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1204

def test_short_files(self):

1205

readline = self.get_readline((b'print(something)\n',))

1206

encoding, consumed_lines = detect_encoding(readline)

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1207

self.assertEqual(encoding, 'utf-8')

1208

self.assertEqual(consumed_lines, [b'print(something)\n'])

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1209

1210

encoding, consumed_lines = detect_encoding(self.get_readline(()))

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1211

self.assertEqual(encoding, 'utf-8')

1212

self.assertEqual(consumed_lines, [])

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1213

1214

readline = self.get_readline((b'\xef\xbb\xbfprint(something)\n',))

1215

encoding, consumed_lines = detect_encoding(readline)

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1216

self.assertEqual(encoding, 'utf-8-sig')

1217

self.assertEqual(consumed_lines, [b'print(something)\n'])

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1218

1219

readline = self.get_readline((b'\xef\xbb\xbf',))

1220

encoding, consumed_lines = detect_encoding(readline)

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1221

self.assertEqual(encoding, 'utf-8-sig')

1222

self.assertEqual(consumed_lines, [])

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1223

Benjamin Peterson

433f32c

2008-12-12 01:25:05 +0000

[diff] [blame]

1224

readline = self.get_readline((b'# coding: bad\n',))

1225

self.assertRaises(SyntaxError, detect_encoding, readline)

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1226

Serhiy Storchaka

dafea85

2013-09-16 23:51:56 +0300

[diff] [blame]

1227

def test_false_encoding(self):

1228

# Issue 18873: "Encoding" detected in non-comment lines

1229

readline = self.get_readline((b'print("#coding=fake")',))

1230

encoding, consumed_lines = detect_encoding(readline)

1231

self.assertEqual(encoding, 'utf-8')

1232

self.assertEqual(consumed_lines, [b'print("#coding=fake")'])

1233

Victor Stinner

58c0752

2010-11-09 01:08:59 +0000

[diff] [blame]

1234

def test_open(self):

1235

filename = support.TESTFN + '.py'

1236

self.addCleanup(support.unlink, filename)

1237

1238

# test coding cookie

1239

for encoding in ('iso-8859-15', 'utf-8'):

1240

with open(filename, 'w', encoding=encoding) as fp:

1241

print("# coding: %s" % encoding, file=fp)

1242

print("print('euro:\u20ac')", file=fp)

1243

with tokenize_open(filename) as fp:

Victor Stinner

92665ab

2010-11-09 01:11:31 +0000

[diff] [blame]

1244

self.assertEqual(fp.encoding, encoding)

1245

self.assertEqual(fp.mode, 'r')

Victor Stinner

58c0752

2010-11-09 01:08:59 +0000

[diff] [blame]

1246

1247

# test BOM (no coding cookie)

1248

with open(filename, 'w', encoding='utf-8-sig') as fp:

1249

print("print('euro:\u20ac')", file=fp)

1250

with tokenize_open(filename) as fp:

Victor Stinner

92665ab

2010-11-09 01:11:31 +0000

[diff] [blame]

1251

self.assertEqual(fp.encoding, 'utf-8-sig')

1252

self.assertEqual(fp.mode, 'r')

Victor Stinner

58c0752

2010-11-09 01:08:59 +0000

[diff] [blame]

1253

Brett Cannon

c33f3f2

2012-04-20 13:23:54 -0400

[diff] [blame]

1254

def test_filename_in_exception(self):

1255

# When possible, include the file name in the exception.

1256

path = 'some_file_path'

1257

lines = (

1258

b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S

1259

)

1260

class Bunk:

1261

def __init__(self, lines, path):

self.name = path

self._lines = lines

self._index = 0

def readline(self):

if self._index == len(lines):

1268

raise StopIteration

1269

line = lines[self._index]

self._index += 1

return line

with self.assertRaises(SyntaxError):

1274

ins = Bunk(lines, path)

1275

# Make sure lacking a name isn't an issue.

1276

del ins.name

1277

detect_encoding(ins.readline)

1278

with self.assertRaisesRegex(SyntaxError, '.*{}'.format(path)):

1279

ins = Bunk(lines, path)

1280

detect_encoding(ins.readline)

1281

Victor Stinner

387729e

2015-05-26 00:43:58 +0200

[diff] [blame]

1282

def test_open_error(self):

1283

# Issue #23840: open() must close the binary file on error

1284

m = BytesIO(b'#coding:xxx')

1285

with mock.patch('tokenize._builtin_open', return_value=m):

1286

self.assertRaises(SyntaxError, tokenize_open, 'foobar')

1287

self.assertTrue(m.closed)

1288

1289

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1290

class TestTokenize(TestCase):

1291

1292

def test_tokenize(self):

1293

import tokenize as tokenize_module

1294

encoding = object()

1295

encoding_used = None

1296

def mock_detect_encoding(readline):

Serhiy Storchaka

74a49ac

2015-03-20 16:46:19 +0200

[diff] [blame]

1297

return encoding, [b'first', b'second']

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1298

1299

def mock__tokenize(readline, encoding):

1300

nonlocal encoding_used

1301

encoding_used = encoding

1302

out = []

1303

while True:

1304

next_line = readline()

1305

if next_line:

1306

out.append(next_line)

continue

return out

counter = 0

def mock_readline():

nonlocal counter

counter += 1

if counter == 5:

return b''

Serhiy Storchaka

74a49ac

2015-03-20 16:46:19 +0200

[diff] [blame]

1316

return str(counter).encode()

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1317

1318

orig_detect_encoding = tokenize_module.detect_encoding

1319

orig__tokenize = tokenize_module._tokenize

1320

tokenize_module.detect_encoding = mock_detect_encoding

1321

tokenize_module._tokenize = mock__tokenize

1322

try:

1323

results = tokenize(mock_readline)

Serhiy Storchaka

74a49ac

2015-03-20 16:46:19 +0200

[diff] [blame]

1324

self.assertEqual(list(results),

1325

[b'first', b'second', b'1', b'2', b'3', b'4'])

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1326

finally:

1327

tokenize_module.detect_encoding = orig_detect_encoding

1328

tokenize_module._tokenize = orig__tokenize

1329

1330

self.assertTrue(encoding_used, encoding)

Raymond Hettinger

2005-06-10 11:05:19 +0000

[diff] [blame]

1331

Yury Selivanov

8085b80

2015-05-18 12:50:52 -0400

[diff] [blame]

1332

def test_oneline_defs(self):

1333

buf = []

1334

for i in range(500):

1335

buf.append('def i{i}(): return {i}'.format(i=i))

buf.append('OK')

buf = '\n'.join(buf)

# Test that 500 consequent, one-line defs is OK

1340

toks = list(tokenize(BytesIO(buf.encode('utf-8')).readline))

1341

self.assertEqual(toks[-2].string, 'OK') # [-1] is always ENDMARKER

1342

Meador Inge

2012-01-19 00:44:45 -0600

[diff] [blame]

1343

def assertExactTypeEqual(self, opstr, *optypes):

1344

tokens = list(tokenize(BytesIO(opstr.encode('utf-8')).readline))

1345

num_optypes = len(optypes)

1346

self.assertEqual(len(tokens), 2 + num_optypes)

Albert-Jan Nijburg

fc354f0

2017-05-31 15:00:21 +0100

[diff] [blame]

1347

self.assertEqual(tok_name[tokens[0].exact_type],

1348

tok_name[ENCODING])

Meador Inge

2012-01-19 00:44:45 -0600

[diff] [blame]

1349

for i in range(num_optypes):

Albert-Jan Nijburg

fc354f0

2017-05-31 15:00:21 +0100

[diff] [blame]

1350

self.assertEqual(tok_name[tokens[i + 1].exact_type],

1351

tok_name[optypes[i]])

1352

self.assertEqual(tok_name[tokens[1 + num_optypes].exact_type],

1353

tok_name[token.ENDMARKER])

Meador Inge

2012-01-19 00:44:45 -0600

[diff] [blame]

1354

1355

def test_exact_type(self):

1356

self.assertExactTypeEqual('()', token.LPAR, token.RPAR)

1357

self.assertExactTypeEqual('[]', token.LSQB, token.RSQB)

1358

self.assertExactTypeEqual(':', token.COLON)

1359

self.assertExactTypeEqual(',', token.COMMA)

1360

self.assertExactTypeEqual(';', token.SEMI)

1361

self.assertExactTypeEqual('+', token.PLUS)

1362

self.assertExactTypeEqual('-', token.MINUS)

1363

self.assertExactTypeEqual('*', token.STAR)

1364

self.assertExactTypeEqual('/', token.SLASH)

1365

self.assertExactTypeEqual('|', token.VBAR)

1366

self.assertExactTypeEqual('&', token.AMPER)

1367

self.assertExactTypeEqual('<', token.LESS)

1368

self.assertExactTypeEqual('>', token.GREATER)

1369

self.assertExactTypeEqual('=', token.EQUAL)

1370

self.assertExactTypeEqual('.', token.DOT)

1371

self.assertExactTypeEqual('%', token.PERCENT)

1372

self.assertExactTypeEqual('{}', token.LBRACE, token.RBRACE)

1373

self.assertExactTypeEqual('==', token.EQEQUAL)

1374

self.assertExactTypeEqual('!=', token.NOTEQUAL)

1375

self.assertExactTypeEqual('<=', token.LESSEQUAL)

1376

self.assertExactTypeEqual('>=', token.GREATEREQUAL)

1377

self.assertExactTypeEqual('~', token.TILDE)

1378

self.assertExactTypeEqual('^', token.CIRCUMFLEX)

1379

self.assertExactTypeEqual('<<', token.LEFTSHIFT)

1380

self.assertExactTypeEqual('>>', token.RIGHTSHIFT)

1381

self.assertExactTypeEqual('**', token.DOUBLESTAR)

1382

self.assertExactTypeEqual('+=', token.PLUSEQUAL)

1383

self.assertExactTypeEqual('-=', token.MINEQUAL)

1384

self.assertExactTypeEqual('*=', token.STAREQUAL)

1385

self.assertExactTypeEqual('/=', token.SLASHEQUAL)

1386

self.assertExactTypeEqual('%=', token.PERCENTEQUAL)

1387

self.assertExactTypeEqual('&=', token.AMPEREQUAL)

1388

self.assertExactTypeEqual('|=', token.VBAREQUAL)

1389

self.assertExactTypeEqual('^=', token.CIRCUMFLEXEQUAL)

1390

self.assertExactTypeEqual('^=', token.CIRCUMFLEXEQUAL)

1391

self.assertExactTypeEqual('<<=', token.LEFTSHIFTEQUAL)

1392

self.assertExactTypeEqual('>>=', token.RIGHTSHIFTEQUAL)

1393

self.assertExactTypeEqual('**=', token.DOUBLESTAREQUAL)

1394

self.assertExactTypeEqual('//', token.DOUBLESLASH)

1395

self.assertExactTypeEqual('//=', token.DOUBLESLASHEQUAL)

Jim Fasarakis-Hilliard

d4914e9

2017-03-14 22:16:15 +0200

[diff] [blame]

1396

self.assertExactTypeEqual('...', token.ELLIPSIS)

1397

self.assertExactTypeEqual('->', token.RARROW)

Meador Inge

2012-01-19 00:44:45 -0600

[diff] [blame]

1398

self.assertExactTypeEqual('@', token.AT)

Benjamin Peterson

d51374e

2014-04-09 23:55:56 -0400

[diff] [blame]

1399

self.assertExactTypeEqual('@=', token.ATEQUAL)

Meador Inge

2012-01-19 00:44:45 -0600

[diff] [blame]

1400

1401

self.assertExactTypeEqual('a**2+b**2==c**2',

1402

NAME, token.DOUBLESTAR, NUMBER,

1403

token.PLUS,

1404

NAME, token.DOUBLESTAR, NUMBER,

1405

token.EQEQUAL,

1406

NAME, token.DOUBLESTAR, NUMBER)

1407

self.assertExactTypeEqual('{1, 2, 3}',

1408

token.LBRACE,

1409

token.NUMBER, token.COMMA,

1410

token.NUMBER, token.COMMA,

1411

token.NUMBER,

1412

token.RBRACE)

1413

self.assertExactTypeEqual('^(x & 0x1)',

1414

token.CIRCUMFLEX,

1415

token.LPAR,

1416

token.NAME, token.AMPER, token.NUMBER,

1417

token.RPAR)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

1418

Ezio Melotti

fafa8b7

2012-11-03 17:46:51 +0200

[diff] [blame]

1419

def test_pathological_trailing_whitespace(self):

1420

# See http://bugs.python.org/issue16152

1421

self.assertExactTypeEqual('@ ', token.AT)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

1422

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

1423

Terry Jan Reedy

2014-02-17 16:45:48 -0500

[diff] [blame]

1424

class UntokenizeTest(TestCase):

Terry Jan Reedy

58edfd9

2014-02-17 16:49:06 -0500

[diff] [blame]

1425

Terry Jan Reedy

2014-02-17 16:45:48 -0500

[diff] [blame]

1426

def test_bad_input_order(self):

Terry Jan Reedy

2014-02-23 23:33:08 -0500

[diff] [blame]

1427

# raise if previous row

Terry Jan Reedy

2014-02-17 16:45:48 -0500

[diff] [blame]

u = Untokenizer()

u.prev_row = 2

u.prev_col = 2

with self.assertRaises(ValueError) as cm:

1432

u.add_whitespace((1,3))

Terry Jan Reedy

58edfd9

2014-02-17 16:49:06 -0500

[diff] [blame]

1433

self.assertEqual(cm.exception.args[0],

Terry Jan Reedy

2014-02-17 16:45:48 -0500

[diff] [blame]

1434

'start (1,3) precedes previous end (2,2)')

Terry Jan Reedy

2014-02-23 23:33:08 -0500

[diff] [blame]

1435

# raise if previous column in row

Terry Jan Reedy

2014-02-17 16:45:48 -0500

[diff] [blame]

1436

self.assertRaises(ValueError, u.add_whitespace, (2,1))

1437

Terry Jan Reedy

2014-02-23 23:33:08 -0500

[diff] [blame]

1438

def test_backslash_continuation(self):

1439

# The problem is that <whitespace>\<newline> leaves no token

u = Untokenizer()

u.prev_row = 1

u.prev_col = 1

u.tokens = []

u.add_whitespace((2, 0))

1445

self.assertEqual(u.tokens, ['\\\n'])

1446

u.prev_row = 2

1447

u.add_whitespace((4, 4))

1448

self.assertEqual(u.tokens, ['\\\n', '\\\n\\\n', ' '])

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

1449

TestRoundtrip.check_roundtrip(self, 'a\n b\n c\n \\\n c\n')

Terry Jan Reedy

2014-02-23 23:33:08 -0500

[diff] [blame]

1450

Terry Jan Reedy

5b8d2c3

2014-02-17 23:12:16 -0500

[diff] [blame]

1451

def test_iter_compat(self):

1452

u = Untokenizer()

1453

token = (NAME, 'Hello')

1454

tokens = [(ENCODING, 'utf-8'), token]

1455

u.compat(token, iter([]))

1456

self.assertEqual(u.tokens, ["Hello "])

1457

u = Untokenizer()

1458

self.assertEqual(u.untokenize(iter([token])), 'Hello ')

1459

u = Untokenizer()

1460

self.assertEqual(u.untokenize(iter(tokens)), 'Hello ')

1461

self.assertEqual(u.encoding, 'utf-8')

1462

self.assertEqual(untokenize(iter(tokens)), b'Hello ')

1463

Terry Jan Reedy

2014-02-17 16:45:48 -0500

[diff] [blame]

1464

Jason R. Coombs

2015-06-20 19:52:22 -0400

[diff] [blame]

1465

class TestRoundtrip(TestCase):

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

1466

1467

def check_roundtrip(self, f):

1468

"""

1469

Test roundtrip for `untokenize`. `f` is an open file or a string.

1470

The source code in f is tokenized to both 5- and 2-tuples.

1471

Both sequences are converted back to source code via

1472

tokenize.untokenize(), and the latter tokenized again to 2-tuples.

1473

The test fails if the 3 pair tokenizations do not match.

1474

1475

When untokenize bugs are fixed, untokenize with 5-tuples should

1476

reproduce code that does not contain a backslash continuation

1477

following spaces. A proper test should test this.

1478

"""

1479

# Get source code and original tokenizations

1480

if isinstance(f, str):

1481

code = f.encode('utf-8')

else:

code = f.read()

f.close()

readline = iter(code.splitlines(keepends=True)).__next__

1486

tokens5 = list(tokenize(readline))

1487

tokens2 = [tok[:2] for tok in tokens5]

1488

# Reproduce tokens2 from pairs

1489

bytes_from2 = untokenize(tokens2)

1490

readline2 = iter(bytes_from2.splitlines(keepends=True)).__next__

1491

tokens2_from2 = [tok[:2] for tok in tokenize(readline2)]

1492

self.assertEqual(tokens2_from2, tokens2)

1493

# Reproduce tokens2 from 5-tuples

1494

bytes_from5 = untokenize(tokens5)

1495

readline5 = iter(bytes_from5.splitlines(keepends=True)).__next__

1496

tokens2_from5 = [tok[:2] for tok in tokenize(readline5)]

1497

self.assertEqual(tokens2_from5, tokens2)

1498

1499

def test_roundtrip(self):

1500

# There are some standard formatting practices that are easy to get right.

1501

1502

self.check_roundtrip("if x == 1:\n"

1503

" print(x)\n")

1504

self.check_roundtrip("# This is a comment\n"

1505

"# This also")

1506

1507

# Some people use different formatting conventions, which makes

1508

# untokenize a little trickier. Note that this test involves trailing

1509

# whitespace after the colon. Note that we use hex escapes to make the

1510

# two trailing blanks apparent in the expected output.

1511

1512

self.check_roundtrip("if x == 1 : \n"

1513

" print(x)\n")

1514

fn = support.findfile("tokenize_tests.txt")

1515

with open(fn, 'rb') as f:

1516

self.check_roundtrip(f)

1517

self.check_roundtrip("if x == 1:\n"

1518

" # A comment by itself.\n"

1519

" print(x) # Comment here, too.\n"

1520

" # Another comment.\n"

1521

"after_if = True\n")

1522

self.check_roundtrip("if (x # The comments need to go in the right place\n"

1523

" == 1):\n"

1524

" print('x==1')\n")

1525

self.check_roundtrip("class Test: # A comment here\n"

1526

" # A comment with weird indent\n"

1527

" after_com = 5\n"

1528

" def x(m): return m*5 # a one liner\n"

1529

" def y(m): # A whitespace after the colon\n"

1530

" return y*4 # 3-space indent\n")

1531

1532

# Some error-handling code

1533

self.check_roundtrip("try: import somemodule\n"

1534

"except ImportError: # comment\n"

1535

" print('Can not import' # comment2\n)"

1536

"else: print('Loaded')\n")

1537

1538

def test_continuation(self):

1539

# Balancing continuation

1540

self.check_roundtrip("a = (3,4, \n"

"5,6)\n"

"y = [3, 4,\n"

"5]\n"

"z = {'a': 5,\n"

"'b':15, 'c':True}\n"

1546

"x = len(y) + 5 - a[\n"

"3] - a[2]\n"

"+ len(z) - z[\n"

"'b']\n")

def test_backslash_continuation(self):

1552

# Backslash means line continuation, except for comments

1553

self.check_roundtrip("x=1+\\\n"

1554

"1\n"

1555

"# This is a comment\\\n"

1556

"# This also\n")

1557

self.check_roundtrip("# Comment \\\n"

1558

"x = 0")

1559

1560

def test_string_concatenation(self):

1561

# Two string literals on the same line

1562

self.check_roundtrip("'' ''")

1563

1564

def test_random_files(self):

1565

# Test roundtrip on random python modules.

1566

# pass the '-ucpu' option to process the full directory.

1567

1568

import glob, random

1569

fn = support.findfile("tokenize_tests.txt")

1570

tempdir = os.path.dirname(fn) or os.curdir

1571

testfiles = glob.glob(os.path.join(tempdir, "test*.py"))

1572

Brett Cannon

2016-09-09 14:57:09 -0700

[diff] [blame]

1573

# Tokenize is broken on test_pep3131.py because regular expressions are

1574

# broken on the obscure unicode identifiers in it. *sigh*

1575

# With roundtrip extended to test the 5-tuple mode of untokenize,

1576

# 7 more testfiles fail. Remove them also until the failure is diagnosed.

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

1577

Zachary Ware

724f6a6

2016-09-09 12:55:37 -0700

[diff] [blame]

1578

testfiles.remove(os.path.join(tempdir, "test_unicode_identifiers.py"))

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

1579

for f in ('buffer', 'builtin', 'fileio', 'inspect', 'os', 'platform', 'sys'):

1580

testfiles.remove(os.path.join(tempdir, "test_%s.py") % f)

1581

1582

if not support.is_resource_enabled("cpu"):

1583

testfiles = random.sample(testfiles, 10)

1584

1585

for testfile in testfiles:

1586

with open(testfile, 'rb') as f:

1587

with self.subTest(file=testfile):

1588

self.check_roundtrip(f)

1589

1590

Jason R. Coombs

2015-06-20 19:52:22 -0400

[diff] [blame]

1591

def roundtrip(self, code):

1592

if isinstance(code, str):

1593

code = code.encode('utf-8')

Jason R. Coombs

b6d1cdd

2015-06-25 22:42:24 -0400

[diff] [blame]

1594

return untokenize(tokenize(BytesIO(code).readline)).decode('utf-8')

Jason R. Coombs

2015-06-20 19:52:22 -0400

[diff] [blame]

1595

1596

def test_indentation_semantics_retained(self):

1597

"""

1598

Ensure that although whitespace might be mutated in a roundtrip,

1599

the semantic meaning of the indentation remains consistent.

1600

"""

1601

code = "if False:\n\tx=3\n\tx=3\n"

Jason R. Coombs

b6d1cdd

2015-06-25 22:42:24 -0400

[diff] [blame]

1602

codelines = self.roundtrip(code).split('\n')

Jason R. Coombs

2015-06-20 19:52:22 -0400

[diff] [blame]

1603

self.assertEqual(codelines[1], codelines[2])

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

1604

self.check_roundtrip(code)

Jason R. Coombs

2015-06-20 19:52:22 -0400

[diff] [blame]

1605

1606

Thomas Wouters

49fd7fa

2006-04-21 10:40:58 +0000

[diff] [blame]

1607

if __name__ == "__main__":

Brett Cannon