Blame - Lib/test/test_tokenize.py - platform/external/python/cpython3

2015-10-06 18:16:28 +0300

[diff] [blame]

1

from test import support

2

from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,

3

STRING, ENDMARKER, ENCODING, tok_name, detect_encoding,

4

open as tokenize_open, Untokenizer)

5

from io import BytesIO

Brett Cannon

2016-09-09 14:57:09 -0700

[diff] [blame]

6

from unittest import TestCase, mock

7

from test.test_grammar import (VALID_UNDERSCORE_LITERALS,

8

INVALID_UNDERSCORE_LITERALS)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

9

import os

10

import token

Thomas Wouters

2006-12-13 04:49:30 +0000

[diff] [blame]

11

Thomas Wouters

2006-12-13 04:49:30 +0000

[diff] [blame]

12

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

13

class TokenizeTest(TestCase):

14

# Tests for the tokenize module.

Jason R. Coombs

7cf3638

2015-06-20 19:13:50 -0400

[diff] [blame]

15

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

16

# The tests can be really simple. Given a small fragment of source

17

# code, print out a table with tokens. The ENDMARKER is omitted for

18

# brevity.

19

20

def check_tokenize(self, s, expected):

21

# Format the tokens in s in a table format.

22

# The ENDMARKER is omitted.

23

result = []

24

f = BytesIO(s.encode('utf-8'))

25

for type, token, start, end, line in tokenize(f.readline):

26

if type == ENDMARKER:

27

break

28

type = tok_name[type]

Eric V. Smith

2015-10-16 20:45:53 -0400

[diff] [blame]

29

result.append(f" {type:10} {token!r:13} {start} {end}")

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

30

self.assertEqual(result,

31

[" ENCODING 'utf-8' (0, 0) (0, 0)"] +

32

expected.rstrip().splitlines())

33

34

def test_basic(self):

35

self.check_tokenize("1 + 1", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

36

NUMBER '1' (1, 0) (1, 1)

37

OP '+' (1, 2) (1, 3)

38

NUMBER '1' (1, 4) (1, 5)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

39

""")

40

self.check_tokenize("if False:\n"

41

" # NL\n"

42

" True = False # NEWLINE\n", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

43

NAME 'if' (1, 0) (1, 2)

44

NAME 'False' (1, 3) (1, 8)

45

OP ':' (1, 8) (1, 9)

46

NEWLINE '\\n' (1, 9) (1, 10)

47

COMMENT '# NL' (2, 4) (2, 8)

48

NL '\\n' (2, 8) (2, 9)

49

INDENT ' ' (3, 0) (3, 4)

50

NAME 'True' (3, 4) (3, 8)

51

OP '=' (3, 9) (3, 10)

52

NAME 'False' (3, 11) (3, 16)

53

COMMENT '# NEWLINE' (3, 17) (3, 26)

54

NEWLINE '\\n' (3, 26) (3, 27)

55

DEDENT '' (4, 0) (4, 0)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

56

""")

57

indent_error_file = b"""\

def k(x):

x += 2

x += 5

"""

readline = BytesIO(indent_error_file).readline

63

with self.assertRaisesRegex(IndentationError,

64

"unindent does not match any "

65

"outer indentation level"):

66

for tok in tokenize(readline):

67

pass

Thomas Wouters

2006-12-13 04:49:30 +0000

[diff] [blame]

68

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

69

def test_int(self):

70

# Ordinary integers and binary operators

71

self.check_tokenize("0xff <= 255", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

72

NUMBER '0xff' (1, 0) (1, 4)

73

OP '<=' (1, 5) (1, 7)

74

NUMBER '255' (1, 8) (1, 11)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

75

""")

76

self.check_tokenize("0b10 <= 255", """\

Eric Smith

74ca557

2008-03-17 19:49:19 +0000

[diff] [blame]

77

NUMBER '0b10' (1, 0) (1, 4)

78

OP '<=' (1, 5) (1, 7)

79

NUMBER '255' (1, 8) (1, 11)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

80

""")

81

self.check_tokenize("0o123 <= 0O123", """\

Eric Smith

74ca557

2008-03-17 19:49:19 +0000

[diff] [blame]

82

NUMBER '0o123' (1, 0) (1, 5)

83

OP '<=' (1, 6) (1, 8)

84

NUMBER '0O123' (1, 9) (1, 14)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

85

""")

86

self.check_tokenize("1234567 > ~0x15", """\

Mark Dickinson

2008-03-16 05:05:12 +0000

[diff] [blame]

87

NUMBER '1234567' (1, 0) (1, 7)

88

OP '>' (1, 8) (1, 9)

89

OP '~' (1, 10) (1, 11)

90

NUMBER '0x15' (1, 11) (1, 15)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

91

""")

92

self.check_tokenize("2134568 != 1231515", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

93

NUMBER '2134568' (1, 0) (1, 7)

94

OP '!=' (1, 8) (1, 10)

Mark Dickinson

2008-03-16 05:05:12 +0000

[diff] [blame]

95

NUMBER '1231515' (1, 11) (1, 18)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

96

""")

97

self.check_tokenize("(-124561-1) & 200000000", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

98

OP '(' (1, 0) (1, 1)

99

OP '-' (1, 1) (1, 2)

100

NUMBER '124561' (1, 2) (1, 8)

101

OP '-' (1, 8) (1, 9)

102

NUMBER '1' (1, 9) (1, 10)

103

OP ')' (1, 10) (1, 11)

104

OP '&' (1, 12) (1, 13)

Mark Dickinson

2008-03-16 05:05:12 +0000

[diff] [blame]

105

NUMBER '200000000' (1, 14) (1, 23)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

106

""")

107

self.check_tokenize("0xdeadbeef != -1", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

108

NUMBER '0xdeadbeef' (1, 0) (1, 10)

109

OP '!=' (1, 11) (1, 13)

110

OP '-' (1, 14) (1, 15)

111

NUMBER '1' (1, 15) (1, 16)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

112

""")

113

self.check_tokenize("0xdeadc0de & 12345", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

114

NUMBER '0xdeadc0de' (1, 0) (1, 10)

115

OP '&' (1, 11) (1, 12)

Mark Dickinson

2008-03-16 05:05:12 +0000

[diff] [blame]

116

NUMBER '12345' (1, 13) (1, 18)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

117

""")

118

self.check_tokenize("0xFF & 0x15 | 1234", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

119

NUMBER '0xFF' (1, 0) (1, 4)

120

OP '&' (1, 5) (1, 6)

121

NUMBER '0x15' (1, 7) (1, 11)

122

OP '|' (1, 12) (1, 13)

123

NUMBER '1234' (1, 14) (1, 18)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

124

""")

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

125

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

126

def test_long(self):

127

# Long integers

128

self.check_tokenize("x = 0", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

129

NAME 'x' (1, 0) (1, 1)

130

OP '=' (1, 2) (1, 3)

Mark Dickinson

2008-03-16 05:05:12 +0000

[diff] [blame]

131

NUMBER '0' (1, 4) (1, 5)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

132

""")

133

self.check_tokenize("x = 0xfffffffffff", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

134

NAME 'x' (1, 0) (1, 1)

135

OP '=' (1, 2) (1, 3)

Eric V. Smith

2015-10-16 20:45:53 -0400

[diff] [blame]

136

NUMBER '0xfffffffffff' (1, 4) (1, 17)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

137

""")

138

self.check_tokenize("x = 123141242151251616110", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

139

NAME 'x' (1, 0) (1, 1)

140

OP '=' (1, 2) (1, 3)

Eric V. Smith

2015-10-16 20:45:53 -0400

[diff] [blame]

141

NUMBER '123141242151251616110' (1, 4) (1, 25)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

142

""")

143

self.check_tokenize("x = -15921590215012591", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

144

NAME 'x' (1, 0) (1, 1)

145

OP '=' (1, 2) (1, 3)

146

OP '-' (1, 4) (1, 5)

Eric V. Smith

2015-10-16 20:45:53 -0400

[diff] [blame]

147

NUMBER '15921590215012591' (1, 5) (1, 22)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

148

""")

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

149

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

150

def test_float(self):

151

# Floating point numbers

152

self.check_tokenize("x = 3.14159", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

153

NAME 'x' (1, 0) (1, 1)

154

OP '=' (1, 2) (1, 3)

155

NUMBER '3.14159' (1, 4) (1, 11)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

156

""")

157

self.check_tokenize("x = 314159.", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

158

NAME 'x' (1, 0) (1, 1)

159

OP '=' (1, 2) (1, 3)

160

NUMBER '314159.' (1, 4) (1, 11)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

161

""")

162

self.check_tokenize("x = .314159", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

163

NAME 'x' (1, 0) (1, 1)

164

OP '=' (1, 2) (1, 3)

165

NUMBER '.314159' (1, 4) (1, 11)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

166

""")

167

self.check_tokenize("x = 3e14159", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

168

NAME 'x' (1, 0) (1, 1)

169

OP '=' (1, 2) (1, 3)

170

NUMBER '3e14159' (1, 4) (1, 11)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

171

""")

172

self.check_tokenize("x = 3E123", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

173

NAME 'x' (1, 0) (1, 1)

174

OP '=' (1, 2) (1, 3)

175

NUMBER '3E123' (1, 4) (1, 9)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

176

""")

177

self.check_tokenize("x+y = 3e-1230", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

178

NAME 'x' (1, 0) (1, 1)

179

OP '+' (1, 1) (1, 2)

180

NAME 'y' (1, 2) (1, 3)

181

OP '=' (1, 4) (1, 5)

182

NUMBER '3e-1230' (1, 6) (1, 13)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

183

""")

184

self.check_tokenize("x = 3.14e159", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

185

NAME 'x' (1, 0) (1, 1)

186

OP '=' (1, 2) (1, 3)

187

NUMBER '3.14e159' (1, 4) (1, 12)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

188

""")

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

189

Brett Cannon

2016-09-09 14:57:09 -0700

[diff] [blame]

190

def test_underscore_literals(self):

191

def number_token(s):

192

f = BytesIO(s.encode('utf-8'))

193

for toktype, token, start, end, line in tokenize(f.readline):

194

if toktype == NUMBER:

195

return token

196

return 'invalid token'

197

for lit in VALID_UNDERSCORE_LITERALS:

198

if '(' in lit:

199

# this won't work with compound complex inputs

200

continue

201

self.assertEqual(number_token(lit), lit)

202

for lit in INVALID_UNDERSCORE_LITERALS:

203

self.assertNotEqual(number_token(lit), lit)

204

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

205

def test_string(self):

206

# String literals

207

self.check_tokenize("x = ''; y = \"\"", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

208

NAME 'x' (1, 0) (1, 1)

209

OP '=' (1, 2) (1, 3)

210

STRING "''" (1, 4) (1, 6)

211

OP ';' (1, 6) (1, 7)

212

NAME 'y' (1, 8) (1, 9)

213

OP '=' (1, 10) (1, 11)

214

STRING '""' (1, 12) (1, 14)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

215

""")

216

self.check_tokenize("x = '\"'; y = \"'\"", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

217

NAME 'x' (1, 0) (1, 1)

218

OP '=' (1, 2) (1, 3)

219

STRING '\\'"\\'' (1, 4) (1, 7)

220

OP ';' (1, 7) (1, 8)

221

NAME 'y' (1, 9) (1, 10)

222

OP '=' (1, 11) (1, 12)

223

STRING '"\\'"' (1, 13) (1, 16)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

224

""")

225

self.check_tokenize("x = \"doesn't \"shrink\", does it\"", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

226

NAME 'x' (1, 0) (1, 1)

227

OP '=' (1, 2) (1, 3)

228

STRING '"doesn\\'t "' (1, 4) (1, 14)

229

NAME 'shrink' (1, 14) (1, 20)

230

STRING '", does it"' (1, 20) (1, 31)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

231

""")

232

self.check_tokenize("x = 'abc' + 'ABC'", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

233

NAME 'x' (1, 0) (1, 1)

234

OP '=' (1, 2) (1, 3)

Mark Dickinson

2008-03-16 05:05:12 +0000

[diff] [blame]

235

STRING "'abc'" (1, 4) (1, 9)

236

OP '+' (1, 10) (1, 11)

237

STRING "'ABC'" (1, 12) (1, 17)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

238

""")

239

self.check_tokenize('y = "ABC" + "ABC"', """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

240

NAME 'y' (1, 0) (1, 1)

241

OP '=' (1, 2) (1, 3)

Mark Dickinson

2008-03-16 05:05:12 +0000

[diff] [blame]

242

STRING '"ABC"' (1, 4) (1, 9)

243

OP '+' (1, 10) (1, 11)

244

STRING '"ABC"' (1, 12) (1, 17)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

245

""")

246

self.check_tokenize("x = r'abc' + r'ABC' + R'ABC' + R'ABC'", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

247

NAME 'x' (1, 0) (1, 1)

248

OP '=' (1, 2) (1, 3)

Mark Dickinson

2008-03-16 05:05:12 +0000

[diff] [blame]

249

STRING "r'abc'" (1, 4) (1, 10)

250

OP '+' (1, 11) (1, 12)

251

STRING "r'ABC'" (1, 13) (1, 19)

252

OP '+' (1, 20) (1, 21)

253

STRING "R'ABC'" (1, 22) (1, 28)

254

OP '+' (1, 29) (1, 30)

255

STRING "R'ABC'" (1, 31) (1, 37)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

256

""")

257

self.check_tokenize('y = r"abc" + r"ABC" + R"ABC" + R"ABC"', """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

258

NAME 'y' (1, 0) (1, 1)

259

OP '=' (1, 2) (1, 3)

Mark Dickinson

2008-03-16 05:05:12 +0000

[diff] [blame]

260

STRING 'r"abc"' (1, 4) (1, 10)

261

OP '+' (1, 11) (1, 12)

262

STRING 'r"ABC"' (1, 13) (1, 19)

263

OP '+' (1, 20) (1, 21)

264

STRING 'R"ABC"' (1, 22) (1, 28)

265

OP '+' (1, 29) (1, 30)

266

STRING 'R"ABC"' (1, 31) (1, 37)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

267

""")

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

268

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

269

self.check_tokenize("u'abc' + U'abc'", """\

Meador Inge

2012-06-16 21:49:08 -0500

[diff] [blame]

270

STRING "u'abc'" (1, 0) (1, 6)

271

OP '+' (1, 7) (1, 8)

272

STRING "U'abc'" (1, 9) (1, 15)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

273

""")

274

self.check_tokenize('u"abc" + U"abc"', """\

Meador Inge

2012-06-16 21:49:08 -0500

[diff] [blame]

275

STRING 'u"abc"' (1, 0) (1, 6)

276

OP '+' (1, 7) (1, 8)

277

STRING 'U"abc"' (1, 9) (1, 15)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

278

""")

Meador Inge

2012-06-16 21:49:08 -0500

[diff] [blame]

279

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

280

self.check_tokenize("b'abc' + B'abc'", """\

Meador Inge

2012-06-16 21:49:08 -0500

[diff] [blame]

281

STRING "b'abc'" (1, 0) (1, 6)

282

OP '+' (1, 7) (1, 8)

283

STRING "B'abc'" (1, 9) (1, 15)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

284

""")

285

self.check_tokenize('b"abc" + B"abc"', """\

Meador Inge

2012-06-16 21:49:08 -0500

[diff] [blame]

286

STRING 'b"abc"' (1, 0) (1, 6)

287

OP '+' (1, 7) (1, 8)

288

STRING 'B"abc"' (1, 9) (1, 15)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

289

""")

290

self.check_tokenize("br'abc' + bR'abc' + Br'abc' + BR'abc'", """\

Meador Inge

2012-06-16 21:49:08 -0500

[diff] [blame]

291

STRING "br'abc'" (1, 0) (1, 7)

292

OP '+' (1, 8) (1, 9)

293

STRING "bR'abc'" (1, 10) (1, 17)

294

OP '+' (1, 18) (1, 19)

295

STRING "Br'abc'" (1, 20) (1, 27)

296

OP '+' (1, 28) (1, 29)

297

STRING "BR'abc'" (1, 30) (1, 37)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

298

""")

299

self.check_tokenize('br"abc" + bR"abc" + Br"abc" + BR"abc"', """\

Meador Inge

2012-06-16 21:49:08 -0500

[diff] [blame]

300

STRING 'br"abc"' (1, 0) (1, 7)

301

OP '+' (1, 8) (1, 9)

302

STRING 'bR"abc"' (1, 10) (1, 17)

303

OP '+' (1, 18) (1, 19)

304

STRING 'Br"abc"' (1, 20) (1, 27)

305

OP '+' (1, 28) (1, 29)

306

STRING 'BR"abc"' (1, 30) (1, 37)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

307

""")

308

self.check_tokenize("rb'abc' + rB'abc' + Rb'abc' + RB'abc'", """\

Meador Inge

2012-06-16 21:49:08 -0500

[diff] [blame]

309

STRING "rb'abc'" (1, 0) (1, 7)

310

OP '+' (1, 8) (1, 9)

311

STRING "rB'abc'" (1, 10) (1, 17)

312

OP '+' (1, 18) (1, 19)

313

STRING "Rb'abc'" (1, 20) (1, 27)

314

OP '+' (1, 28) (1, 29)

315

STRING "RB'abc'" (1, 30) (1, 37)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

316

""")

317

self.check_tokenize('rb"abc" + rB"abc" + Rb"abc" + RB"abc"', """\

Meador Inge

2012-06-16 21:49:08 -0500

[diff] [blame]

318

STRING 'rb"abc"' (1, 0) (1, 7)

319

OP '+' (1, 8) (1, 9)

320

STRING 'rB"abc"' (1, 10) (1, 17)

321

OP '+' (1, 18) (1, 19)

322

STRING 'Rb"abc"' (1, 20) (1, 27)

323

OP '+' (1, 28) (1, 29)

324

STRING 'RB"abc"' (1, 30) (1, 37)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

325

""")

Eric V. Smith

2015-10-16 20:45:53 -0400

[diff] [blame]

326

# Check 0, 1, and 2 character string prefixes.

327

self.check_tokenize(r'"a\

328

de\

329

fg"', """\

330

STRING '"a\\\\\\nde\\\\\\nfg"\' (1, 0) (3, 3)

331

""")

332

self.check_tokenize(r'u"a\

333

de"', """\

334

STRING 'u"a\\\\\\nde"\' (1, 0) (2, 3)

335

""")

336

self.check_tokenize(r'rb"a\

337

d"', """\

338

STRING 'rb"a\\\\\\nd"\' (1, 0) (2, 2)

339

""")

340

self.check_tokenize(r'"""a\

341

b"""', """\

342

STRING '\"\""a\\\\\\nb\"\""' (1, 0) (2, 4)

343

""")

344

self.check_tokenize(r'u"""a\

345

b"""', """\

346

STRING 'u\"\""a\\\\\\nb\"\""' (1, 0) (2, 4)

347

""")

348

self.check_tokenize(r'rb"""a\

349

b\

350

c"""', """\

351

STRING 'rb"\""a\\\\\\nb\\\\\\nc"\""' (1, 0) (3, 4)

352

""")

Eric V. Smith

1c8222c

2015-10-26 04:37:55 -0400

[diff] [blame]

353

self.check_tokenize('f"abc"', """\

354

STRING 'f"abc"' (1, 0) (1, 6)

355

""")

356

self.check_tokenize('fR"a{b}c"', """\

357

STRING 'fR"a{b}c"' (1, 0) (1, 9)

358

""")

359

self.check_tokenize('f"""abc"""', """\

360

STRING 'f\"\"\"abc\"\"\"' (1, 0) (1, 10)

361

""")

362

self.check_tokenize(r'f"abc\

363

def"', """\

364

STRING 'f"abc\\\\\\ndef"' (1, 0) (2, 4)

365

""")

366

self.check_tokenize(r'Rf"abc\

367

def"', """\

368

STRING 'Rf"abc\\\\\\ndef"' (1, 0) (2, 4)

369

""")

Meador Inge

2012-06-16 21:49:08 -0500

[diff] [blame]

370

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

371

def test_function(self):

372

self.check_tokenize("def d22(a, b, c=2, d=2, *k): pass", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

373

NAME 'def' (1, 0) (1, 3)

374

NAME 'd22' (1, 4) (1, 7)

375

OP '(' (1, 7) (1, 8)

376

NAME 'a' (1, 8) (1, 9)

377

OP ',' (1, 9) (1, 10)

378

NAME 'b' (1, 11) (1, 12)

379

OP ',' (1, 12) (1, 13)

380

NAME 'c' (1, 14) (1, 15)

381

OP '=' (1, 15) (1, 16)

382

NUMBER '2' (1, 16) (1, 17)

383

OP ',' (1, 17) (1, 18)

384

NAME 'd' (1, 19) (1, 20)

385

OP '=' (1, 20) (1, 21)

386

NUMBER '2' (1, 21) (1, 22)

387

OP ',' (1, 22) (1, 23)

388

OP '*' (1, 24) (1, 25)

389

NAME 'k' (1, 25) (1, 26)

390

OP ')' (1, 26) (1, 27)

391

OP ':' (1, 27) (1, 28)

392

NAME 'pass' (1, 29) (1, 33)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

393

""")

394

self.check_tokenize("def d01v_(a=1, *k, **w): pass", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

395

NAME 'def' (1, 0) (1, 3)

396

NAME 'd01v_' (1, 4) (1, 9)

397

OP '(' (1, 9) (1, 10)

398

NAME 'a' (1, 10) (1, 11)

399

OP '=' (1, 11) (1, 12)

400

NUMBER '1' (1, 12) (1, 13)

401

OP ',' (1, 13) (1, 14)

402

OP '*' (1, 15) (1, 16)

403

NAME 'k' (1, 16) (1, 17)

404

OP ',' (1, 17) (1, 18)

405

OP '**' (1, 19) (1, 21)

406

NAME 'w' (1, 21) (1, 22)

407

OP ')' (1, 22) (1, 23)

408

OP ':' (1, 23) (1, 24)

409

NAME 'pass' (1, 25) (1, 29)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

410

""")

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

411

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

412

def test_comparison(self):

413

# Comparison

414

self.check_tokenize("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != "

415

"1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

416

NAME 'if' (1, 0) (1, 2)

417

NUMBER '1' (1, 3) (1, 4)

418

OP '<' (1, 5) (1, 6)

419

NUMBER '1' (1, 7) (1, 8)

420

OP '>' (1, 9) (1, 10)

421

NUMBER '1' (1, 11) (1, 12)

422

OP '==' (1, 13) (1, 15)

423

NUMBER '1' (1, 16) (1, 17)

424

OP '>=' (1, 18) (1, 20)

425

NUMBER '5' (1, 21) (1, 22)

426

OP '<=' (1, 23) (1, 25)

427

NUMBER '0x15' (1, 26) (1, 30)

428

OP '<=' (1, 31) (1, 33)

429

NUMBER '0x12' (1, 34) (1, 38)

430

OP '!=' (1, 39) (1, 41)

431

NUMBER '1' (1, 42) (1, 43)

432

NAME 'and' (1, 44) (1, 47)

433

NUMBER '5' (1, 48) (1, 49)

434

NAME 'in' (1, 50) (1, 52)

435

NUMBER '1' (1, 53) (1, 54)

436

NAME 'not' (1, 55) (1, 58)

437

NAME 'in' (1, 59) (1, 61)

438

NUMBER '1' (1, 62) (1, 63)

439

NAME 'is' (1, 64) (1, 66)

440

NUMBER '1' (1, 67) (1, 68)

441

NAME 'or' (1, 69) (1, 71)

442

NUMBER '5' (1, 72) (1, 73)

443

NAME 'is' (1, 74) (1, 76)

444

NAME 'not' (1, 77) (1, 80)

445

NUMBER '1' (1, 81) (1, 82)

446

OP ':' (1, 82) (1, 83)

447

NAME 'pass' (1, 84) (1, 88)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

448

""")

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

449

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

450

def test_shift(self):

451

# Shift

452

self.check_tokenize("x = 1 << 1 >> 5", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

453

NAME 'x' (1, 0) (1, 1)

454

OP '=' (1, 2) (1, 3)

455

NUMBER '1' (1, 4) (1, 5)

456

OP '<<' (1, 6) (1, 8)

457

NUMBER '1' (1, 9) (1, 10)

458

OP '>>' (1, 11) (1, 13)

459

NUMBER '5' (1, 14) (1, 15)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

460

""")

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

461

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

462

def test_additive(self):

463

# Additive

464

self.check_tokenize("x = 1 - y + 15 - 1 + 0x124 + z + a[5]", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

465

NAME 'x' (1, 0) (1, 1)

466

OP '=' (1, 2) (1, 3)

467

NUMBER '1' (1, 4) (1, 5)

468

OP '-' (1, 6) (1, 7)

469

NAME 'y' (1, 8) (1, 9)

470

OP '+' (1, 10) (1, 11)

471

NUMBER '15' (1, 12) (1, 14)

472

OP '-' (1, 15) (1, 16)

Mark Dickinson

2008-03-16 05:05:12 +0000

[diff] [blame]

473

NUMBER '1' (1, 17) (1, 18)

474

OP '+' (1, 19) (1, 20)

475

NUMBER '0x124' (1, 21) (1, 26)

476

OP '+' (1, 27) (1, 28)

477

NAME 'z' (1, 29) (1, 30)

478

OP '+' (1, 31) (1, 32)

479

NAME 'a' (1, 33) (1, 34)

480

OP '[' (1, 34) (1, 35)

481

NUMBER '5' (1, 35) (1, 36)

482

OP ']' (1, 36) (1, 37)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

483

""")

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

484

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

485

def test_multiplicative(self):

486

# Multiplicative

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

487

self.check_tokenize("x = 1//1*1/5*12%0x12@42", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

488

NAME 'x' (1, 0) (1, 1)

489

OP '=' (1, 2) (1, 3)

490

NUMBER '1' (1, 4) (1, 5)

491

OP '//' (1, 5) (1, 7)

492

NUMBER '1' (1, 7) (1, 8)

493

OP '*' (1, 8) (1, 9)

494

NUMBER '1' (1, 9) (1, 10)

495

OP '/' (1, 10) (1, 11)

496

NUMBER '5' (1, 11) (1, 12)

497

OP '*' (1, 12) (1, 13)

498

NUMBER '12' (1, 13) (1, 15)

499

OP '%' (1, 15) (1, 16)

500

NUMBER '0x12' (1, 16) (1, 20)

Benjamin Peterson

d51374e

2014-04-09 23:55:56 -0400

[diff] [blame]

501

OP '@' (1, 20) (1, 21)

502

NUMBER '42' (1, 21) (1, 23)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

503

""")

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

504

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

505

def test_unary(self):

506

# Unary

507

self.check_tokenize("~1 ^ 1 & 1 |1 ^ -1", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

508

OP '~' (1, 0) (1, 1)

509

NUMBER '1' (1, 1) (1, 2)

510

OP '^' (1, 3) (1, 4)

511

NUMBER '1' (1, 5) (1, 6)

512

OP '&' (1, 7) (1, 8)

513

NUMBER '1' (1, 9) (1, 10)

514

OP '|' (1, 11) (1, 12)

515

NUMBER '1' (1, 12) (1, 13)

516

OP '^' (1, 14) (1, 15)

517

OP '-' (1, 16) (1, 17)

518

NUMBER '1' (1, 17) (1, 18)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

519

""")

520

self.check_tokenize("-1*1/1+1*1//1 - ---1**1", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

521

OP '-' (1, 0) (1, 1)

522

NUMBER '1' (1, 1) (1, 2)

523

OP '*' (1, 2) (1, 3)

524

NUMBER '1' (1, 3) (1, 4)

525

OP '/' (1, 4) (1, 5)

526

NUMBER '1' (1, 5) (1, 6)

527

OP '+' (1, 6) (1, 7)

528

NUMBER '1' (1, 7) (1, 8)

529

OP '*' (1, 8) (1, 9)

530

NUMBER '1' (1, 9) (1, 10)

531

OP '//' (1, 10) (1, 12)

532

NUMBER '1' (1, 12) (1, 13)

533

OP '-' (1, 14) (1, 15)

534

OP '-' (1, 16) (1, 17)

535

OP '-' (1, 17) (1, 18)

536

OP '-' (1, 18) (1, 19)

537

NUMBER '1' (1, 19) (1, 20)

538

OP '**' (1, 20) (1, 22)

539

NUMBER '1' (1, 22) (1, 23)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

540

""")

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

541

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

542

def test_selector(self):

543

# Selector

544

self.check_tokenize("import sys, time\nx = sys.modules['time'].time()", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

545

NAME 'import' (1, 0) (1, 6)

546

NAME 'sys' (1, 7) (1, 10)

547

OP ',' (1, 10) (1, 11)

548

NAME 'time' (1, 12) (1, 16)

549

NEWLINE '\\n' (1, 16) (1, 17)

550

NAME 'x' (2, 0) (2, 1)

551

OP '=' (2, 2) (2, 3)

552

NAME 'sys' (2, 4) (2, 7)

553

OP '.' (2, 7) (2, 8)

554

NAME 'modules' (2, 8) (2, 15)

555

OP '[' (2, 15) (2, 16)

556

STRING "'time'" (2, 16) (2, 22)

557

OP ']' (2, 22) (2, 23)

558

OP '.' (2, 23) (2, 24)

559

NAME 'time' (2, 24) (2, 28)

560

OP '(' (2, 28) (2, 29)

561

OP ')' (2, 29) (2, 30)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

562

""")

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

563

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

564

def test_method(self):

565

# Methods

566

self.check_tokenize("@staticmethod\ndef foo(x,y): pass", """\

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

567

OP '@' (1, 0) (1, 1)

Eric V. Smith

2015-10-16 20:45:53 -0400

[diff] [blame]

568

NAME 'staticmethod' (1, 1) (1, 13)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

569

NEWLINE '\\n' (1, 13) (1, 14)

570

NAME 'def' (2, 0) (2, 3)

571

NAME 'foo' (2, 4) (2, 7)

572

OP '(' (2, 7) (2, 8)

573

NAME 'x' (2, 8) (2, 9)

574

OP ',' (2, 9) (2, 10)

575

NAME 'y' (2, 10) (2, 11)

576

OP ')' (2, 11) (2, 12)

577

OP ':' (2, 12) (2, 13)

578

NAME 'pass' (2, 14) (2, 18)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

579

""")

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

580

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

581

def test_tabs(self):

582

# Evil tabs

583

self.check_tokenize("def f():\n"

584

"\tif x\n"

585

" \tpass", """\

Benjamin Peterson

a0dfa82

2009-11-13 02:25:08 +0000

[diff] [blame]

586

NAME 'def' (1, 0) (1, 3)

587

NAME 'f' (1, 4) (1, 5)

OP '(' (1, 5) (1, 6)

OP ')' (1, 6) (1, 7)

OP ':' (1, 7) (1, 8)

NEWLINE '\\n' (1, 8) (1, 9)

592

INDENT '\\t' (2, 0) (2, 1)

593

NAME 'if' (2, 1) (2, 3)

594

NAME 'x' (2, 4) (2, 5)

595

NEWLINE '\\n' (2, 5) (2, 6)

596

INDENT ' \\t' (3, 0) (3, 9)

597

NAME 'pass' (3, 9) (3, 13)

598

DEDENT '' (4, 0) (4, 0)

599

DEDENT '' (4, 0) (4, 0)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

600

""")

Benjamin Peterson

33856de

2010-08-30 14:41:20 +0000

[diff] [blame]

601

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

602

def test_non_ascii_identifiers(self):

603

# Non-ascii identifiers

604

self.check_tokenize("Örter = 'places'\ngrün = 'green'", """\

Benjamin Peterson

33856de

2010-08-30 14:41:20 +0000

[diff] [blame]

605

NAME 'Örter' (1, 0) (1, 5)

606

OP '=' (1, 6) (1, 7)

607

STRING "'places'" (1, 8) (1, 16)

608

NEWLINE '\\n' (1, 16) (1, 17)

609

NAME 'grün' (2, 0) (2, 4)

610

OP '=' (2, 5) (2, 6)

611

STRING "'green'" (2, 7) (2, 14)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

612

""")

Armin Ronacher

c0eaeca

2012-03-04 13:07:57 +0000

[diff] [blame]

613

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

614

def test_unicode(self):

615

# Legacy unicode literals:

616

self.check_tokenize("Örter = u'places'\ngrün = U'green'", """\

Armin Ronacher

c0eaeca

2012-03-04 13:07:57 +0000

[diff] [blame]

617

NAME 'Örter' (1, 0) (1, 5)

618

OP '=' (1, 6) (1, 7)

619

STRING "u'places'" (1, 8) (1, 17)

620

NEWLINE '\\n' (1, 17) (1, 18)

621

NAME 'grün' (2, 0) (2, 4)

622

OP '=' (2, 5) (2, 6)

Christian Heimes

0b3847d

2012-06-20 11:17:58 +0200

[diff] [blame]

623

STRING "U'green'" (2, 7) (2, 15)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

624

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

625

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

626

def test_async(self):

627

# Async/await extension:

628

self.check_tokenize("async = 1", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

629

NAME 'async' (1, 0) (1, 5)

630

OP '=' (1, 6) (1, 7)

631

NUMBER '1' (1, 8) (1, 9)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

632

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

633

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

634

self.check_tokenize("a = (async = 1)", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

635

NAME 'a' (1, 0) (1, 1)

636

OP '=' (1, 2) (1, 3)

637

OP '(' (1, 4) (1, 5)

638

NAME 'async' (1, 5) (1, 10)

639

OP '=' (1, 11) (1, 12)

640

NUMBER '1' (1, 13) (1, 14)

641

OP ')' (1, 14) (1, 15)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

642

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

643

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

644

self.check_tokenize("async()", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

645

NAME 'async' (1, 0) (1, 5)

646

OP '(' (1, 5) (1, 6)

647

OP ')' (1, 6) (1, 7)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

648

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

649

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

650

self.check_tokenize("class async(Bar):pass", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

651

NAME 'class' (1, 0) (1, 5)

652

NAME 'async' (1, 6) (1, 11)

653

OP '(' (1, 11) (1, 12)

654

NAME 'Bar' (1, 12) (1, 15)

655

OP ')' (1, 15) (1, 16)

656

OP ':' (1, 16) (1, 17)

657

NAME 'pass' (1, 17) (1, 21)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

658

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

659

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

660

self.check_tokenize("class async:pass", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

661

NAME 'class' (1, 0) (1, 5)

662

NAME 'async' (1, 6) (1, 11)

663

OP ':' (1, 11) (1, 12)

664

NAME 'pass' (1, 12) (1, 16)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

665

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

666

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

667

self.check_tokenize("await = 1", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

668

NAME 'await' (1, 0) (1, 5)

669

OP '=' (1, 6) (1, 7)

670

NUMBER '1' (1, 8) (1, 9)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

671

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

672

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

673

self.check_tokenize("foo.async", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

674

NAME 'foo' (1, 0) (1, 3)

675

OP '.' (1, 3) (1, 4)

676

NAME 'async' (1, 4) (1, 9)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

677

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

678

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

679

self.check_tokenize("async for a in b: pass", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

680

NAME 'async' (1, 0) (1, 5)

681

NAME 'for' (1, 6) (1, 9)

682

NAME 'a' (1, 10) (1, 11)

683

NAME 'in' (1, 12) (1, 14)

684

NAME 'b' (1, 15) (1, 16)

685

OP ':' (1, 16) (1, 17)

686

NAME 'pass' (1, 18) (1, 22)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

687

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

688

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

689

self.check_tokenize("async with a as b: pass", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

690

NAME 'async' (1, 0) (1, 5)

691

NAME 'with' (1, 6) (1, 10)

692

NAME 'a' (1, 11) (1, 12)

693

NAME 'as' (1, 13) (1, 15)

694

NAME 'b' (1, 16) (1, 17)

695

OP ':' (1, 17) (1, 18)

696

NAME 'pass' (1, 19) (1, 23)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

697

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

698

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

699

self.check_tokenize("async.foo", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

700

NAME 'async' (1, 0) (1, 5)

701

OP '.' (1, 5) (1, 6)

702

NAME 'foo' (1, 6) (1, 9)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

703

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

704

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

705

self.check_tokenize("async", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

706

NAME 'async' (1, 0) (1, 5)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

707

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

708

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

709

self.check_tokenize("async\n#comment\nawait", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

710

NAME 'async' (1, 0) (1, 5)

711

NEWLINE '\\n' (1, 5) (1, 6)

712

COMMENT '#comment' (2, 0) (2, 8)

713

NL '\\n' (2, 8) (2, 9)

714

NAME 'await' (3, 0) (3, 5)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

715

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

716

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

717

self.check_tokenize("async\n...\nawait", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

718

NAME 'async' (1, 0) (1, 5)

719

NEWLINE '\\n' (1, 5) (1, 6)

720

OP '...' (2, 0) (2, 3)

721

NEWLINE '\\n' (2, 3) (2, 4)

722

NAME 'await' (3, 0) (3, 5)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

723

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

724

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

725

self.check_tokenize("async\nawait", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

726

NAME 'async' (1, 0) (1, 5)

727

NEWLINE '\\n' (1, 5) (1, 6)

728

NAME 'await' (2, 0) (2, 5)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

729

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

730

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

731

self.check_tokenize("foo.async + 1", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

732

NAME 'foo' (1, 0) (1, 3)

733

OP '.' (1, 3) (1, 4)

734

NAME 'async' (1, 4) (1, 9)

735

OP '+' (1, 10) (1, 11)

736

NUMBER '1' (1, 12) (1, 13)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

737

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

738

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

739

self.check_tokenize("async def foo(): pass", """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

740

ASYNC 'async' (1, 0) (1, 5)

741

NAME 'def' (1, 6) (1, 9)

742

NAME 'foo' (1, 10) (1, 13)

743

OP '(' (1, 13) (1, 14)

744

OP ')' (1, 14) (1, 15)

745

OP ':' (1, 15) (1, 16)

746

NAME 'pass' (1, 17) (1, 21)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

747

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

748

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

749

self.check_tokenize('''\

async def foo():

def foo(await):

await = 1

if 1:

await

async += 1

''', """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

757

ASYNC 'async' (1, 0) (1, 5)

758

NAME 'def' (1, 6) (1, 9)

759

NAME 'foo' (1, 10) (1, 13)

760

OP '(' (1, 13) (1, 14)

761

OP ')' (1, 14) (1, 15)

762

OP ':' (1, 15) (1, 16)

763

NEWLINE '\\n' (1, 16) (1, 17)

764

INDENT ' ' (2, 0) (2, 2)

765

NAME 'def' (2, 2) (2, 5)

766

NAME 'foo' (2, 6) (2, 9)

767

OP '(' (2, 9) (2, 10)

Yury Selivanov

2015-07-22 13:33:45 +0300

[diff] [blame]

768

AWAIT 'await' (2, 10) (2, 15)

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

769

OP ')' (2, 15) (2, 16)

770

OP ':' (2, 16) (2, 17)

771

NEWLINE '\\n' (2, 17) (2, 18)

772

INDENT ' ' (3, 0) (3, 4)

Yury Selivanov

2015-07-22 13:33:45 +0300

[diff] [blame]

773

AWAIT 'await' (3, 4) (3, 9)

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

774

OP '=' (3, 10) (3, 11)

775

NUMBER '1' (3, 12) (3, 13)

776

NEWLINE '\\n' (3, 13) (3, 14)

777

DEDENT '' (4, 2) (4, 2)

778

NAME 'if' (4, 2) (4, 4)

779

NUMBER '1' (4, 5) (4, 6)

780

OP ':' (4, 6) (4, 7)

781

NEWLINE '\\n' (4, 7) (4, 8)

782

INDENT ' ' (5, 0) (5, 4)

783

AWAIT 'await' (5, 4) (5, 9)

784

NEWLINE '\\n' (5, 9) (5, 10)

785

DEDENT '' (6, 0) (6, 0)

786

DEDENT '' (6, 0) (6, 0)

787

NAME 'async' (6, 0) (6, 5)

788

OP '+=' (6, 6) (6, 8)

789

NUMBER '1' (6, 9) (6, 10)

790

NEWLINE '\\n' (6, 10) (6, 11)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

791

""")

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

792

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

793

self.check_tokenize('''\

794

async def foo():

795

async for i in 1: pass''', """\

Yury Selivanov

2015-05-11 22:57:16 -0400

[diff] [blame]

796

ASYNC 'async' (1, 0) (1, 5)

797

NAME 'def' (1, 6) (1, 9)

798

NAME 'foo' (1, 10) (1, 13)

799

OP '(' (1, 13) (1, 14)

800

OP ')' (1, 14) (1, 15)

801

OP ':' (1, 15) (1, 16)

802

NEWLINE '\\n' (1, 16) (1, 17)

803

INDENT ' ' (2, 0) (2, 2)

804

ASYNC 'async' (2, 2) (2, 7)

805

NAME 'for' (2, 8) (2, 11)

806

NAME 'i' (2, 12) (2, 13)

807

NAME 'in' (2, 14) (2, 16)

808

NUMBER '1' (2, 17) (2, 18)

809

OP ':' (2, 18) (2, 19)

810

NAME 'pass' (2, 20) (2, 24)

811

DEDENT '' (3, 0) (3, 0)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

812

""")

Yury Selivanov

2015-07-22 13:33:45 +0300

[diff] [blame]

813

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

814

self.check_tokenize('''async def foo(async): await''', """\

Yury Selivanov

2015-07-22 13:33:45 +0300

[diff] [blame]

815

ASYNC 'async' (1, 0) (1, 5)

816

NAME 'def' (1, 6) (1, 9)

817

NAME 'foo' (1, 10) (1, 13)

818

OP '(' (1, 13) (1, 14)

819

ASYNC 'async' (1, 14) (1, 19)

820

OP ')' (1, 19) (1, 20)

821

OP ':' (1, 20) (1, 21)

822

AWAIT 'await' (1, 22) (1, 27)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

823

""")

Yury Selivanov

2015-07-23 15:01:58 +0300

[diff] [blame]

824

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

825

self.check_tokenize('''\

def f():

def baz(): pass

async def bar(): pass

830

831

await = 2''', """\

Yury Selivanov

2015-07-23 15:01:58 +0300

[diff] [blame]

832

NAME 'def' (1, 0) (1, 3)

833

NAME 'f' (1, 4) (1, 5)

OP '(' (1, 5) (1, 6)

OP ')' (1, 6) (1, 7)

OP ':' (1, 7) (1, 8)

NEWLINE '\\n' (1, 8) (1, 9)

838

NL '\\n' (2, 0) (2, 1)

839

INDENT ' ' (3, 0) (3, 2)

840

NAME 'def' (3, 2) (3, 5)

841

NAME 'baz' (3, 6) (3, 9)

842

OP '(' (3, 9) (3, 10)

843

OP ')' (3, 10) (3, 11)

844

OP ':' (3, 11) (3, 12)

845

NAME 'pass' (3, 13) (3, 17)

846

NEWLINE '\\n' (3, 17) (3, 18)

847

ASYNC 'async' (4, 2) (4, 7)

848

NAME 'def' (4, 8) (4, 11)

849

NAME 'bar' (4, 12) (4, 15)

850

OP '(' (4, 15) (4, 16)

851

OP ')' (4, 16) (4, 17)

852

OP ':' (4, 17) (4, 18)

853

NAME 'pass' (4, 19) (4, 23)

854

NEWLINE '\\n' (4, 23) (4, 24)

855

NL '\\n' (5, 0) (5, 1)

856

NAME 'await' (6, 2) (6, 7)

857

OP '=' (6, 8) (6, 9)

858

NUMBER '2' (6, 10) (6, 11)

859

DEDENT '' (7, 0) (7, 0)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

860

""")

Yury Selivanov

2015-07-23 15:01:58 +0300

[diff] [blame]

861

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

862

self.check_tokenize('''\

async def f():

def baz(): pass

async def bar(): pass

867

868

await = 2''', """\

Yury Selivanov

2015-07-23 15:01:58 +0300

[diff] [blame]

869

ASYNC 'async' (1, 0) (1, 5)

870

NAME 'def' (1, 6) (1, 9)

871

NAME 'f' (1, 10) (1, 11)

872

OP '(' (1, 11) (1, 12)

873

OP ')' (1, 12) (1, 13)

874

OP ':' (1, 13) (1, 14)

875

NEWLINE '\\n' (1, 14) (1, 15)

876

NL '\\n' (2, 0) (2, 1)

877

INDENT ' ' (3, 0) (3, 2)

878

NAME 'def' (3, 2) (3, 5)

879

NAME 'baz' (3, 6) (3, 9)

880

OP '(' (3, 9) (3, 10)

881

OP ')' (3, 10) (3, 11)

882

OP ':' (3, 11) (3, 12)

883

NAME 'pass' (3, 13) (3, 17)

884

NEWLINE '\\n' (3, 17) (3, 18)

885

ASYNC 'async' (4, 2) (4, 7)

886

NAME 'def' (4, 8) (4, 11)

887

NAME 'bar' (4, 12) (4, 15)

888

OP '(' (4, 15) (4, 16)

889

OP ')' (4, 16) (4, 17)

890

OP ':' (4, 17) (4, 18)

891

NAME 'pass' (4, 19) (4, 23)

892

NEWLINE '\\n' (4, 23) (4, 24)

893

NL '\\n' (5, 0) (5, 1)

894

AWAIT 'await' (6, 2) (6, 7)

895

OP '=' (6, 8) (6, 9)

896

NUMBER '2' (6, 10) (6, 11)

897

DEDENT '' (7, 0) (7, 0)

Serhiy Storchaka

2015-10-06 18:23:12 +0300

[diff] [blame]

898

""")

Thomas Wouters

2006-12-13 04:49:30 +0000

[diff] [blame]

899

Raymond Hettinger

2005-06-10 11:05:19 +0000

[diff] [blame]

900

Raymond Hettinger

2005-06-10 11:05:19 +0000

[diff] [blame]

901

def decistmt(s):

Raymond Hettinger

2005-06-10 11:05:19 +0000

[diff] [blame]

902

result = []

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

903

g = tokenize(BytesIO(s.encode('utf-8')).readline) # tokenize the string

Raymond Hettinger

2005-06-10 11:05:19 +0000

[diff] [blame]

904

for toknum, tokval, _, _, _ in g:

905

if toknum == NUMBER and '.' in tokval: # replace NUMBER tokens

result.extend([

(NAME, 'Decimal'),

(OP, '('),

(STRING, repr(tokval)),

(OP, ')')

])

else:

result.append((toknum, tokval))

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

914

return untokenize(result).decode('utf-8')

915

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

916

class TestMisc(TestCase):

917

918

def test_decistmt(self):

919

# Substitute Decimals for floats in a string of statements.

920

# This is an example from the docs.

921

922

from decimal import Decimal

923

s = '+21.3e-5*-.1234/81.7'

924

self.assertEqual(decistmt(s),

925

"+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')")

926

927

# The format of the exponent is inherited from the platform C library.

928

# Known cases are "e-007" (Windows) and "e-07" (not Windows). Since

929

# we're only showing 11 digits, and the 12th isn't close to 5, the

930

# rest of the output should be platform-independent.

931

self.assertRegex(repr(eval(s)), '-3.2171603427[0-9]*e-0+7')

932

933

# Output from calculations with Decimal should be identical across all

934

# platforms.

935

self.assertEqual(eval(decistmt(s)),

936

Decimal('-3.217160342717258261933904529E-7'))

937

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

938

939

class TestTokenizerAdheresToPep0263(TestCase):

940

"""

941

Test that tokenizer adheres to the coding behaviour stipulated in PEP 0263.

942

"""

943

944

def _testFile(self, filename):

945

path = os.path.join(os.path.dirname(__file__), filename)

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

946

TestRoundtrip.check_roundtrip(self, open(path, 'rb'))

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

947

948

def test_utf8_coding_cookie_and_no_utf8_bom(self):

Ned Deily

2ea6fcc

2011-07-19 16:15:27 -0700

[diff] [blame]

949

f = 'tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt'

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

950

self._testFile(f)

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

951

952

def test_latin1_coding_cookie_and_utf8_bom(self):

953

"""

954

As per PEP 0263, if a file starts with a utf-8 BOM signature, the only

955

allowed encoding for the comment is 'utf-8'. The text file used in

956

this test starts with a BOM signature, but specifies latin1 as the

957

coding, so verify that a SyntaxError is raised, which matches the

958

behaviour of the interpreter when it encounters a similar condition.

959

"""

960

f = 'tokenize_tests-latin1-coding-cookie-and-utf8-bom-sig.txt'

Benjamin Peterson

c9c0f20

2009-06-30 23:06:06 +0000

[diff] [blame]

961

self.assertRaises(SyntaxError, self._testFile, f)

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

962

963

def test_no_coding_cookie_and_utf8_bom(self):

964

f = 'tokenize_tests-no-coding-cookie-and-utf8-bom-sig-only.txt'

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

965

self._testFile(f)

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

966

967

def test_utf8_coding_cookie_and_utf8_bom(self):

968

f = 'tokenize_tests-utf8-coding-cookie-and-utf8-bom-sig.txt'

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

969

self._testFile(f)

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

970

Florent Xicluna

11f0b41

2012-07-07 12:13:35 +0200

[diff] [blame]

971

def test_bad_coding_cookie(self):

972

self.assertRaises(SyntaxError, self._testFile, 'bad_coding.py')

973

self.assertRaises(SyntaxError, self._testFile, 'bad_coding2.py')

974

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

975

976

class Test_Tokenize(TestCase):

977

978

def test__tokenize_decodes_with_specified_encoding(self):

979

literal = '"ЉЊЈЁЂ"'

980

line = literal.encode('utf-8')

first = False

def readline():

nonlocal first

if not first:

first = True

return line

else:

return b''

# skip the initial encoding token and the end token

991

tokens = list(_tokenize(readline, encoding='utf-8'))[1:-1]

992

expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

993

self.assertEqual(tokens, expected_tokens,

994

"bytes not decoded with encoding")

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

995

996

def test__tokenize_does_not_decode_with_encoding_none(self):

literal = '"ЉЊЈЁЂ"'

first = False

def readline():

nonlocal first

if not first:

first = True

return literal

else:

return b''

# skip the end token

tokens = list(_tokenize(readline, encoding=None))[:-1]

1009

expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1010

self.assertEqual(tokens, expected_tokens,

1011

"string not tokenized when encoding is None")

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1012

1013

1014

class TestDetectEncoding(TestCase):

1015

1016

def get_readline(self, lines):

index = 0

def readline():

nonlocal index

if index == len(lines):

raise StopIteration

line = lines[index]

index += 1

return line

return readline

def test_no_bom_no_encoding_cookie(self):

1028

lines = (

1029

b'# something\n',

1030

b'print(something)\n',

1031

b'do_something(else)\n'

1032

)

1033

encoding, consumed_lines = detect_encoding(self.get_readline(lines))

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1034

self.assertEqual(encoding, 'utf-8')

1035

self.assertEqual(consumed_lines, list(lines[:2]))

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1036

1037

def test_bom_no_cookie(self):

1038

lines = (

1039

b'\xef\xbb\xbf# something\n',

1040

b'print(something)\n',

1041

b'do_something(else)\n'

1042

)

1043

encoding, consumed_lines = detect_encoding(self.get_readline(lines))

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1044

self.assertEqual(encoding, 'utf-8-sig')

1045

self.assertEqual(consumed_lines,

1046

[b'# something\n', b'print(something)\n'])

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1047

1048

def test_cookie_first_line_no_bom(self):

1049

lines = (

1050

b'# -*- coding: latin-1 -*-\n',

1051

b'print(something)\n',

1052

b'do_something(else)\n'

1053

)

1054

encoding, consumed_lines = detect_encoding(self.get_readline(lines))

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1055

self.assertEqual(encoding, 'iso-8859-1')

1056

self.assertEqual(consumed_lines, [b'# -*- coding: latin-1 -*-\n'])

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1057

1058

def test_matched_bom_and_cookie_first_line(self):

1059

lines = (

1060

b'\xef\xbb\xbf# coding=utf-8\n',

1061

b'print(something)\n',

1062

b'do_something(else)\n'

1063

)

1064

encoding, consumed_lines = detect_encoding(self.get_readline(lines))

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1065

self.assertEqual(encoding, 'utf-8-sig')

1066

self.assertEqual(consumed_lines, [b'# coding=utf-8\n'])

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1067

1068

def test_mismatched_bom_and_cookie_first_line_raises_syntaxerror(self):

1069

lines = (

1070

b'\xef\xbb\xbf# vim: set fileencoding=ascii :\n',

1071

b'print(something)\n',

1072

b'do_something(else)\n'

1073

)

1074

readline = self.get_readline(lines)

1075

self.assertRaises(SyntaxError, detect_encoding, readline)

1076

1077

def test_cookie_second_line_no_bom(self):

1078

lines = (

1079

b'#! something\n',

1080

b'# vim: set fileencoding=ascii :\n',

1081

b'print(something)\n',

1082

b'do_something(else)\n'

1083

)

1084

encoding, consumed_lines = detect_encoding(self.get_readline(lines))

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1085

self.assertEqual(encoding, 'ascii')

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1086

expected = [b'#! something\n', b'# vim: set fileencoding=ascii :\n']

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1087

self.assertEqual(consumed_lines, expected)

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1088

1089

def test_matched_bom_and_cookie_second_line(self):

1090

lines = (

1091

b'\xef\xbb\xbf#! something\n',

1092

b'f# coding=utf-8\n',

1093

b'print(something)\n',

1094

b'do_something(else)\n'

1095

)

1096

encoding, consumed_lines = detect_encoding(self.get_readline(lines))

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1097

self.assertEqual(encoding, 'utf-8-sig')

1098

self.assertEqual(consumed_lines,

1099

[b'#! something\n', b'f# coding=utf-8\n'])

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1100

1101

def test_mismatched_bom_and_cookie_second_line_raises_syntaxerror(self):

1102

lines = (

1103

b'\xef\xbb\xbf#! something\n',

1104

b'# vim: set fileencoding=ascii :\n',

1105

b'print(something)\n',

1106

b'do_something(else)\n'

1107

)

1108

readline = self.get_readline(lines)

1109

self.assertRaises(SyntaxError, detect_encoding, readline)

1110

Serhiy Storchaka

768c16c

2014-01-09 18:36:09 +0200

[diff] [blame]

1111

def test_cookie_second_line_noncommented_first_line(self):

1112

lines = (

1113

b"print('\xc2\xa3')\n",

1114

b'# vim: set fileencoding=iso8859-15 :\n',

1115

b"print('\xe2\x82\xac')\n"

1116

)

1117

encoding, consumed_lines = detect_encoding(self.get_readline(lines))

1118

self.assertEqual(encoding, 'utf-8')

1119

expected = [b"print('\xc2\xa3')\n"]

1120

self.assertEqual(consumed_lines, expected)

1121

1122

def test_cookie_second_line_commented_first_line(self):

1123

lines = (

1124

b"#print('\xc2\xa3')\n",

1125

b'# vim: set fileencoding=iso8859-15 :\n',

1126

b"print('\xe2\x82\xac')\n"

1127

)

1128

encoding, consumed_lines = detect_encoding(self.get_readline(lines))

1129

self.assertEqual(encoding, 'iso8859-15')

1130

expected = [b"#print('\xc2\xa3')\n", b'# vim: set fileencoding=iso8859-15 :\n']

1131

self.assertEqual(consumed_lines, expected)

1132

1133

def test_cookie_second_line_empty_first_line(self):

1134

lines = (

1135

b'\n',

1136

b'# vim: set fileencoding=iso8859-15 :\n',

1137

b"print('\xe2\x82\xac')\n"

1138

)

1139

encoding, consumed_lines = detect_encoding(self.get_readline(lines))

1140

self.assertEqual(encoding, 'iso8859-15')

1141

expected = [b'\n', b'# vim: set fileencoding=iso8859-15 :\n']

1142

self.assertEqual(consumed_lines, expected)

1143

Benjamin Peterson

2009-10-09 21:43:09 +0000

[diff] [blame]

1144

def test_latin1_normalization(self):

1145

# See get_normal_name() in tokenizer.c.

1146

encodings = ("latin-1", "iso-8859-1", "iso-latin-1", "latin-1-unix",

1147

"iso-8859-1-unix", "iso-latin-1-mac")

1148

for encoding in encodings:

1149

for rep in ("-", "_"):

1150

enc = encoding.replace("-", rep)

1151

lines = (b"#!/usr/bin/python\n",

1152

b"# coding: " + enc.encode("ascii") + b"\n",

1153

b"print(things)\n",

1154

b"do_something += 4\n")

1155

rl = self.get_readline(lines)

1156

found, consumed_lines = detect_encoding(rl)

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1157

self.assertEqual(found, "iso-8859-1")

Benjamin Peterson

2009-10-09 21:43:09 +0000

[diff] [blame]

1158

Martin v. Löwis

63674f4

2012-04-20 14:36:47 +0200

[diff] [blame]

1159

def test_syntaxerror_latin1(self):

1160

# Issue 14629: need to raise SyntaxError if the first

1161

# line(s) have non-UTF-8 characters

1162

lines = (

1163

b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S

1164

)

1165

readline = self.get_readline(lines)

1166

self.assertRaises(SyntaxError, detect_encoding, readline)

1167

1168

Benjamin Peterson

2009-10-09 21:43:09 +0000

[diff] [blame]

1169

def test_utf8_normalization(self):

1170

# See get_normal_name() in tokenizer.c.

1171

encodings = ("utf-8", "utf-8-mac", "utf-8-unix")

1172

for encoding in encodings:

1173

for rep in ("-", "_"):

1174

enc = encoding.replace("-", rep)

1175

lines = (b"#!/usr/bin/python\n",

1176

b"# coding: " + enc.encode("ascii") + b"\n",

1177

b"1 + 3\n")

1178

rl = self.get_readline(lines)

1179

found, consumed_lines = detect_encoding(rl)

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1180

self.assertEqual(found, "utf-8")

Benjamin Peterson

2009-10-09 21:43:09 +0000

[diff] [blame]

1181

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1182

def test_short_files(self):

1183

readline = self.get_readline((b'print(something)\n',))

1184

encoding, consumed_lines = detect_encoding(readline)

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1185

self.assertEqual(encoding, 'utf-8')

1186

self.assertEqual(consumed_lines, [b'print(something)\n'])

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1187

1188

encoding, consumed_lines = detect_encoding(self.get_readline(()))

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1189

self.assertEqual(encoding, 'utf-8')

1190

self.assertEqual(consumed_lines, [])

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1191

1192

readline = self.get_readline((b'\xef\xbb\xbfprint(something)\n',))

1193

encoding, consumed_lines = detect_encoding(readline)

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1194

self.assertEqual(encoding, 'utf-8-sig')

1195

self.assertEqual(consumed_lines, [b'print(something)\n'])

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1196

1197

readline = self.get_readline((b'\xef\xbb\xbf',))

1198

encoding, consumed_lines = detect_encoding(readline)

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1199

self.assertEqual(encoding, 'utf-8-sig')

1200

self.assertEqual(consumed_lines, [])

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1201

Benjamin Peterson

433f32c

2008-12-12 01:25:05 +0000

[diff] [blame]

1202

readline = self.get_readline((b'# coding: bad\n',))

1203

self.assertRaises(SyntaxError, detect_encoding, readline)

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1204

Serhiy Storchaka

dafea85

2013-09-16 23:51:56 +0300

[diff] [blame]

1205

def test_false_encoding(self):

1206

# Issue 18873: "Encoding" detected in non-comment lines

1207

readline = self.get_readline((b'print("#coding=fake")',))

1208

encoding, consumed_lines = detect_encoding(readline)

1209

self.assertEqual(encoding, 'utf-8')

1210

self.assertEqual(consumed_lines, [b'print("#coding=fake")'])

1211

Victor Stinner

58c0752

2010-11-09 01:08:59 +0000

[diff] [blame]

1212

def test_open(self):

1213

filename = support.TESTFN + '.py'

1214

self.addCleanup(support.unlink, filename)

1215

1216

# test coding cookie

1217

for encoding in ('iso-8859-15', 'utf-8'):

1218

with open(filename, 'w', encoding=encoding) as fp:

1219

print("# coding: %s" % encoding, file=fp)

1220

print("print('euro:\u20ac')", file=fp)

1221

with tokenize_open(filename) as fp:

Victor Stinner

92665ab

2010-11-09 01:11:31 +0000

[diff] [blame]

1222

self.assertEqual(fp.encoding, encoding)

1223

self.assertEqual(fp.mode, 'r')

Victor Stinner

58c0752

2010-11-09 01:08:59 +0000

[diff] [blame]

1224

1225

# test BOM (no coding cookie)

1226

with open(filename, 'w', encoding='utf-8-sig') as fp:

1227

print("print('euro:\u20ac')", file=fp)

1228

with tokenize_open(filename) as fp:

Victor Stinner

92665ab

2010-11-09 01:11:31 +0000

[diff] [blame]

1229

self.assertEqual(fp.encoding, 'utf-8-sig')

1230

self.assertEqual(fp.mode, 'r')

Victor Stinner

58c0752

2010-11-09 01:08:59 +0000

[diff] [blame]

1231

Brett Cannon

c33f3f2

2012-04-20 13:23:54 -0400

[diff] [blame]

1232

def test_filename_in_exception(self):

1233

# When possible, include the file name in the exception.

1234

path = 'some_file_path'

1235

lines = (

1236

b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S

1237

)

1238

class Bunk:

1239

def __init__(self, lines, path):

self.name = path

self._lines = lines

self._index = 0

def readline(self):

if self._index == len(lines):

1246

raise StopIteration

1247

line = lines[self._index]

self._index += 1

return line

with self.assertRaises(SyntaxError):

1252

ins = Bunk(lines, path)

1253

# Make sure lacking a name isn't an issue.

1254

del ins.name

1255

detect_encoding(ins.readline)

1256

with self.assertRaisesRegex(SyntaxError, '.*{}'.format(path)):

1257

ins = Bunk(lines, path)

1258

detect_encoding(ins.readline)

1259

Victor Stinner

387729e

2015-05-26 00:43:58 +0200

[diff] [blame]

1260

def test_open_error(self):

1261

# Issue #23840: open() must close the binary file on error

1262

m = BytesIO(b'#coding:xxx')

1263

with mock.patch('tokenize._builtin_open', return_value=m):

1264

self.assertRaises(SyntaxError, tokenize_open, 'foobar')

1265

self.assertTrue(m.closed)

1266

1267

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1268

class TestTokenize(TestCase):

1269

1270

def test_tokenize(self):

1271

import tokenize as tokenize_module

1272

encoding = object()

1273

encoding_used = None

1274

def mock_detect_encoding(readline):

Serhiy Storchaka

74a49ac

2015-03-20 16:46:19 +0200

[diff] [blame]

1275

return encoding, [b'first', b'second']

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1276

1277

def mock__tokenize(readline, encoding):

1278

nonlocal encoding_used

1279

encoding_used = encoding

1280

out = []

1281

while True:

1282

next_line = readline()

1283

if next_line:

1284

out.append(next_line)

continue

return out

counter = 0

def mock_readline():

nonlocal counter

counter += 1

if counter == 5:

return b''

Serhiy Storchaka

74a49ac

2015-03-20 16:46:19 +0200

[diff] [blame]

1294

return str(counter).encode()

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1295

1296

orig_detect_encoding = tokenize_module.detect_encoding

1297

orig__tokenize = tokenize_module._tokenize

1298

tokenize_module.detect_encoding = mock_detect_encoding

1299

tokenize_module._tokenize = mock__tokenize

1300

try:

1301

results = tokenize(mock_readline)

Serhiy Storchaka

74a49ac

2015-03-20 16:46:19 +0200

[diff] [blame]

1302

self.assertEqual(list(results),

1303

[b'first', b'second', b'1', b'2', b'3', b'4'])

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1304

finally:

1305

tokenize_module.detect_encoding = orig_detect_encoding

1306

tokenize_module._tokenize = orig__tokenize

1307

1308

self.assertTrue(encoding_used, encoding)

Raymond Hettinger

2005-06-10 11:05:19 +0000

[diff] [blame]

1309

Yury Selivanov

8085b80

2015-05-18 12:50:52 -0400

[diff] [blame]

1310

def test_oneline_defs(self):

1311

buf = []

1312

for i in range(500):

1313

buf.append('def i{i}(): return {i}'.format(i=i))

buf.append('OK')

buf = '\n'.join(buf)

# Test that 500 consequent, one-line defs is OK

1318

toks = list(tokenize(BytesIO(buf.encode('utf-8')).readline))

1319

self.assertEqual(toks[-2].string, 'OK') # [-1] is always ENDMARKER

1320

Meador Inge

00c7f85

2012-01-19 00:44:45 -0600

[diff] [blame]

1321

def assertExactTypeEqual(self, opstr, *optypes):

1322

tokens = list(tokenize(BytesIO(opstr.encode('utf-8')).readline))

1323

num_optypes = len(optypes)

1324

self.assertEqual(len(tokens), 2 + num_optypes)

1325

self.assertEqual(token.tok_name[tokens[0].exact_type],

1326

token.tok_name[ENCODING])

1327

for i in range(num_optypes):

1328

self.assertEqual(token.tok_name[tokens[i + 1].exact_type],

1329

token.tok_name[optypes[i]])

1330

self.assertEqual(token.tok_name[tokens[1 + num_optypes].exact_type],

1331

token.tok_name[token.ENDMARKER])

1332

1333

def test_exact_type(self):

1334

self.assertExactTypeEqual('()', token.LPAR, token.RPAR)

1335

self.assertExactTypeEqual('[]', token.LSQB, token.RSQB)

1336

self.assertExactTypeEqual(':', token.COLON)

1337

self.assertExactTypeEqual(',', token.COMMA)

1338

self.assertExactTypeEqual(';', token.SEMI)

1339

self.assertExactTypeEqual('+', token.PLUS)

1340

self.assertExactTypeEqual('-', token.MINUS)

1341

self.assertExactTypeEqual('*', token.STAR)

1342

self.assertExactTypeEqual('/', token.SLASH)

1343

self.assertExactTypeEqual('|', token.VBAR)

1344

self.assertExactTypeEqual('&', token.AMPER)

1345

self.assertExactTypeEqual('<', token.LESS)

1346

self.assertExactTypeEqual('>', token.GREATER)

1347

self.assertExactTypeEqual('=', token.EQUAL)

1348

self.assertExactTypeEqual('.', token.DOT)

1349

self.assertExactTypeEqual('%', token.PERCENT)

1350

self.assertExactTypeEqual('{}', token.LBRACE, token.RBRACE)

1351

self.assertExactTypeEqual('==', token.EQEQUAL)

1352

self.assertExactTypeEqual('!=', token.NOTEQUAL)

1353

self.assertExactTypeEqual('<=', token.LESSEQUAL)

1354

self.assertExactTypeEqual('>=', token.GREATEREQUAL)

1355

self.assertExactTypeEqual('~', token.TILDE)

1356

self.assertExactTypeEqual('^', token.CIRCUMFLEX)

1357

self.assertExactTypeEqual('<<', token.LEFTSHIFT)

1358

self.assertExactTypeEqual('>>', token.RIGHTSHIFT)

1359

self.assertExactTypeEqual('**', token.DOUBLESTAR)

1360

self.assertExactTypeEqual('+=', token.PLUSEQUAL)

1361

self.assertExactTypeEqual('-=', token.MINEQUAL)

1362

self.assertExactTypeEqual('*=', token.STAREQUAL)

1363

self.assertExactTypeEqual('/=', token.SLASHEQUAL)

1364

self.assertExactTypeEqual('%=', token.PERCENTEQUAL)

1365

self.assertExactTypeEqual('&=', token.AMPEREQUAL)

1366

self.assertExactTypeEqual('|=', token.VBAREQUAL)

1367

self.assertExactTypeEqual('^=', token.CIRCUMFLEXEQUAL)

1368

self.assertExactTypeEqual('^=', token.CIRCUMFLEXEQUAL)

1369

self.assertExactTypeEqual('<<=', token.LEFTSHIFTEQUAL)

1370

self.assertExactTypeEqual('>>=', token.RIGHTSHIFTEQUAL)

1371

self.assertExactTypeEqual('**=', token.DOUBLESTAREQUAL)

1372

self.assertExactTypeEqual('//', token.DOUBLESLASH)

1373

self.assertExactTypeEqual('//=', token.DOUBLESLASHEQUAL)

1374

self.assertExactTypeEqual('@', token.AT)

Benjamin Peterson

d51374e

2014-04-09 23:55:56 -0400

[diff] [blame]

1375

self.assertExactTypeEqual('@=', token.ATEQUAL)

Meador Inge

00c7f85

2012-01-19 00:44:45 -0600

[diff] [blame]

1376

1377

self.assertExactTypeEqual('a**2+b**2==c**2',

1378

NAME, token.DOUBLESTAR, NUMBER,

1379

token.PLUS,

1380

NAME, token.DOUBLESTAR, NUMBER,

1381

token.EQEQUAL,

1382

NAME, token.DOUBLESTAR, NUMBER)

1383

self.assertExactTypeEqual('{1, 2, 3}',

1384

token.LBRACE,

1385

token.NUMBER, token.COMMA,

1386

token.NUMBER, token.COMMA,

1387

token.NUMBER,

1388

token.RBRACE)

1389

self.assertExactTypeEqual('^(x & 0x1)',

1390

token.CIRCUMFLEX,

1391

token.LPAR,

1392

token.NAME, token.AMPER, token.NUMBER,

1393

token.RPAR)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

1394

Ezio Melotti

fafa8b7

2012-11-03 17:46:51 +0200

[diff] [blame]

1395

def test_pathological_trailing_whitespace(self):

1396

# See http://bugs.python.org/issue16152

1397

self.assertExactTypeEqual('@ ', token.AT)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

1398

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

1399

Terry Jan Reedy

2014-02-17 16:45:48 -0500

[diff] [blame]

1400

class UntokenizeTest(TestCase):

Terry Jan Reedy

58edfd9

2014-02-17 16:49:06 -0500

[diff] [blame]

1401

Terry Jan Reedy

2014-02-17 16:45:48 -0500

[diff] [blame]

1402

def test_bad_input_order(self):

Terry Jan Reedy

2014-02-23 23:33:08 -0500

[diff] [blame]

1403

# raise if previous row

Terry Jan Reedy

2014-02-17 16:45:48 -0500

[diff] [blame]

u = Untokenizer()

u.prev_row = 2

u.prev_col = 2

with self.assertRaises(ValueError) as cm:

1408

u.add_whitespace((1,3))

Terry Jan Reedy

58edfd9

2014-02-17 16:49:06 -0500

[diff] [blame]

1409

self.assertEqual(cm.exception.args[0],

Terry Jan Reedy

2014-02-17 16:45:48 -0500

[diff] [blame]

1410

'start (1,3) precedes previous end (2,2)')

Terry Jan Reedy

2014-02-23 23:33:08 -0500

[diff] [blame]

1411

# raise if previous column in row

Terry Jan Reedy

2014-02-17 16:45:48 -0500

[diff] [blame]

1412

self.assertRaises(ValueError, u.add_whitespace, (2,1))

1413

Terry Jan Reedy

2014-02-23 23:33:08 -0500

[diff] [blame]

1414

def test_backslash_continuation(self):

1415

# The problem is that <whitespace>\<newline> leaves no token

u = Untokenizer()

u.prev_row = 1

u.prev_col = 1

u.tokens = []

u.add_whitespace((2, 0))

1421

self.assertEqual(u.tokens, ['\\\n'])

1422

u.prev_row = 2

1423

u.add_whitespace((4, 4))

1424

self.assertEqual(u.tokens, ['\\\n', '\\\n\\\n', ' '])

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

1425

TestRoundtrip.check_roundtrip(self, 'a\n b\n c\n \\\n c\n')

Terry Jan Reedy

2014-02-23 23:33:08 -0500

[diff] [blame]

1426

Terry Jan Reedy

5b8d2c3

2014-02-17 23:12:16 -0500

[diff] [blame]

1427

def test_iter_compat(self):

1428

u = Untokenizer()

1429

token = (NAME, 'Hello')

1430

tokens = [(ENCODING, 'utf-8'), token]

1431

u.compat(token, iter([]))

1432

self.assertEqual(u.tokens, ["Hello "])

1433

u = Untokenizer()

1434

self.assertEqual(u.untokenize(iter([token])), 'Hello ')

1435

u = Untokenizer()

1436

self.assertEqual(u.untokenize(iter(tokens)), 'Hello ')

1437

self.assertEqual(u.encoding, 'utf-8')

1438

self.assertEqual(untokenize(iter(tokens)), b'Hello ')

1439

Terry Jan Reedy

2014-02-17 16:45:48 -0500

[diff] [blame]

1440

Jason R. Coombs

2015-06-20 19:52:22 -0400

[diff] [blame]

1441

class TestRoundtrip(TestCase):

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

1442

1443

def check_roundtrip(self, f):

1444

"""

1445

Test roundtrip for `untokenize`. `f` is an open file or a string.

1446

The source code in f is tokenized to both 5- and 2-tuples.

1447

Both sequences are converted back to source code via

1448

tokenize.untokenize(), and the latter tokenized again to 2-tuples.

1449

The test fails if the 3 pair tokenizations do not match.

1450

1451

When untokenize bugs are fixed, untokenize with 5-tuples should

1452

reproduce code that does not contain a backslash continuation

1453

following spaces. A proper test should test this.

1454

"""

1455

# Get source code and original tokenizations

1456

if isinstance(f, str):

1457

code = f.encode('utf-8')

else:

code = f.read()

f.close()

readline = iter(code.splitlines(keepends=True)).__next__

1462

tokens5 = list(tokenize(readline))

1463

tokens2 = [tok[:2] for tok in tokens5]

1464

# Reproduce tokens2 from pairs

1465

bytes_from2 = untokenize(tokens2)

1466

readline2 = iter(bytes_from2.splitlines(keepends=True)).__next__

1467

tokens2_from2 = [tok[:2] for tok in tokenize(readline2)]

1468

self.assertEqual(tokens2_from2, tokens2)

1469

# Reproduce tokens2 from 5-tuples

1470

bytes_from5 = untokenize(tokens5)

1471

readline5 = iter(bytes_from5.splitlines(keepends=True)).__next__

1472

tokens2_from5 = [tok[:2] for tok in tokenize(readline5)]

1473

self.assertEqual(tokens2_from5, tokens2)

1474

1475

def test_roundtrip(self):

1476

# There are some standard formatting practices that are easy to get right.

1477

1478

self.check_roundtrip("if x == 1:\n"

1479

" print(x)\n")

1480

self.check_roundtrip("# This is a comment\n"

1481

"# This also")

1482

1483

# Some people use different formatting conventions, which makes

1484

# untokenize a little trickier. Note that this test involves trailing

1485

# whitespace after the colon. Note that we use hex escapes to make the

1486

# two trailing blanks apparent in the expected output.

1487

1488

self.check_roundtrip("if x == 1 : \n"

1489

" print(x)\n")

1490

fn = support.findfile("tokenize_tests.txt")

1491

with open(fn, 'rb') as f:

1492

self.check_roundtrip(f)

1493

self.check_roundtrip("if x == 1:\n"

1494

" # A comment by itself.\n"

1495

" print(x) # Comment here, too.\n"

1496

" # Another comment.\n"

1497

"after_if = True\n")

1498

self.check_roundtrip("if (x # The comments need to go in the right place\n"

1499

" == 1):\n"

1500

" print('x==1')\n")

1501

self.check_roundtrip("class Test: # A comment here\n"

1502

" # A comment with weird indent\n"

1503

" after_com = 5\n"

1504

" def x(m): return m*5 # a one liner\n"

1505

" def y(m): # A whitespace after the colon\n"

1506

" return y*4 # 3-space indent\n")

1507

1508

# Some error-handling code

1509

self.check_roundtrip("try: import somemodule\n"

1510

"except ImportError: # comment\n"

1511

" print('Can not import' # comment2\n)"

1512

"else: print('Loaded')\n")

1513

1514

def test_continuation(self):

1515

# Balancing continuation

1516

self.check_roundtrip("a = (3,4, \n"

"5,6)\n"

"y = [3, 4,\n"

"5]\n"

"z = {'a': 5,\n"

"'b':15, 'c':True}\n"

1522

"x = len(y) + 5 - a[\n"

"3] - a[2]\n"

"+ len(z) - z[\n"

"'b']\n")

def test_backslash_continuation(self):

1528

# Backslash means line continuation, except for comments

1529

self.check_roundtrip("x=1+\\\n"

1530

"1\n"

1531

"# This is a comment\\\n"

1532

"# This also\n")

1533

self.check_roundtrip("# Comment \\\n"

1534

"x = 0")

1535

1536

def test_string_concatenation(self):

1537

# Two string literals on the same line

1538

self.check_roundtrip("'' ''")

1539

1540

def test_random_files(self):

1541

# Test roundtrip on random python modules.

1542

# pass the '-ucpu' option to process the full directory.

1543

1544

import glob, random

1545

fn = support.findfile("tokenize_tests.txt")

1546

tempdir = os.path.dirname(fn) or os.curdir

1547

testfiles = glob.glob(os.path.join(tempdir, "test*.py"))

1548

Brett Cannon

2016-09-09 14:57:09 -0700

[diff] [blame]

1549

# Tokenize is broken on test_pep3131.py because regular expressions are

1550

# broken on the obscure unicode identifiers in it. *sigh*

1551

# With roundtrip extended to test the 5-tuple mode of untokenize,

1552

# 7 more testfiles fail. Remove them also until the failure is diagnosed.

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

1553

Zachary Ware

724f6a6

2016-09-09 12:55:37 -0700

[diff] [blame]

1554

testfiles.remove(os.path.join(tempdir, "test_unicode_identifiers.py"))

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

1555

for f in ('buffer', 'builtin', 'fileio', 'inspect', 'os', 'platform', 'sys'):

1556

testfiles.remove(os.path.join(tempdir, "test_%s.py") % f)

1557

1558

if not support.is_resource_enabled("cpu"):

1559

testfiles = random.sample(testfiles, 10)

1560

1561

for testfile in testfiles:

1562

with open(testfile, 'rb') as f:

1563

with self.subTest(file=testfile):

1564

self.check_roundtrip(f)

1565

1566

Jason R. Coombs

2015-06-20 19:52:22 -0400

[diff] [blame]

1567

def roundtrip(self, code):

1568

if isinstance(code, str):

1569

code = code.encode('utf-8')

Jason R. Coombs

b6d1cdd

2015-06-25 22:42:24 -0400

[diff] [blame]

1570

return untokenize(tokenize(BytesIO(code).readline)).decode('utf-8')

Jason R. Coombs

2015-06-20 19:52:22 -0400

[diff] [blame]

1571

1572

def test_indentation_semantics_retained(self):

1573

"""

1574

Ensure that although whitespace might be mutated in a roundtrip,

1575

the semantic meaning of the indentation remains consistent.

1576

"""

1577

code = "if False:\n\tx=3\n\tx=3\n"

Jason R. Coombs

b6d1cdd

2015-06-25 22:42:24 -0400

[diff] [blame]

1578

codelines = self.roundtrip(code).split('\n')

Jason R. Coombs

2015-06-20 19:52:22 -0400

[diff] [blame]

1579

self.assertEqual(codelines[1], codelines[2])

Serhiy Storchaka

2015-10-06 18:16:28 +0300

[diff] [blame]

1580

self.check_roundtrip(code)

Jason R. Coombs

2015-06-20 19:52:22 -0400

[diff] [blame]

1581

1582

Thomas Wouters

49fd7fa

2006-04-21 10:40:58 +0000

[diff] [blame]

1583

if __name__ == "__main__":

Brett Cannon