from test import support
from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
                     STRING, ENDMARKER, ENCODING, tok_name, detect_encoding,
                     open as tokenize_open, Untokenizer)
from io import BytesIO
from unittest import TestCase, mock
from test.test_grammar import (VALID_UNDERSCORE_LITERALS,
                               INVALID_UNDERSCORE_LITERALS)
import os
import token


class TokenizeTest(TestCase):
    # Tests for the tokenize module.

    # The tests can be really simple. Given a small fragment of source
    # code, print out a table with tokens.  The ENDMARKER is omitted for
    # brevity.

    def check_tokenize(self, s, expected):
        # Format the tokens in s in a table format.
        # The ENDMARKER is omitted.
        result = []
        f = BytesIO(s.encode('utf-8'))
        for type, token, start, end, line in tokenize(f.readline):
            if type == ENDMARKER:
                break
            type = tok_name[type]
            result.append(f"    {type:10} {token!r:13} {start} {end}")
        self.assertEqual(result,
                         ["    ENCODING   'utf-8'       (0, 0) (0, 0)"] +
                         expected.rstrip().splitlines())
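
    # A worked example of the row format (derived from the f-string above,
    # which pads the token type to 10 columns and the token repr to 13):
    # for the input "1 + 1", the first token renders as the row
    #     NUMBER     '1'           (1, 0) (1, 1)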

    def test_basic(self):
        self.check_tokenize("1 + 1", """\
    NUMBER     '1'           (1, 0) (1, 1)
    OP         '+'           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    """)
        self.check_tokenize("if False:\n"
                            "    # NL\n"
                            "    True = False # NEWLINE\n", """\
    NAME       'if'          (1, 0) (1, 2)
    NAME       'False'       (1, 3) (1, 8)
    OP         ':'           (1, 8) (1, 9)
    NEWLINE    '\\n'          (1, 9) (1, 10)
    COMMENT    '# NL'        (2, 4) (2, 8)
    NL         '\\n'          (2, 8) (2, 9)
    INDENT     '    '        (3, 0) (3, 4)
    NAME       'True'        (3, 4) (3, 8)
    OP         '='           (3, 9) (3, 10)
    NAME       'False'       (3, 11) (3, 16)
    COMMENT    '# NEWLINE'   (3, 17) (3, 26)
    NEWLINE    '\\n'          (3, 26) (3, 27)
    DEDENT     ''            (4, 0) (4, 0)
    """)
        indent_error_file = b"""\
def k(x):
    x += 2
  x += 5
"""
        readline = BytesIO(indent_error_file).readline
        with self.assertRaisesRegex(IndentationError,
                                    "unindent does not match any "
                                    "outer indentation level"):
            for tok in tokenize(readline):
                pass

    def test_int(self):
        # Ordinary integers and binary operators
        self.check_tokenize("0xff <= 255", """\
    NUMBER     '0xff'        (1, 0) (1, 4)
    OP         '<='          (1, 5) (1, 7)
    NUMBER     '255'         (1, 8) (1, 11)
    """)
        self.check_tokenize("0b10 <= 255", """\
    NUMBER     '0b10'        (1, 0) (1, 4)
    OP         '<='          (1, 5) (1, 7)
    NUMBER     '255'         (1, 8) (1, 11)
    """)
        self.check_tokenize("0o123 <= 0O123", """\
    NUMBER     '0o123'       (1, 0) (1, 5)
    OP         '<='          (1, 6) (1, 8)
    NUMBER     '0O123'       (1, 9) (1, 14)
    """)
        self.check_tokenize("1234567 > ~0x15", """\
    NUMBER     '1234567'     (1, 0) (1, 7)
    OP         '>'           (1, 8) (1, 9)
    OP         '~'           (1, 10) (1, 11)
    NUMBER     '0x15'        (1, 11) (1, 15)
    """)
        self.check_tokenize("2134568 != 1231515", """\
    NUMBER     '2134568'     (1, 0) (1, 7)
    OP         '!='          (1, 8) (1, 10)
    NUMBER     '1231515'     (1, 11) (1, 18)
    """)
        self.check_tokenize("(-124561-1) & 200000000", """\
    OP         '('           (1, 0) (1, 1)
    OP         '-'           (1, 1) (1, 2)
    NUMBER     '124561'      (1, 2) (1, 8)
    OP         '-'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         ')'           (1, 10) (1, 11)
    OP         '&'           (1, 12) (1, 13)
    NUMBER     '200000000'   (1, 14) (1, 23)
    """)
        self.check_tokenize("0xdeadbeef != -1", """\
    NUMBER     '0xdeadbeef'  (1, 0) (1, 10)
    OP         '!='          (1, 11) (1, 13)
    OP         '-'           (1, 14) (1, 15)
    NUMBER     '1'           (1, 15) (1, 16)
    """)
        self.check_tokenize("0xdeadc0de & 12345", """\
    NUMBER     '0xdeadc0de'  (1, 0) (1, 10)
    OP         '&'           (1, 11) (1, 12)
    NUMBER     '12345'       (1, 13) (1, 18)
    """)
        self.check_tokenize("0xFF & 0x15 | 1234", """\
    NUMBER     '0xFF'        (1, 0) (1, 4)
    OP         '&'           (1, 5) (1, 6)
    NUMBER     '0x15'        (1, 7) (1, 11)
    OP         '|'           (1, 12) (1, 13)
    NUMBER     '1234'        (1, 14) (1, 18)
    """)

    def test_long(self):
        # Long integers
        self.check_tokenize("x = 0", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '0'           (1, 4) (1, 5)
    """)
        self.check_tokenize("x = 0xfffffffffff", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '0xfffffffffff' (1, 4) (1, 17)
    """)
        self.check_tokenize("x = 123141242151251616110", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '123141242151251616110' (1, 4) (1, 25)
    """)
        self.check_tokenize("x = -15921590215012591", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    OP         '-'           (1, 4) (1, 5)
    NUMBER     '15921590215012591' (1, 5) (1, 22)
    """)

    def test_float(self):
        # Floating point numbers
        self.check_tokenize("x = 3.14159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3.14159'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = 314159.", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '314159.'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = .314159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '.314159'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = 3e14159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3e14159'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = 3E123", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3E123'       (1, 4) (1, 9)
    """)
        self.check_tokenize("x+y = 3e-1230", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '+'           (1, 1) (1, 2)
    NAME       'y'           (1, 2) (1, 3)
    OP         '='           (1, 4) (1, 5)
    NUMBER     '3e-1230'     (1, 6) (1, 13)
    """)
        self.check_tokenize("x = 3.14e159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3.14e159'    (1, 4) (1, 12)
    """)

    def test_underscore_literals(self):
        def number_token(s):
            f = BytesIO(s.encode('utf-8'))
            for toktype, token, start, end, line in tokenize(f.readline):
                if toktype == NUMBER:
                    return token
            return 'invalid token'
        for lit in VALID_UNDERSCORE_LITERALS:
            if '(' in lit:
                # this won't work with compound complex inputs
                continue
            self.assertEqual(number_token(lit), lit)
        for lit in INVALID_UNDERSCORE_LITERALS:
            self.assertNotEqual(number_token(lit), lit)

    def test_string(self):
        # String literals
        self.check_tokenize("x = ''; y = \"\"", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "''"          (1, 4) (1, 6)
    OP         ';'           (1, 6) (1, 7)
    NAME       'y'           (1, 8) (1, 9)
    OP         '='           (1, 10) (1, 11)
    STRING     '""'          (1, 12) (1, 14)
    """)
        self.check_tokenize("x = '\"'; y = \"'\"", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '\\'"\\''       (1, 4) (1, 7)
    OP         ';'           (1, 7) (1, 8)
    NAME       'y'           (1, 9) (1, 10)
    OP         '='           (1, 11) (1, 12)
    STRING     '"\\'"'        (1, 13) (1, 16)
    """)
        self.check_tokenize("x = \"doesn't \"shrink\", does it\"", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '"doesn\\'t "' (1, 4) (1, 14)
    NAME       'shrink'      (1, 14) (1, 20)
    STRING     '", does it"' (1, 20) (1, 31)
    """)
        self.check_tokenize("x = 'abc' + 'ABC'", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "'abc'"       (1, 4) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    STRING     "'ABC'"       (1, 12) (1, 17)
    """)
        self.check_tokenize('y = "ABC" + "ABC"', """\
    NAME       'y'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '"ABC"'       (1, 4) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    STRING     '"ABC"'       (1, 12) (1, 17)
    """)
        self.check_tokenize("x = r'abc' + r'ABC' + R'ABC' + R'ABC'", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "r'abc'"      (1, 4) (1, 10)
    OP         '+'           (1, 11) (1, 12)
    STRING     "r'ABC'"      (1, 13) (1, 19)
    OP         '+'           (1, 20) (1, 21)
    STRING     "R'ABC'"      (1, 22) (1, 28)
    OP         '+'           (1, 29) (1, 30)
    STRING     "R'ABC'"      (1, 31) (1, 37)
    """)
        self.check_tokenize('y = r"abc" + r"ABC" + R"ABC" + R"ABC"', """\
    NAME       'y'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     'r"abc"'      (1, 4) (1, 10)
    OP         '+'           (1, 11) (1, 12)
    STRING     'r"ABC"'      (1, 13) (1, 19)
    OP         '+'           (1, 20) (1, 21)
    STRING     'R"ABC"'      (1, 22) (1, 28)
    OP         '+'           (1, 29) (1, 30)
    STRING     'R"ABC"'      (1, 31) (1, 37)
    """)

        self.check_tokenize("u'abc' + U'abc'", """\
    STRING     "u'abc'"      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     "U'abc'"      (1, 9) (1, 15)
    """)
        self.check_tokenize('u"abc" + U"abc"', """\
    STRING     'u"abc"'      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     'U"abc"'      (1, 9) (1, 15)
    """)

        self.check_tokenize("b'abc' + B'abc'", """\
    STRING     "b'abc'"      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     "B'abc'"      (1, 9) (1, 15)
    """)
        self.check_tokenize('b"abc" + B"abc"', """\
    STRING     'b"abc"'      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     'B"abc"'      (1, 9) (1, 15)
    """)
        self.check_tokenize("br'abc' + bR'abc' + Br'abc' + BR'abc'", """\
    STRING     "br'abc'"     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     "bR'abc'"     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     "Br'abc'"     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     "BR'abc'"     (1, 30) (1, 37)
    """)
        self.check_tokenize('br"abc" + bR"abc" + Br"abc" + BR"abc"', """\
    STRING     'br"abc"'     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     'bR"abc"'     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     'Br"abc"'     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     'BR"abc"'     (1, 30) (1, 37)
    """)
        self.check_tokenize("rb'abc' + rB'abc' + Rb'abc' + RB'abc'", """\
    STRING     "rb'abc'"     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     "rB'abc'"     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     "Rb'abc'"     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     "RB'abc'"     (1, 30) (1, 37)
    """)
        self.check_tokenize('rb"abc" + rB"abc" + Rb"abc" + RB"abc"', """\
    STRING     'rb"abc"'     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     'rB"abc"'     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     'Rb"abc"'     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     'RB"abc"'     (1, 30) (1, 37)
    """)
        # Check 0, 1, and 2 character string prefixes.
        self.check_tokenize(r'"a\
de\
fg"', """\
    STRING     '"a\\\\\\nde\\\\\\nfg"\' (1, 0) (3, 3)
    """)
        self.check_tokenize(r'u"a\
de"', """\
    STRING     'u"a\\\\\\nde"\'  (1, 0) (2, 3)
    """)
        self.check_tokenize(r'rb"a\
d"', """\
    STRING     'rb"a\\\\\\nd"\'  (1, 0) (2, 2)
    """)
        self.check_tokenize(r'"""a\
b"""', """\
    STRING     '\"\""a\\\\\\nb\"\""' (1, 0) (2, 4)
    """)
        self.check_tokenize(r'u"""a\
b"""', """\
    STRING     'u\"\""a\\\\\\nb\"\""' (1, 0) (2, 4)
    """)
        self.check_tokenize(r'rb"""a\
b\
c"""', """\
    STRING     'rb"\""a\\\\\\nb\\\\\\nc"\""' (1, 0) (3, 4)
    """)
        self.check_tokenize('f"abc"', """\
    STRING     'f"abc"'      (1, 0) (1, 6)
    """)
        self.check_tokenize('fR"a{b}c"', """\
    STRING     'fR"a{b}c"'   (1, 0) (1, 9)
    """)
        self.check_tokenize('f"""abc"""', """\
    STRING     'f\"\"\"abc\"\"\"'  (1, 0) (1, 10)
    """)
        self.check_tokenize(r'f"abc\
def"', """\
    STRING     'f"abc\\\\\\ndef"' (1, 0) (2, 4)
    """)
        self.check_tokenize(r'Rf"abc\
def"', """\
    STRING     'Rf"abc\\\\\\ndef"' (1, 0) (2, 4)
    """)

    def test_function(self):
        self.check_tokenize("def d22(a, b, c=2, d=2, *k): pass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'd22'         (1, 4) (1, 7)
    OP         '('           (1, 7) (1, 8)
    NAME       'a'           (1, 8) (1, 9)
    OP         ','           (1, 9) (1, 10)
    NAME       'b'           (1, 11) (1, 12)
    OP         ','           (1, 12) (1, 13)
    NAME       'c'           (1, 14) (1, 15)
    OP         '='           (1, 15) (1, 16)
    NUMBER     '2'           (1, 16) (1, 17)
    OP         ','           (1, 17) (1, 18)
    NAME       'd'           (1, 19) (1, 20)
    OP         '='           (1, 20) (1, 21)
    NUMBER     '2'           (1, 21) (1, 22)
    OP         ','           (1, 22) (1, 23)
    OP         '*'           (1, 24) (1, 25)
    NAME       'k'           (1, 25) (1, 26)
    OP         ')'           (1, 26) (1, 27)
    OP         ':'           (1, 27) (1, 28)
    NAME       'pass'        (1, 29) (1, 33)
    """)
        self.check_tokenize("def d01v_(a=1, *k, **w): pass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'd01v_'       (1, 4) (1, 9)
    OP         '('           (1, 9) (1, 10)
    NAME       'a'           (1, 10) (1, 11)
    OP         '='           (1, 11) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         ','           (1, 13) (1, 14)
    OP         '*'           (1, 15) (1, 16)
    NAME       'k'           (1, 16) (1, 17)
    OP         ','           (1, 17) (1, 18)
    OP         '**'          (1, 19) (1, 21)
    NAME       'w'           (1, 21) (1, 22)
    OP         ')'           (1, 22) (1, 23)
    OP         ':'           (1, 23) (1, 24)
    NAME       'pass'        (1, 25) (1, 29)
    """)
        self.check_tokenize("def d23(a: str, b: int=3) -> int: pass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'd23'         (1, 4) (1, 7)
    OP         '('           (1, 7) (1, 8)
    NAME       'a'           (1, 8) (1, 9)
    OP         ':'           (1, 9) (1, 10)
    NAME       'str'         (1, 11) (1, 14)
    OP         ','           (1, 14) (1, 15)
    NAME       'b'           (1, 16) (1, 17)
    OP         ':'           (1, 17) (1, 18)
    NAME       'int'         (1, 19) (1, 22)
    OP         '='           (1, 22) (1, 23)
    NUMBER     '3'           (1, 23) (1, 24)
    OP         ')'           (1, 24) (1, 25)
    OP         '->'          (1, 26) (1, 28)
    NAME       'int'         (1, 29) (1, 32)
    OP         ':'           (1, 32) (1, 33)
    NAME       'pass'        (1, 34) (1, 38)
    """)

    def test_comparison(self):
        # Comparison
        self.check_tokenize("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != "
                            "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass", """\
    NAME       'if'          (1, 0) (1, 2)
    NUMBER     '1'           (1, 3) (1, 4)
    OP         '<'           (1, 5) (1, 6)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '>'           (1, 9) (1, 10)
    NUMBER     '1'           (1, 11) (1, 12)
    OP         '=='          (1, 13) (1, 15)
    NUMBER     '1'           (1, 16) (1, 17)
    OP         '>='          (1, 18) (1, 20)
    NUMBER     '5'           (1, 21) (1, 22)
    OP         '<='          (1, 23) (1, 25)
    NUMBER     '0x15'        (1, 26) (1, 30)
    OP         '<='          (1, 31) (1, 33)
    NUMBER     '0x12'        (1, 34) (1, 38)
    OP         '!='          (1, 39) (1, 41)
    NUMBER     '1'           (1, 42) (1, 43)
    NAME       'and'         (1, 44) (1, 47)
    NUMBER     '5'           (1, 48) (1, 49)
    NAME       'in'          (1, 50) (1, 52)
    NUMBER     '1'           (1, 53) (1, 54)
    NAME       'not'         (1, 55) (1, 58)
    NAME       'in'          (1, 59) (1, 61)
    NUMBER     '1'           (1, 62) (1, 63)
    NAME       'is'          (1, 64) (1, 66)
    NUMBER     '1'           (1, 67) (1, 68)
    NAME       'or'          (1, 69) (1, 71)
    NUMBER     '5'           (1, 72) (1, 73)
    NAME       'is'          (1, 74) (1, 76)
    NAME       'not'         (1, 77) (1, 80)
    NUMBER     '1'           (1, 81) (1, 82)
    OP         ':'           (1, 82) (1, 83)
    NAME       'pass'        (1, 84) (1, 88)
    """)

    def test_shift(self):
        # Shift
        self.check_tokenize("x = 1 << 1 >> 5", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '<<'          (1, 6) (1, 8)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '>>'          (1, 11) (1, 13)
    NUMBER     '5'           (1, 14) (1, 15)
    """)

    def test_additive(self):
        # Additive
        self.check_tokenize("x = 1 - y + 15 - 1 + 0x124 + z + a[5]", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '-'           (1, 6) (1, 7)
    NAME       'y'           (1, 8) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    NUMBER     '15'          (1, 12) (1, 14)
    OP         '-'           (1, 15) (1, 16)
    NUMBER     '1'           (1, 17) (1, 18)
    OP         '+'           (1, 19) (1, 20)
    NUMBER     '0x124'       (1, 21) (1, 26)
    OP         '+'           (1, 27) (1, 28)
    NAME       'z'           (1, 29) (1, 30)
    OP         '+'           (1, 31) (1, 32)
    NAME       'a'           (1, 33) (1, 34)
    OP         '['           (1, 34) (1, 35)
    NUMBER     '5'           (1, 35) (1, 36)
    OP         ']'           (1, 36) (1, 37)
    """)

    def test_multiplicative(self):
        # Multiplicative
        self.check_tokenize("x = 1//1*1/5*12%0x12@42", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '//'          (1, 5) (1, 7)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '*'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '/'           (1, 10) (1, 11)
    NUMBER     '5'           (1, 11) (1, 12)
    OP         '*'           (1, 12) (1, 13)
    NUMBER     '12'          (1, 13) (1, 15)
    OP         '%'           (1, 15) (1, 16)
    NUMBER     '0x12'        (1, 16) (1, 20)
    OP         '@'           (1, 20) (1, 21)
    NUMBER     '42'          (1, 21) (1, 23)
    """)

    def test_unary(self):
        # Unary
        self.check_tokenize("~1 ^ 1 & 1 |1 ^ -1", """\
    OP         '~'           (1, 0) (1, 1)
    NUMBER     '1'           (1, 1) (1, 2)
    OP         '^'           (1, 3) (1, 4)
    NUMBER     '1'           (1, 5) (1, 6)
    OP         '&'           (1, 7) (1, 8)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '|'           (1, 11) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         '^'           (1, 14) (1, 15)
    OP         '-'           (1, 16) (1, 17)
    NUMBER     '1'           (1, 17) (1, 18)
    """)
        self.check_tokenize("-1*1/1+1*1//1 - ---1**1", """\
    OP         '-'           (1, 0) (1, 1)
    NUMBER     '1'           (1, 1) (1, 2)
    OP         '*'           (1, 2) (1, 3)
    NUMBER     '1'           (1, 3) (1, 4)
    OP         '/'           (1, 4) (1, 5)
    NUMBER     '1'           (1, 5) (1, 6)
    OP         '+'           (1, 6) (1, 7)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '*'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '//'          (1, 10) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         '-'           (1, 14) (1, 15)
    OP         '-'           (1, 16) (1, 17)
    OP         '-'           (1, 17) (1, 18)
    OP         '-'           (1, 18) (1, 19)
    NUMBER     '1'           (1, 19) (1, 20)
    OP         '**'          (1, 20) (1, 22)
    NUMBER     '1'           (1, 22) (1, 23)
    """)

    def test_selector(self):
        # Selector
        self.check_tokenize("import sys, time\nx = sys.modules['time'].time()", """\
    NAME       'import'      (1, 0) (1, 6)
    NAME       'sys'         (1, 7) (1, 10)
    OP         ','           (1, 10) (1, 11)
    NAME       'time'        (1, 12) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    NAME       'x'           (2, 0) (2, 1)
    OP         '='           (2, 2) (2, 3)
    NAME       'sys'         (2, 4) (2, 7)
    OP         '.'           (2, 7) (2, 8)
    NAME       'modules'     (2, 8) (2, 15)
    OP         '['           (2, 15) (2, 16)
    STRING     "'time'"      (2, 16) (2, 22)
    OP         ']'           (2, 22) (2, 23)
    OP         '.'           (2, 23) (2, 24)
    NAME       'time'        (2, 24) (2, 28)
    OP         '('           (2, 28) (2, 29)
    OP         ')'           (2, 29) (2, 30)
    """)

    def test_method(self):
        # Methods
        self.check_tokenize("@staticmethod\ndef foo(x,y): pass", """\
    OP         '@'           (1, 0) (1, 1)
    NAME       'staticmethod' (1, 1) (1, 13)
    NEWLINE    '\\n'          (1, 13) (1, 14)
    NAME       'def'         (2, 0) (2, 3)
    NAME       'foo'         (2, 4) (2, 7)
    OP         '('           (2, 7) (2, 8)
    NAME       'x'           (2, 8) (2, 9)
    OP         ','           (2, 9) (2, 10)
    NAME       'y'           (2, 10) (2, 11)
    OP         ')'           (2, 11) (2, 12)
    OP         ':'           (2, 12) (2, 13)
    NAME       'pass'        (2, 14) (2, 18)
    """)

    def test_tabs(self):
        # Evil tabs
        self.check_tokenize("def f():\n"
                            "\tif x\n"
                            "        \tpass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'f'           (1, 4) (1, 5)
    OP         '('           (1, 5) (1, 6)
    OP         ')'           (1, 6) (1, 7)
    OP         ':'           (1, 7) (1, 8)
    NEWLINE    '\\n'          (1, 8) (1, 9)
    INDENT     '\\t'          (2, 0) (2, 1)
    NAME       'if'          (2, 1) (2, 3)
    NAME       'x'           (2, 4) (2, 5)
    NEWLINE    '\\n'          (2, 5) (2, 6)
    INDENT     '        \\t'  (3, 0) (3, 9)
    NAME       'pass'        (3, 9) (3, 13)
    DEDENT     ''            (4, 0) (4, 0)
    DEDENT     ''            (4, 0) (4, 0)
    """)

    def test_non_ascii_identifiers(self):
        # Non-ascii identifiers
        self.check_tokenize("Örter = 'places'\ngrün = 'green'", """\
    NAME       'Örter'       (1, 0) (1, 5)
    OP         '='           (1, 6) (1, 7)
    STRING     "'places'"    (1, 8) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    NAME       'grün'        (2, 0) (2, 4)
    OP         '='           (2, 5) (2, 6)
    STRING     "'green'"     (2, 7) (2, 14)
    """)

    def test_unicode(self):
        # Legacy unicode literals:
        self.check_tokenize("Örter = u'places'\ngrün = U'green'", """\
    NAME       'Örter'       (1, 0) (1, 5)
    OP         '='           (1, 6) (1, 7)
    STRING     "u'places'"   (1, 8) (1, 17)
    NEWLINE    '\\n'          (1, 17) (1, 18)
    NAME       'grün'        (2, 0) (2, 4)
    OP         '='           (2, 5) (2, 6)
    STRING     "U'green'"    (2, 7) (2, 15)
    """)

    def test_async(self):
        # Async/await extension:
        self.check_tokenize("async = 1", """\
    NAME       'async'       (1, 0) (1, 5)
    OP         '='           (1, 6) (1, 7)
    NUMBER     '1'           (1, 8) (1, 9)
    """)

        self.check_tokenize("a = (async = 1)", """\
    NAME       'a'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    OP         '('           (1, 4) (1, 5)
    NAME       'async'       (1, 5) (1, 10)
    OP         '='           (1, 11) (1, 12)
    NUMBER     '1'           (1, 13) (1, 14)
    OP         ')'           (1, 14) (1, 15)
    """)

        self.check_tokenize("async()", """\
    NAME       'async'       (1, 0) (1, 5)
    OP         '('           (1, 5) (1, 6)
    OP         ')'           (1, 6) (1, 7)
    """)

        self.check_tokenize("class async(Bar):pass", """\
    NAME       'class'       (1, 0) (1, 5)
    NAME       'async'       (1, 6) (1, 11)
    OP         '('           (1, 11) (1, 12)
    NAME       'Bar'         (1, 12) (1, 15)
    OP         ')'           (1, 15) (1, 16)
    OP         ':'           (1, 16) (1, 17)
    NAME       'pass'        (1, 17) (1, 21)
    """)

        self.check_tokenize("class async:pass", """\
    NAME       'class'       (1, 0) (1, 5)
    NAME       'async'       (1, 6) (1, 11)
    OP         ':'           (1, 11) (1, 12)
    NAME       'pass'        (1, 12) (1, 16)
    """)

        self.check_tokenize("await = 1", """\
    NAME       'await'       (1, 0) (1, 5)
    OP         '='           (1, 6) (1, 7)
    NUMBER     '1'           (1, 8) (1, 9)
    """)

        self.check_tokenize("foo.async", """\
    NAME       'foo'         (1, 0) (1, 3)
    OP         '.'           (1, 3) (1, 4)
    NAME       'async'       (1, 4) (1, 9)
    """)

        self.check_tokenize("async for a in b: pass", """\
    NAME       'async'       (1, 0) (1, 5)
    NAME       'for'         (1, 6) (1, 9)
    NAME       'a'           (1, 10) (1, 11)
    NAME       'in'          (1, 12) (1, 14)
    NAME       'b'           (1, 15) (1, 16)
    OP         ':'           (1, 16) (1, 17)
    NAME       'pass'        (1, 18) (1, 22)
    """)

        self.check_tokenize("async with a as b: pass", """\
    NAME       'async'       (1, 0) (1, 5)
    NAME       'with'        (1, 6) (1, 10)
    NAME       'a'           (1, 11) (1, 12)
    NAME       'as'          (1, 13) (1, 15)
    NAME       'b'           (1, 16) (1, 17)
    OP         ':'           (1, 17) (1, 18)
    NAME       'pass'        (1, 19) (1, 23)
    """)

        self.check_tokenize("async.foo", """\
    NAME       'async'       (1, 0) (1, 5)
    OP         '.'           (1, 5) (1, 6)
    NAME       'foo'         (1, 6) (1, 9)
    """)

        self.check_tokenize("async", """\
    NAME       'async'       (1, 0) (1, 5)
    """)

        self.check_tokenize("async\n#comment\nawait", """\
    NAME       'async'       (1, 0) (1, 5)
    NEWLINE    '\\n'          (1, 5) (1, 6)
    COMMENT    '#comment'    (2, 0) (2, 8)
    NL         '\\n'          (2, 8) (2, 9)
    NAME       'await'       (3, 0) (3, 5)
    """)

        self.check_tokenize("async\n...\nawait", """\
    NAME       'async'       (1, 0) (1, 5)
    NEWLINE    '\\n'          (1, 5) (1, 6)
    OP         '...'         (2, 0) (2, 3)
    NEWLINE    '\\n'          (2, 3) (2, 4)
    NAME       'await'       (3, 0) (3, 5)
    """)

        self.check_tokenize("async\nawait", """\
    NAME       'async'       (1, 0) (1, 5)
    NEWLINE    '\\n'          (1, 5) (1, 6)
    NAME       'await'       (2, 0) (2, 5)
    """)

        self.check_tokenize("foo.async + 1", """\
    NAME       'foo'         (1, 0) (1, 3)
    OP         '.'           (1, 3) (1, 4)
    NAME       'async'       (1, 4) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    NUMBER     '1'           (1, 12) (1, 13)
    """)

        self.check_tokenize("async def foo(): pass", """\
    ASYNC      'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'foo'         (1, 10) (1, 13)
    OP         '('           (1, 13) (1, 14)
    OP         ')'           (1, 14) (1, 15)
    OP         ':'           (1, 15) (1, 16)
    NAME       'pass'        (1, 17) (1, 21)
    """)

        self.check_tokenize('''\
async def foo():
  def foo(await):
    await = 1
  if 1:
    await
async += 1
''', """\
    ASYNC      'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'foo'         (1, 10) (1, 13)
    OP         '('           (1, 13) (1, 14)
    OP         ')'           (1, 14) (1, 15)
    OP         ':'           (1, 15) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    INDENT     '  '          (2, 0) (2, 2)
    NAME       'def'         (2, 2) (2, 5)
    NAME       'foo'         (2, 6) (2, 9)
    OP         '('           (2, 9) (2, 10)
    AWAIT      'await'       (2, 10) (2, 15)
    OP         ')'           (2, 15) (2, 16)
    OP         ':'           (2, 16) (2, 17)
    NEWLINE    '\\n'          (2, 17) (2, 18)
    INDENT     '    '        (3, 0) (3, 4)
    AWAIT      'await'       (3, 4) (3, 9)
    OP         '='           (3, 10) (3, 11)
    NUMBER     '1'           (3, 12) (3, 13)
    NEWLINE    '\\n'          (3, 13) (3, 14)
    DEDENT     ''            (4, 2) (4, 2)
    NAME       'if'          (4, 2) (4, 4)
    NUMBER     '1'           (4, 5) (4, 6)
    OP         ':'           (4, 6) (4, 7)
    NEWLINE    '\\n'          (4, 7) (4, 8)
    INDENT     '    '        (5, 0) (5, 4)
    AWAIT      'await'       (5, 4) (5, 9)
    NEWLINE    '\\n'          (5, 9) (5, 10)
    DEDENT     ''            (6, 0) (6, 0)
    DEDENT     ''            (6, 0) (6, 0)
    NAME       'async'       (6, 0) (6, 5)
    OP         '+='          (6, 6) (6, 8)
    NUMBER     '1'           (6, 9) (6, 10)
    NEWLINE    '\\n'          (6, 10) (6, 11)
    """)

        self.check_tokenize('''\
async def foo():
  async for i in 1: pass''', """\
    ASYNC      'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'foo'         (1, 10) (1, 13)
    OP         '('           (1, 13) (1, 14)
    OP         ')'           (1, 14) (1, 15)
    OP         ':'           (1, 15) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    INDENT     '  '          (2, 0) (2, 2)
    ASYNC      'async'       (2, 2) (2, 7)
    NAME       'for'         (2, 8) (2, 11)
    NAME       'i'           (2, 12) (2, 13)
    NAME       'in'          (2, 14) (2, 16)
    NUMBER     '1'           (2, 17) (2, 18)
    OP         ':'           (2, 18) (2, 19)
    NAME       'pass'        (2, 20) (2, 24)
    DEDENT     ''            (3, 0) (3, 0)
    """)

        self.check_tokenize('''async def foo(async): await''', """\
    ASYNC      'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'foo'         (1, 10) (1, 13)
    OP         '('           (1, 13) (1, 14)
    ASYNC      'async'       (1, 14) (1, 19)
    OP         ')'           (1, 19) (1, 20)
    OP         ':'           (1, 20) (1, 21)
    AWAIT      'await'       (1, 22) (1, 27)
    """)

        self.check_tokenize('''\
def f():

  def baz(): pass
  async def bar(): pass

  await = 2''', """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'f'           (1, 4) (1, 5)
    OP         '('           (1, 5) (1, 6)
    OP         ')'           (1, 6) (1, 7)
    OP         ':'           (1, 7) (1, 8)
    NEWLINE    '\\n'          (1, 8) (1, 9)
    NL         '\\n'          (2, 0) (2, 1)
    INDENT     '  '          (3, 0) (3, 2)
    NAME       'def'         (3, 2) (3, 5)
    NAME       'baz'         (3, 6) (3, 9)
    OP         '('           (3, 9) (3, 10)
    OP         ')'           (3, 10) (3, 11)
    OP         ':'           (3, 11) (3, 12)
    NAME       'pass'        (3, 13) (3, 17)
    NEWLINE    '\\n'          (3, 17) (3, 18)
    ASYNC      'async'       (4, 2) (4, 7)
    NAME       'def'         (4, 8) (4, 11)
    NAME       'bar'         (4, 12) (4, 15)
    OP         '('           (4, 15) (4, 16)
    OP         ')'           (4, 16) (4, 17)
    OP         ':'           (4, 17) (4, 18)
    NAME       'pass'        (4, 19) (4, 23)
    NEWLINE    '\\n'          (4, 23) (4, 24)
    NL         '\\n'          (5, 0) (5, 1)
    NAME       'await'       (6, 2) (6, 7)
    OP         '='           (6, 8) (6, 9)
    NUMBER     '2'           (6, 10) (6, 11)
    DEDENT     ''            (7, 0) (7, 0)
    """)

        self.check_tokenize('''\
async def f():

  def baz(): pass
  async def bar(): pass

  await = 2''', """\
    ASYNC      'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'f'           (1, 10) (1, 11)
    OP         '('           (1, 11) (1, 12)
    OP         ')'           (1, 12) (1, 13)
    OP         ':'           (1, 13) (1, 14)
    NEWLINE    '\\n'          (1, 14) (1, 15)
    NL         '\\n'          (2, 0) (2, 1)
    INDENT     '  '          (3, 0) (3, 2)
    NAME       'def'         (3, 2) (3, 5)
    NAME       'baz'         (3, 6) (3, 9)
    OP         '('           (3, 9) (3, 10)
    OP         ')'           (3, 10) (3, 11)
    OP         ':'           (3, 11) (3, 12)
    NAME       'pass'        (3, 13) (3, 17)
    NEWLINE    '\\n'          (3, 17) (3, 18)
    ASYNC      'async'       (4, 2) (4, 7)
    NAME       'def'         (4, 8) (4, 11)
    NAME       'bar'         (4, 12) (4, 15)
    OP         '('           (4, 15) (4, 16)
    OP         ')'           (4, 16) (4, 17)
    OP         ':'           (4, 17) (4, 18)
    NAME       'pass'        (4, 19) (4, 23)
    NEWLINE    '\\n'          (4, 23) (4, 24)
    NL         '\\n'          (5, 0) (5, 1)
    AWAIT      'await'       (6, 2) (6, 7)
    OP         '='           (6, 8) (6, 9)
    NUMBER     '2'           (6, 10) (6, 11)
    DEDENT     ''            (7, 0) (7, 0)
    """)


def decistmt(s):
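    """Substitute Decimals for floats in a string of statements.

    A minimal sketch of the transformation; the exact input/output pair
    below is the one exercised by TestMisc.test_decistmt:

    >>> decistmt('+21.3e-5*-.1234/81.7')
    "+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')"

    (untokenize() in compatibility mode appends a space after NAME and
    NUMBER tokens, hence "Decimal ('...')" rather than "Decimal('...')".)
    """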
    result = []
    g = tokenize(BytesIO(s.encode('utf-8')).readline)  # tokenize the string
    for toknum, tokval, _, _, _ in g:
        if toknum == NUMBER and '.' in tokval:  # replace NUMBER tokens
            result.extend([
                (NAME, 'Decimal'),
                (OP, '('),
                (STRING, repr(tokval)),
                (OP, ')')
            ])
        else:
            result.append((toknum, tokval))
    return untokenize(result).decode('utf-8')

class TestMisc(TestCase):

    def test_decistmt(self):
        # Substitute Decimals for floats in a string of statements.
        # This is an example from the docs.

        from decimal import Decimal
        s = '+21.3e-5*-.1234/81.7'
        self.assertEqual(decistmt(s),
                         "+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')")

        # The format of the exponent is inherited from the platform C library.
        # Known cases are "e-007" (Windows) and "e-07" (not Windows).  Since
        # we're only showing 11 digits, and the 12th isn't close to 5, the
        # rest of the output should be platform-independent.
        self.assertRegex(repr(eval(s)), '-3.2171603427[0-9]*e-0+7')

        # Output from calculations with Decimal should be identical across all
        # platforms.
        self.assertEqual(eval(decistmt(s)),
                         Decimal('-3.217160342717258261933904529E-7'))


class TestTokenizerAdheresToPep0263(TestCase):
    """
    Test that tokenizer adheres to the coding behaviour stipulated in PEP 0263.
    """

    def _testFile(self, filename):
        path = os.path.join(os.path.dirname(__file__), filename)
        TestRoundtrip.check_roundtrip(self, open(path, 'rb'))

    def test_utf8_coding_cookie_and_no_utf8_bom(self):
        f = 'tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt'
        self._testFile(f)

    def test_latin1_coding_cookie_and_utf8_bom(self):
        """
        As per PEP 0263, if a file starts with a utf-8 BOM signature, the only
        allowed encoding for the comment is 'utf-8'.  The text file used in
        this test starts with a BOM signature, but specifies latin1 as the
        coding, so verify that a SyntaxError is raised, which matches the
        behaviour of the interpreter when it encounters a similar condition.
        """
        f = 'tokenize_tests-latin1-coding-cookie-and-utf8-bom-sig.txt'
        self.assertRaises(SyntaxError, self._testFile, f)

    def test_no_coding_cookie_and_utf8_bom(self):
        f = 'tokenize_tests-no-coding-cookie-and-utf8-bom-sig-only.txt'
        self._testFile(f)

    def test_utf8_coding_cookie_and_utf8_bom(self):
        f = 'tokenize_tests-utf8-coding-cookie-and-utf8-bom-sig.txt'
        self._testFile(f)

    def test_bad_coding_cookie(self):
        self.assertRaises(SyntaxError, self._testFile, 'bad_coding.py')
        self.assertRaises(SyntaxError, self._testFile, 'bad_coding2.py')


class Test_Tokenize(TestCase):

    def test__tokenize_decodes_with_specified_encoding(self):
        literal = '"ЉЊЈЁЂ"'
        line = literal.encode('utf-8')
        first = False
        def readline():
            nonlocal first
            if not first:
                first = True
                return line
            else:
                return b''

        # skip the initial encoding token and the end token
        tokens = list(_tokenize(readline, encoding='utf-8'))[1:-1]
        expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
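        # In the 5-tuple, 3 is token.STRING and the last field is the
        # physical line the token was found on.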
        self.assertEqual(tokens, expected_tokens,
                         "bytes not decoded with encoding")

    def test__tokenize_does_not_decode_with_encoding_none(self):
        literal = '"ЉЊЈЁЂ"'
        first = False
        def readline():
            nonlocal first
            if not first:
                first = True
                return literal
            else:
                return b''

        # skip the end token
        tokens = list(_tokenize(readline, encoding=None))[:-1]
        expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
        self.assertEqual(tokens, expected_tokens,
                         "string not tokenized when encoding is None")


class TestDetectEncoding(TestCase):

    def get_readline(self, lines):
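        # Simulates a file's readline(): returns one line per call and
        # raises StopIteration when exhausted, which detect_encoding()
        # treats as end-of-file.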
        index = 0
        def readline():
            nonlocal index
            if index == len(lines):
                raise StopIteration
            line = lines[index]
            index += 1
            return line
        return readline

    def test_no_bom_no_encoding_cookie(self):
        lines = (
            b'# something\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, list(lines[:2]))

    def test_bom_no_cookie(self):
        lines = (
            b'\xef\xbb\xbf# something\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines,
                         [b'# something\n', b'print(something)\n'])

    def test_cookie_first_line_no_bom(self):
        lines = (
            b'# -*- coding: latin-1 -*-\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'iso-8859-1')
        self.assertEqual(consumed_lines, [b'# -*- coding: latin-1 -*-\n'])

    def test_matched_bom_and_cookie_first_line(self):
        lines = (
            b'\xef\xbb\xbf# coding=utf-8\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines, [b'# coding=utf-8\n'])

    def test_mismatched_bom_and_cookie_first_line_raises_syntaxerror(self):
        lines = (
            b'\xef\xbb\xbf# vim: set fileencoding=ascii :\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        readline = self.get_readline(lines)
        self.assertRaises(SyntaxError, detect_encoding, readline)

    def test_cookie_second_line_no_bom(self):
        lines = (
            b'#! something\n',
            b'# vim: set fileencoding=ascii :\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'ascii')
        expected = [b'#! something\n', b'# vim: set fileencoding=ascii :\n']
        self.assertEqual(consumed_lines, expected)

    def test_matched_bom_and_cookie_second_line(self):
        lines = (
            b'\xef\xbb\xbf#! something\n',
            b'f# coding=utf-8\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines,
                         [b'#! something\n', b'f# coding=utf-8\n'])

    def test_mismatched_bom_and_cookie_second_line_raises_syntaxerror(self):
        lines = (
            b'\xef\xbb\xbf#! something\n',
            b'# vim: set fileencoding=ascii :\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        readline = self.get_readline(lines)
        self.assertRaises(SyntaxError, detect_encoding, readline)

    def test_cookie_second_line_noncommented_first_line(self):
        lines = (
            b"print('\xc2\xa3')\n",
            b'# vim: set fileencoding=iso8859-15 :\n',
            b"print('\xe2\x82\xac')\n"
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8')
        expected = [b"print('\xc2\xa3')\n"]
        self.assertEqual(consumed_lines, expected)

    def test_cookie_second_line_commented_first_line(self):
        lines = (
            b"#print('\xc2\xa3')\n",
            b'# vim: set fileencoding=iso8859-15 :\n',
            b"print('\xe2\x82\xac')\n"
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'iso8859-15')
        expected = [b"#print('\xc2\xa3')\n", b'# vim: set fileencoding=iso8859-15 :\n']
        self.assertEqual(consumed_lines, expected)

    def test_cookie_second_line_empty_first_line(self):
        lines = (
            b'\n',
            b'# vim: set fileencoding=iso8859-15 :\n',
            b"print('\xe2\x82\xac')\n"
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'iso8859-15')
        expected = [b'\n', b'# vim: set fileencoding=iso8859-15 :\n']
        self.assertEqual(consumed_lines, expected)

    def test_latin1_normalization(self):
        # See get_normal_name() in tokenizer.c.
        encodings = ("latin-1", "iso-8859-1", "iso-latin-1", "latin-1-unix",
                     "iso-8859-1-unix", "iso-latin-1-mac")
        for encoding in encodings:
            for rep in ("-", "_"):
                enc = encoding.replace("-", rep)
                lines = (b"#!/usr/bin/python\n",
                         b"# coding: " + enc.encode("ascii") + b"\n",
                         b"print(things)\n",
                         b"do_something += 4\n")
                rl = self.get_readline(lines)
                found, consumed_lines = detect_encoding(rl)
                self.assertEqual(found, "iso-8859-1")

    def test_syntaxerror_latin1(self):
        # Issue 14629: need to raise SyntaxError if the first
        # line(s) have non-UTF-8 characters
        lines = (
            b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S
            )
        readline = self.get_readline(lines)
        self.assertRaises(SyntaxError, detect_encoding, readline)


    def test_utf8_normalization(self):
        # See get_normal_name() in tokenizer.c.
        encodings = ("utf-8", "utf-8-mac", "utf-8-unix")
        for encoding in encodings:
            for rep in ("-", "_"):
                enc = encoding.replace("-", rep)
                lines = (b"#!/usr/bin/python\n",
                         b"# coding: " + enc.encode("ascii") + b"\n",
                         b"1 + 3\n")
                rl = self.get_readline(lines)
                found, consumed_lines = detect_encoding(rl)
                self.assertEqual(found, "utf-8")

    def test_short_files(self):
        readline = self.get_readline((b'print(something)\n',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, [b'print(something)\n'])

        encoding, consumed_lines = detect_encoding(self.get_readline(()))
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, [])

        readline = self.get_readline((b'\xef\xbb\xbfprint(something)\n',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines, [b'print(something)\n'])

        readline = self.get_readline((b'\xef\xbb\xbf',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines, [])

        readline = self.get_readline((b'# coding: bad\n',))
        self.assertRaises(SyntaxError, detect_encoding, readline)

    def test_false_encoding(self):
        # Issue 18873: "Encoding" detected in non-comment lines
        readline = self.get_readline((b'print("#coding=fake")',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, [b'print("#coding=fake")'])

    def test_open(self):
        filename = support.TESTFN + '.py'
        self.addCleanup(support.unlink, filename)

        # test coding cookie
        for encoding in ('iso-8859-15', 'utf-8'):
            with open(filename, 'w', encoding=encoding) as fp:
                print("# coding: %s" % encoding, file=fp)
                print("print('euro:\u20ac')", file=fp)
            with tokenize_open(filename) as fp:
                self.assertEqual(fp.encoding, encoding)
                self.assertEqual(fp.mode, 'r')

        # test BOM (no coding cookie)
        with open(filename, 'w', encoding='utf-8-sig') as fp:
            print("print('euro:\u20ac')", file=fp)
        with tokenize_open(filename) as fp:
            self.assertEqual(fp.encoding, 'utf-8-sig')
            self.assertEqual(fp.mode, 'r')

    def test_filename_in_exception(self):
        # When possible, include the file name in the exception.
        path = 'some_file_path'
        lines = (
            b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S
            )
        class Bunk:
            def __init__(self, lines, path):
                self.name = path
                self._lines = lines
                self._index = 0

            def readline(self):
                if self._index == len(lines):
                    raise StopIteration
                line = lines[self._index]
                self._index += 1
                return line

        with self.assertRaises(SyntaxError):
            ins = Bunk(lines, path)
            # Make sure lacking a name isn't an issue.
            del ins.name
            detect_encoding(ins.readline)
        with self.assertRaisesRegex(SyntaxError, '.*{}'.format(path)):
            ins = Bunk(lines, path)
            detect_encoding(ins.readline)

    def test_open_error(self):
        # Issue #23840: open() must close the binary file on error
        m = BytesIO(b'#coding:xxx')
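        # tokenize._builtin_open is the module's private alias for the
        # built-in open(); patching it makes tokenize_open() read from the
        # in-memory stream above instead of a real file.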
        with mock.patch('tokenize._builtin_open', return_value=m):
            self.assertRaises(SyntaxError, tokenize_open, 'foobar')
        self.assertTrue(m.closed)


class TestTokenize(TestCase):

    def test_tokenize(self):
        import tokenize as tokenize_module
        encoding = object()
        encoding_used = None
        def mock_detect_encoding(readline):
            return encoding, [b'first', b'second']
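
        # detect_encoding() returns (encoding, consumed_lines); tokenize()
        # must re-emit those consumed lines ahead of what readline yields,
        # which the assertion on `results` below verifies.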
Trent Nelson428de652008-03-18 22:41:35 +00001295
1296 def mock__tokenize(readline, encoding):
1297 nonlocal encoding_used
1298 encoding_used = encoding
1299 out = []
1300 while True:
1301 next_line = readline()
1302 if next_line:
1303 out.append(next_line)
1304 continue
1305 return out
1306
1307 counter = 0
1308 def mock_readline():
1309 nonlocal counter
1310 counter += 1
1311 if counter == 5:
1312 return b''
Serhiy Storchaka74a49ac2015-03-20 16:46:19 +02001313 return str(counter).encode()
Trent Nelson428de652008-03-18 22:41:35 +00001314
1315 orig_detect_encoding = tokenize_module.detect_encoding
1316 orig__tokenize = tokenize_module._tokenize
1317 tokenize_module.detect_encoding = mock_detect_encoding
1318 tokenize_module._tokenize = mock__tokenize
1319 try:
1320 results = tokenize(mock_readline)
Serhiy Storchaka74a49ac2015-03-20 16:46:19 +02001321 self.assertEqual(list(results),
1322 [b'first', b'second', b'1', b'2', b'3', b'4'])
Trent Nelson428de652008-03-18 22:41:35 +00001323 finally:
1324 tokenize_module.detect_encoding = orig_detect_encoding
1325 tokenize_module._tokenize = orig__tokenize
1326
1327 self.assertTrue(encoding_used, encoding)
Raymond Hettinger68c04532005-06-10 11:05:19 +00001328
Yury Selivanov8085b802015-05-18 12:50:52 -04001329 def test_oneline_defs(self):
1330 buf = []
1331 for i in range(500):
1332 buf.append('def i{i}(): return {i}'.format(i=i))
1333 buf.append('OK')
1334 buf = '\n'.join(buf)
1335
1336 # Test that 500 consequent, one-line defs is OK
1337 toks = list(tokenize(BytesIO(buf.encode('utf-8')).readline))
1338 self.assertEqual(toks[-2].string, 'OK') # [-1] is always ENDMARKER
1339
Meador Inge00c7f852012-01-19 00:44:45 -06001340 def assertExactTypeEqual(self, opstr, *optypes):
1341 tokens = list(tokenize(BytesIO(opstr.encode('utf-8')).readline))
1342 num_optypes = len(optypes)
1343 self.assertEqual(len(tokens), 2 + num_optypes)
1344 self.assertEqual(token.tok_name[tokens[0].exact_type],
1345 token.tok_name[ENCODING])
1346 for i in range(num_optypes):
1347 self.assertEqual(token.tok_name[tokens[i + 1].exact_type],
1348 token.tok_name[optypes[i]])
1349 self.assertEqual(token.tok_name[tokens[1 + num_optypes].exact_type],
1350 token.tok_name[token.ENDMARKER])
1351
1352 def test_exact_type(self):
1353 self.assertExactTypeEqual('()', token.LPAR, token.RPAR)
1354 self.assertExactTypeEqual('[]', token.LSQB, token.RSQB)
1355 self.assertExactTypeEqual(':', token.COLON)
1356 self.assertExactTypeEqual(',', token.COMMA)
1357 self.assertExactTypeEqual(';', token.SEMI)
1358 self.assertExactTypeEqual('+', token.PLUS)
1359 self.assertExactTypeEqual('-', token.MINUS)
1360 self.assertExactTypeEqual('*', token.STAR)
1361 self.assertExactTypeEqual('/', token.SLASH)
1362 self.assertExactTypeEqual('|', token.VBAR)
1363 self.assertExactTypeEqual('&', token.AMPER)
1364 self.assertExactTypeEqual('<', token.LESS)
1365 self.assertExactTypeEqual('>', token.GREATER)
1366 self.assertExactTypeEqual('=', token.EQUAL)
1367 self.assertExactTypeEqual('.', token.DOT)
1368 self.assertExactTypeEqual('%', token.PERCENT)
1369 self.assertExactTypeEqual('{}', token.LBRACE, token.RBRACE)
1370 self.assertExactTypeEqual('==', token.EQEQUAL)
1371 self.assertExactTypeEqual('!=', token.NOTEQUAL)
1372 self.assertExactTypeEqual('<=', token.LESSEQUAL)
1373 self.assertExactTypeEqual('>=', token.GREATEREQUAL)
1374 self.assertExactTypeEqual('~', token.TILDE)
1375 self.assertExactTypeEqual('^', token.CIRCUMFLEX)
1376 self.assertExactTypeEqual('<<', token.LEFTSHIFT)
1377 self.assertExactTypeEqual('>>', token.RIGHTSHIFT)
1378 self.assertExactTypeEqual('**', token.DOUBLESTAR)
1379 self.assertExactTypeEqual('+=', token.PLUSEQUAL)
1380 self.assertExactTypeEqual('-=', token.MINEQUAL)
1381 self.assertExactTypeEqual('*=', token.STAREQUAL)
1382 self.assertExactTypeEqual('/=', token.SLASHEQUAL)
1383 self.assertExactTypeEqual('%=', token.PERCENTEQUAL)
1384 self.assertExactTypeEqual('&=', token.AMPEREQUAL)
1385 self.assertExactTypeEqual('|=', token.VBAREQUAL)
1386 self.assertExactTypeEqual('^=', token.CIRCUMFLEXEQUAL)
1388 self.assertExactTypeEqual('<<=', token.LEFTSHIFTEQUAL)
1389 self.assertExactTypeEqual('>>=', token.RIGHTSHIFTEQUAL)
1390 self.assertExactTypeEqual('**=', token.DOUBLESTAREQUAL)
1391 self.assertExactTypeEqual('//', token.DOUBLESLASH)
1392 self.assertExactTypeEqual('//=', token.DOUBLESLASHEQUAL)
Jim Fasarakis-Hilliardd4914e92017-03-14 22:16:15 +02001393 self.assertExactTypeEqual('...', token.ELLIPSIS)
1394 self.assertExactTypeEqual('->', token.RARROW)
Meador Inge00c7f852012-01-19 00:44:45 -06001395 self.assertExactTypeEqual('@', token.AT)
Benjamin Petersond51374e2014-04-09 23:55:56 -04001396 self.assertExactTypeEqual('@=', token.ATEQUAL)
Meador Inge00c7f852012-01-19 00:44:45 -06001397
1398 self.assertExactTypeEqual('a**2+b**2==c**2',
1399 NAME, token.DOUBLESTAR, NUMBER,
1400 token.PLUS,
1401 NAME, token.DOUBLESTAR, NUMBER,
1402 token.EQEQUAL,
1403 NAME, token.DOUBLESTAR, NUMBER)
1404 self.assertExactTypeEqual('{1, 2, 3}',
1405 token.LBRACE,
1406 token.NUMBER, token.COMMA,
1407 token.NUMBER, token.COMMA,
1408 token.NUMBER,
1409 token.RBRACE)
1410 self.assertExactTypeEqual('^(x & 0x1)',
1411 token.CIRCUMFLEX,
1412 token.LPAR,
1413 token.NAME, token.AMPER, token.NUMBER,
1414 token.RPAR)
Christian Heimesdd15f6c2008-03-16 00:07:10 +00001415
Ezio Melottifafa8b72012-11-03 17:46:51 +02001416 def test_pathological_trailing_whitespace(self):
1417 # See http://bugs.python.org/issue16152
1418 self.assertExactTypeEqual('@ ', token.AT)
Christian Heimesdd15f6c2008-03-16 00:07:10 +00001419
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +03001420
Terry Jan Reedy5e6db312014-02-17 16:45:48 -05001421class UntokenizeTest(TestCase):
Terry Jan Reedy58edfd92014-02-17 16:49:06 -05001422
Terry Jan Reedy5e6db312014-02-17 16:45:48 -05001423 def test_bad_input_order(self):
Terry Jan Reedy9dc3a362014-02-23 23:33:08 -05001424 # raise if the start row precedes the previous end row
Terry Jan Reedy5e6db312014-02-17 16:45:48 -05001425 u = Untokenizer()
1426 u.prev_row = 2
1427 u.prev_col = 2
1428 with self.assertRaises(ValueError) as cm:
1429 u.add_whitespace((1,3))
Terry Jan Reedy58edfd92014-02-17 16:49:06 -05001430 self.assertEqual(cm.exception.args[0],
Terry Jan Reedy5e6db312014-02-17 16:45:48 -05001431 'start (1,3) precedes previous end (2,2)')
Terry Jan Reedy9dc3a362014-02-23 23:33:08 -05001432 # raise if the start column precedes the previous end column in the same row
Terry Jan Reedy5e6db312014-02-17 16:45:48 -05001433 self.assertRaises(ValueError, u.add_whitespace, (2,1))
1434
Terry Jan Reedy9dc3a362014-02-23 23:33:08 -05001435 def test_backslash_continuation(self):
1436 # The problem is that <whitespace>\<newline> leaves no token
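        # in the stream, so add_whitespace() synthesizes one backslash-newline
        # per skipped row before padding out the column.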
1437 u = Untokenizer()
1438 u.prev_row = 1
1439 u.prev_col = 1
1440 u.tokens = []
1441 u.add_whitespace((2, 0))
1442 self.assertEqual(u.tokens, ['\\\n'])
1443 u.prev_row = 2
1444 u.add_whitespace((4, 4))
1445 self.assertEqual(u.tokens, ['\\\n', '\\\n\\\n', ' '])
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +03001446 TestRoundtrip.check_roundtrip(self, 'a\n b\n c\n \\\n c\n')
Terry Jan Reedy9dc3a362014-02-23 23:33:08 -05001447
Terry Jan Reedy5b8d2c32014-02-17 23:12:16 -05001448 def test_iter_compat(self):
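        # compat() is the path untokenize() takes when it is fed bare
        # 2-tuples without position information.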
1449 u = Untokenizer()
1450 token = (NAME, 'Hello')
1451 tokens = [(ENCODING, 'utf-8'), token]
1452 u.compat(token, iter([]))
1453 self.assertEqual(u.tokens, ["Hello "])
1454 u = Untokenizer()
1455 self.assertEqual(u.untokenize(iter([token])), 'Hello ')
1456 u = Untokenizer()
1457 self.assertEqual(u.untokenize(iter(tokens)), 'Hello ')
1458 self.assertEqual(u.encoding, 'utf-8')
1459 self.assertEqual(untokenize(iter(tokens)), b'Hello ')
1460
Terry Jan Reedy5e6db312014-02-17 16:45:48 -05001461
Jason R. Coombs5713b3c2015-06-20 19:52:22 -04001462class TestRoundtrip(TestCase):
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +03001463
1464 def check_roundtrip(self, f):
1465 """
1466 Test roundtrip for `untokenize`. `f` is an open file or a string.
1467 The source code in f is tokenized to both 5- and 2-tuples.
1468 Both sequences are converted back to source code via
1469 tokenize.untokenize(), and each result is tokenized again to 2-tuples.
1470 The test fails if the three 2-tuple sequences do not all match.
1471
1472 When untokenize bugs are fixed, untokenize with 5-tuples should
1473 reproduce code that does not contain a backslash continuation
1474 following spaces; a dedicated test should verify this.
1475 """
1476 # Get source code and original tokenizations
1477 if isinstance(f, str):
1478 code = f.encode('utf-8')
1479 else:
1480 code = f.read()
1481 f.close()
1482 readline = iter(code.splitlines(keepends=True)).__next__
1483 tokens5 = list(tokenize(readline))
1484 tokens2 = [tok[:2] for tok in tokens5]
1485 # Reproduce tokens2 from pairs
1486 bytes_from2 = untokenize(tokens2)
1487 readline2 = iter(bytes_from2.splitlines(keepends=True)).__next__
1488 tokens2_from2 = [tok[:2] for tok in tokenize(readline2)]
1489 self.assertEqual(tokens2_from2, tokens2)
1490 # Reproduce tokens2 from 5-tuples
1491 bytes_from5 = untokenize(tokens5)
1492 readline5 = iter(bytes_from5.splitlines(keepends=True)).__next__
1493 tokens2_from5 = [tok[:2] for tok in tokenize(readline5)]
1494 self.assertEqual(tokens2_from5, tokens2)
1495
1496 def test_roundtrip(self):
1497 # There are some standard formatting practices that are easy to get right.
1498
1499 self.check_roundtrip("if x == 1:\n"
1500 " print(x)\n")
1501 self.check_roundtrip("# This is a comment\n"
1502 "# This also")
1503
1504 # Some people use different formatting conventions, which makes
1505 # untokenize a little trickier. Note that this test involves trailing
1506 # whitespace after the colon.
1508
1509 self.check_roundtrip("if x == 1 : \n"
1510 " print(x)\n")
1511 fn = support.findfile("tokenize_tests.txt")
1512 with open(fn, 'rb') as f:
1513 self.check_roundtrip(f)
1514 self.check_roundtrip("if x == 1:\n"
1515 " # A comment by itself.\n"
1516 " print(x) # Comment here, too.\n"
1517 " # Another comment.\n"
1518 "after_if = True\n")
1519 self.check_roundtrip("if (x # The comments need to go in the right place\n"
1520 " == 1):\n"
1521 " print('x==1')\n")
1522 self.check_roundtrip("class Test: # A comment here\n"
1523 " # A comment with weird indent\n"
1524 " after_com = 5\n"
1525 " def x(m): return m*5 # a one liner\n"
1526 " def y(m): # A whitespace after the colon\n"
1527 " return y*4 # 3-space indent\n")
1528
1529 # Some error-handling code
1530 self.check_roundtrip("try: import somemodule\n"
1531 "except ImportError: # comment\n"
1532 " print('Can not import' # comment2\n)"
1533 "else: print('Loaded')\n")
1534
1535 def test_continuation(self):
1536 # Implicit continuation inside balanced brackets
1537 self.check_roundtrip("a = (3,4, \n"
1538 "5,6)\n"
1539 "y = [3, 4,\n"
1540 "5]\n"
1541 "z = {'a': 5,\n"
1542 "'b':15, 'c':True}\n"
1543 "x = len(y) + 5 - a[\n"
1544 "3] - a[2]\n"
1545 "+ len(z) - z[\n"
1546 "'b']\n")
1547
1548 def test_backslash_continuation(self):
1549 # Backslash means line continuation, except for comments
1550 self.check_roundtrip("x=1+\\\n"
1551 "1\n"
1552 "# This is a comment\\\n"
1553 "# This also\n")
1554 self.check_roundtrip("# Comment \\\n"
1555 "x = 0")
1556
1557 def test_string_concatenation(self):
1558 # Two string literals on the same line
1559 self.check_roundtrip("'' ''")
1560
1561 def test_random_files(self):
1562 # Test roundtrip on random Python modules.
1563 # Pass the '-ucpu' option to process the full directory.
1564
1565 import glob, random
1566 fn = support.findfile("tokenize_tests.txt")
1567 tempdir = os.path.dirname(fn) or os.curdir
1568 testfiles = glob.glob(os.path.join(tempdir, "test*.py"))
1569
Brett Cannona721aba2016-09-09 14:57:09 -07001570 # Tokenize is broken on test_pep3131.py because regular expressions are
1571 # broken on the obscure unicode identifiers in it. *sigh*
1572 # With roundtrip extended to test the 5-tuple mode of untokenize,
1573 # 7 more testfiles fail. Remove them also until the failure is diagnosed.
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +03001574
Zachary Ware724f6a62016-09-09 12:55:37 -07001575 testfiles.remove(os.path.join(tempdir, "test_unicode_identifiers.py"))
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +03001576 for f in ('buffer', 'builtin', 'fileio', 'inspect', 'os', 'platform', 'sys'):
1577 testfiles.remove(os.path.join(tempdir, "test_%s.py" % f))
1578
1579 if not support.is_resource_enabled("cpu"):
1580 testfiles = random.sample(testfiles, 10)
1581
1582 for testfile in testfiles:
1583 with open(testfile, 'rb') as f:
1584 with self.subTest(file=testfile):
1585 self.check_roundtrip(f)
1586
1587
Jason R. Coombs5713b3c2015-06-20 19:52:22 -04001588 def roundtrip(self, code):
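        # Tokenize code and untokenize the full 5-tuples straight back to
        # text (check_roundtrip() additionally exercises the 2-tuple path).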
1589 if isinstance(code, str):
1590 code = code.encode('utf-8')
Jason R. Coombsb6d1cdd2015-06-25 22:42:24 -04001591 return untokenize(tokenize(BytesIO(code).readline)).decode('utf-8')
Jason R. Coombs5713b3c2015-06-20 19:52:22 -04001592
1593 def test_indentation_semantics_retained(self):
1594 """
1595 Ensure that although whitespace might be mutated in a roundtrip,
1596 the semantic meaning of the indentation remains consistent.
1597 """
1598 code = "if False:\n\tx=3\n\tx=3\n"
Jason R. Coombsb6d1cdd2015-06-25 22:42:24 -04001599 codelines = self.roundtrip(code).split('\n')
Jason R. Coombs5713b3c2015-06-20 19:52:22 -04001600 self.assertEqual(codelines[1], codelines[2])
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +03001601 self.check_roundtrip(code)
Jason R. Coombs5713b3c2015-06-20 19:52:22 -04001602
1603
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001604if __name__ == "__main__":
Brett Cannona721aba2016-09-09 14:57:09 -07001605 import unittest
1606 unittest.main()