from test import support
from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
                     STRING, ENDMARKER, ENCODING, tok_name, detect_encoding,
                     open as tokenize_open, Untokenizer)
from io import BytesIO
from unittest import TestCase, mock
from test.test_grammar import (VALID_UNDERSCORE_LITERALS,
                               INVALID_UNDERSCORE_LITERALS)
import os
import token


class TokenizeTest(TestCase):
    # Tests for the tokenize module.

    # The tests can be really simple. Given a small fragment of source
    # code, print out a table with tokens. The ENDMARKER is omitted for
    # brevity.

    def check_tokenize(self, s, expected):
        # Format the tokens in s in a table format.
        # The ENDMARKER is omitted.
        result = []
        f = BytesIO(s.encode('utf-8'))
        for type, token, start, end, line in tokenize(f.readline):
            if type == ENDMARKER:
                break
            type = tok_name[type]
            result.append(f"    {type:10} {token!r:13} {start} {end}")
        self.assertEqual(result,
                         ["    ENCODING   'utf-8'       (0, 0) (0, 0)"] +
                         expected.rstrip().splitlines())

    def test_basic(self):
        self.check_tokenize("1 + 1", """\
    NUMBER     '1'           (1, 0) (1, 1)
    OP         '+'           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    """)
        self.check_tokenize("if False:\n"
                            "    # NL\n"
                            "    \n"
                            "    True = False # NEWLINE\n", """\
    NAME       'if'          (1, 0) (1, 2)
    NAME       'False'       (1, 3) (1, 8)
    OP         ':'           (1, 8) (1, 9)
    NEWLINE    '\\n'          (1, 9) (1, 10)
    COMMENT    '# NL'        (2, 4) (2, 8)
    NL         '\\n'          (2, 8) (2, 9)
    NL         '\\n'          (3, 4) (3, 5)
    INDENT     '    '        (4, 0) (4, 4)
    NAME       'True'        (4, 4) (4, 8)
    OP         '='           (4, 9) (4, 10)
    NAME       'False'       (4, 11) (4, 16)
    COMMENT    '# NEWLINE'   (4, 17) (4, 26)
    NEWLINE    '\\n'          (4, 26) (4, 27)
    DEDENT     ''            (5, 0) (5, 0)
    """)
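        # The last line below dedents to a column that matches no
        # enclosing indentation level, so tokenize must raise.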
        indent_error_file = b"""\
def k(x):
    x += 2
  x += 5
"""
        readline = BytesIO(indent_error_file).readline
        with self.assertRaisesRegex(IndentationError,
                                    "unindent does not match any "
                                    "outer indentation level"):
            for tok in tokenize(readline):
                pass

    def test_int(self):
        # Ordinary integers and binary operators
        self.check_tokenize("0xff <= 255", """\
    NUMBER     '0xff'        (1, 0) (1, 4)
    OP         '<='          (1, 5) (1, 7)
    NUMBER     '255'         (1, 8) (1, 11)
    """)
        self.check_tokenize("0b10 <= 255", """\
    NUMBER     '0b10'        (1, 0) (1, 4)
    OP         '<='          (1, 5) (1, 7)
    NUMBER     '255'         (1, 8) (1, 11)
    """)
        self.check_tokenize("0o123 <= 0O123", """\
    NUMBER     '0o123'       (1, 0) (1, 5)
    OP         '<='          (1, 6) (1, 8)
    NUMBER     '0O123'       (1, 9) (1, 14)
    """)
        self.check_tokenize("1234567 > ~0x15", """\
    NUMBER     '1234567'     (1, 0) (1, 7)
    OP         '>'           (1, 8) (1, 9)
    OP         '~'           (1, 10) (1, 11)
    NUMBER     '0x15'        (1, 11) (1, 15)
    """)
        self.check_tokenize("2134568 != 1231515", """\
    NUMBER     '2134568'     (1, 0) (1, 7)
    OP         '!='          (1, 8) (1, 10)
    NUMBER     '1231515'     (1, 11) (1, 18)
    """)
        self.check_tokenize("(-124561-1) & 200000000", """\
    OP         '('           (1, 0) (1, 1)
    OP         '-'           (1, 1) (1, 2)
    NUMBER     '124561'      (1, 2) (1, 8)
    OP         '-'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         ')'           (1, 10) (1, 11)
    OP         '&'           (1, 12) (1, 13)
    NUMBER     '200000000'   (1, 14) (1, 23)
    """)
        self.check_tokenize("0xdeadbeef != -1", """\
    NUMBER     '0xdeadbeef'  (1, 0) (1, 10)
    OP         '!='          (1, 11) (1, 13)
    OP         '-'           (1, 14) (1, 15)
    NUMBER     '1'           (1, 15) (1, 16)
    """)
        self.check_tokenize("0xdeadc0de & 12345", """\
    NUMBER     '0xdeadc0de'  (1, 0) (1, 10)
    OP         '&'           (1, 11) (1, 12)
    NUMBER     '12345'       (1, 13) (1, 18)
    """)
        self.check_tokenize("0xFF & 0x15 | 1234", """\
    NUMBER     '0xFF'        (1, 0) (1, 4)
    OP         '&'           (1, 5) (1, 6)
    NUMBER     '0x15'        (1, 7) (1, 11)
    OP         '|'           (1, 12) (1, 13)
    NUMBER     '1234'        (1, 14) (1, 18)
    """)

    def test_long(self):
        # Long integers
        self.check_tokenize("x = 0", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '0'           (1, 4) (1, 5)
    """)
        self.check_tokenize("x = 0xfffffffffff", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '0xfffffffffff' (1, 4) (1, 17)
    """)
        self.check_tokenize("x = 123141242151251616110", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '123141242151251616110' (1, 4) (1, 25)
    """)
        self.check_tokenize("x = -15921590215012591", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    OP         '-'           (1, 4) (1, 5)
    NUMBER     '15921590215012591' (1, 5) (1, 22)
    """)

    def test_float(self):
        # Floating point numbers
        self.check_tokenize("x = 3.14159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3.14159'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = 314159.", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '314159.'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = .314159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '.314159'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = 3e14159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3e14159'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = 3E123", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3E123'       (1, 4) (1, 9)
    """)
        self.check_tokenize("x+y = 3e-1230", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '+'           (1, 1) (1, 2)
    NAME       'y'           (1, 2) (1, 3)
    OP         '='           (1, 4) (1, 5)
    NUMBER     '3e-1230'     (1, 6) (1, 13)
    """)
        self.check_tokenize("x = 3.14e159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3.14e159'    (1, 4) (1, 12)
    """)

    def test_underscore_literals(self):
        def number_token(s):
            f = BytesIO(s.encode('utf-8'))
            for toktype, token, start, end, line in tokenize(f.readline):
                if toktype == NUMBER:
                    return token
            return 'invalid token'
        for lit in VALID_UNDERSCORE_LITERALS:
            if '(' in lit:
                # this won't work with compound complex inputs
                continue
            self.assertEqual(number_token(lit), lit)
        for lit in INVALID_UNDERSCORE_LITERALS:
            self.assertNotEqual(number_token(lit), lit)

    def test_string(self):
        # String literals
        self.check_tokenize("x = ''; y = \"\"", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "''"          (1, 4) (1, 6)
    OP         ';'           (1, 6) (1, 7)
    NAME       'y'           (1, 8) (1, 9)
    OP         '='           (1, 10) (1, 11)
    STRING     '""'          (1, 12) (1, 14)
    """)
        self.check_tokenize("x = '\"'; y = \"'\"", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '\\'"\\''       (1, 4) (1, 7)
    OP         ';'           (1, 7) (1, 8)
    NAME       'y'           (1, 9) (1, 10)
    OP         '='           (1, 11) (1, 12)
    STRING     '"\\'"'        (1, 13) (1, 16)
    """)
        self.check_tokenize("x = \"doesn't \"shrink\", does it\"", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '"doesn\\'t "' (1, 4) (1, 14)
    NAME       'shrink'      (1, 14) (1, 20)
    STRING     '", does it"' (1, 20) (1, 31)
    """)
        self.check_tokenize("x = 'abc' + 'ABC'", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "'abc'"       (1, 4) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    STRING     "'ABC'"       (1, 12) (1, 17)
    """)
        self.check_tokenize('y = "ABC" + "ABC"', """\
    NAME       'y'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '"ABC"'       (1, 4) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    STRING     '"ABC"'       (1, 12) (1, 17)
    """)
        self.check_tokenize("x = r'abc' + r'ABC' + R'ABC' + R'ABC'", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "r'abc'"      (1, 4) (1, 10)
    OP         '+'           (1, 11) (1, 12)
    STRING     "r'ABC'"      (1, 13) (1, 19)
    OP         '+'           (1, 20) (1, 21)
    STRING     "R'ABC'"      (1, 22) (1, 28)
    OP         '+'           (1, 29) (1, 30)
    STRING     "R'ABC'"      (1, 31) (1, 37)
    """)
        self.check_tokenize('y = r"abc" + r"ABC" + R"ABC" + R"ABC"', """\
    NAME       'y'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     'r"abc"'      (1, 4) (1, 10)
    OP         '+'           (1, 11) (1, 12)
    STRING     'r"ABC"'      (1, 13) (1, 19)
    OP         '+'           (1, 20) (1, 21)
    STRING     'R"ABC"'      (1, 22) (1, 28)
    OP         '+'           (1, 29) (1, 30)
    STRING     'R"ABC"'      (1, 31) (1, 37)
    """)

        self.check_tokenize("u'abc' + U'abc'", """\
    STRING     "u'abc'"      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     "U'abc'"      (1, 9) (1, 15)
    """)
        self.check_tokenize('u"abc" + U"abc"', """\
    STRING     'u"abc"'      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     'U"abc"'      (1, 9) (1, 15)
    """)

        self.check_tokenize("b'abc' + B'abc'", """\
    STRING     "b'abc'"      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     "B'abc'"      (1, 9) (1, 15)
    """)
        self.check_tokenize('b"abc" + B"abc"', """\
    STRING     'b"abc"'      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     'B"abc"'      (1, 9) (1, 15)
    """)
        self.check_tokenize("br'abc' + bR'abc' + Br'abc' + BR'abc'", """\
    STRING     "br'abc'"     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     "bR'abc'"     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     "Br'abc'"     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     "BR'abc'"     (1, 30) (1, 37)
    """)
        self.check_tokenize('br"abc" + bR"abc" + Br"abc" + BR"abc"', """\
    STRING     'br"abc"'     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     'bR"abc"'     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     'Br"abc"'     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     'BR"abc"'     (1, 30) (1, 37)
    """)
        self.check_tokenize("rb'abc' + rB'abc' + Rb'abc' + RB'abc'", """\
    STRING     "rb'abc'"     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     "rB'abc'"     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     "Rb'abc'"     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     "RB'abc'"     (1, 30) (1, 37)
    """)
        self.check_tokenize('rb"abc" + rB"abc" + Rb"abc" + RB"abc"', """\
    STRING     'rb"abc"'     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     'rB"abc"'     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     'Rb"abc"'     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     'RB"abc"'     (1, 30) (1, 37)
    """)
        # Check 0, 1, and 2 character string prefixes.
        self.check_tokenize(r'"a\
de\
fg"', """\
    STRING     '"a\\\\\\nde\\\\\\nfg"\' (1, 0) (3, 3)
    """)
        self.check_tokenize(r'u"a\
de"', """\
    STRING     'u"a\\\\\\nde"\'  (1, 0) (2, 3)
    """)
        self.check_tokenize(r'rb"a\
d"', """\
    STRING     'rb"a\\\\\\nd"\'  (1, 0) (2, 2)
    """)
        self.check_tokenize(r'"""a\
b"""', """\
    STRING     '\"\""a\\\\\\nb\"\""' (1, 0) (2, 4)
    """)
        self.check_tokenize(r'u"""a\
b"""', """\
    STRING     'u\"\""a\\\\\\nb\"\""' (1, 0) (2, 4)
    """)
        self.check_tokenize(r'rb"""a\
b\
c"""', """\
    STRING     'rb"\""a\\\\\\nb\\\\\\nc"\""' (1, 0) (3, 4)
    """)
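        # f-string prefixes, including the raw variants.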
        self.check_tokenize('f"abc"', """\
    STRING     'f"abc"'      (1, 0) (1, 6)
    """)
        self.check_tokenize('fR"a{b}c"', """\
    STRING     'fR"a{b}c"'   (1, 0) (1, 9)
    """)
        self.check_tokenize('f"""abc"""', """\
    STRING     'f\"\"\"abc\"\"\"'  (1, 0) (1, 10)
    """)
        self.check_tokenize(r'f"abc\
def"', """\
    STRING     'f"abc\\\\\\ndef"' (1, 0) (2, 4)
    """)
        self.check_tokenize(r'Rf"abc\
def"', """\
    STRING     'Rf"abc\\\\\\ndef"' (1, 0) (2, 4)
    """)

    def test_function(self):
        self.check_tokenize("def d22(a, b, c=2, d=2, *k): pass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'd22'         (1, 4) (1, 7)
    OP         '('           (1, 7) (1, 8)
    NAME       'a'           (1, 8) (1, 9)
    OP         ','           (1, 9) (1, 10)
    NAME       'b'           (1, 11) (1, 12)
    OP         ','           (1, 12) (1, 13)
    NAME       'c'           (1, 14) (1, 15)
    OP         '='           (1, 15) (1, 16)
    NUMBER     '2'           (1, 16) (1, 17)
    OP         ','           (1, 17) (1, 18)
    NAME       'd'           (1, 19) (1, 20)
    OP         '='           (1, 20) (1, 21)
    NUMBER     '2'           (1, 21) (1, 22)
    OP         ','           (1, 22) (1, 23)
    OP         '*'           (1, 24) (1, 25)
    NAME       'k'           (1, 25) (1, 26)
    OP         ')'           (1, 26) (1, 27)
    OP         ':'           (1, 27) (1, 28)
    NAME       'pass'        (1, 29) (1, 33)
    """)
        self.check_tokenize("def d01v_(a=1, *k, **w): pass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'd01v_'       (1, 4) (1, 9)
    OP         '('           (1, 9) (1, 10)
    NAME       'a'           (1, 10) (1, 11)
    OP         '='           (1, 11) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         ','           (1, 13) (1, 14)
    OP         '*'           (1, 15) (1, 16)
    NAME       'k'           (1, 16) (1, 17)
    OP         ','           (1, 17) (1, 18)
    OP         '**'          (1, 19) (1, 21)
    NAME       'w'           (1, 21) (1, 22)
    OP         ')'           (1, 22) (1, 23)
    OP         ':'           (1, 23) (1, 24)
    NAME       'pass'        (1, 25) (1, 29)
    """)
        self.check_tokenize("def d23(a: str, b: int=3) -> int: pass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'd23'         (1, 4) (1, 7)
    OP         '('           (1, 7) (1, 8)
    NAME       'a'           (1, 8) (1, 9)
    OP         ':'           (1, 9) (1, 10)
    NAME       'str'         (1, 11) (1, 14)
    OP         ','           (1, 14) (1, 15)
    NAME       'b'           (1, 16) (1, 17)
    OP         ':'           (1, 17) (1, 18)
    NAME       'int'         (1, 19) (1, 22)
    OP         '='           (1, 22) (1, 23)
    NUMBER     '3'           (1, 23) (1, 24)
    OP         ')'           (1, 24) (1, 25)
    OP         '->'          (1, 26) (1, 28)
    NAME       'int'         (1, 29) (1, 32)
    OP         ':'           (1, 32) (1, 33)
    NAME       'pass'        (1, 34) (1, 38)
    """)

    def test_comparison(self):
        # Comparison
        self.check_tokenize("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != "
                            "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass", """\
    NAME       'if'          (1, 0) (1, 2)
    NUMBER     '1'           (1, 3) (1, 4)
    OP         '<'           (1, 5) (1, 6)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '>'           (1, 9) (1, 10)
    NUMBER     '1'           (1, 11) (1, 12)
    OP         '=='          (1, 13) (1, 15)
    NUMBER     '1'           (1, 16) (1, 17)
    OP         '>='          (1, 18) (1, 20)
    NUMBER     '5'           (1, 21) (1, 22)
    OP         '<='          (1, 23) (1, 25)
    NUMBER     '0x15'        (1, 26) (1, 30)
    OP         '<='          (1, 31) (1, 33)
    NUMBER     '0x12'        (1, 34) (1, 38)
    OP         '!='          (1, 39) (1, 41)
    NUMBER     '1'           (1, 42) (1, 43)
    NAME       'and'         (1, 44) (1, 47)
    NUMBER     '5'           (1, 48) (1, 49)
    NAME       'in'          (1, 50) (1, 52)
    NUMBER     '1'           (1, 53) (1, 54)
    NAME       'not'         (1, 55) (1, 58)
    NAME       'in'          (1, 59) (1, 61)
    NUMBER     '1'           (1, 62) (1, 63)
    NAME       'is'          (1, 64) (1, 66)
    NUMBER     '1'           (1, 67) (1, 68)
    NAME       'or'          (1, 69) (1, 71)
    NUMBER     '5'           (1, 72) (1, 73)
    NAME       'is'          (1, 74) (1, 76)
    NAME       'not'         (1, 77) (1, 80)
    NUMBER     '1'           (1, 81) (1, 82)
    OP         ':'           (1, 82) (1, 83)
    NAME       'pass'        (1, 84) (1, 88)
    """)

    def test_shift(self):
        # Shift
        self.check_tokenize("x = 1 << 1 >> 5", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '<<'          (1, 6) (1, 8)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '>>'          (1, 11) (1, 13)
    NUMBER     '5'           (1, 14) (1, 15)
    """)

    def test_additive(self):
        # Additive
        self.check_tokenize("x = 1 - y + 15 - 1 + 0x124 + z + a[5]", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '-'           (1, 6) (1, 7)
    NAME       'y'           (1, 8) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    NUMBER     '15'          (1, 12) (1, 14)
    OP         '-'           (1, 15) (1, 16)
    NUMBER     '1'           (1, 17) (1, 18)
    OP         '+'           (1, 19) (1, 20)
    NUMBER     '0x124'       (1, 21) (1, 26)
    OP         '+'           (1, 27) (1, 28)
    NAME       'z'           (1, 29) (1, 30)
    OP         '+'           (1, 31) (1, 32)
    NAME       'a'           (1, 33) (1, 34)
    OP         '['           (1, 34) (1, 35)
    NUMBER     '5'           (1, 35) (1, 36)
    OP         ']'           (1, 36) (1, 37)
    """)

    def test_multiplicative(self):
        # Multiplicative
        self.check_tokenize("x = 1//1*1/5*12%0x12@42", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '//'          (1, 5) (1, 7)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '*'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '/'           (1, 10) (1, 11)
    NUMBER     '5'           (1, 11) (1, 12)
    OP         '*'           (1, 12) (1, 13)
    NUMBER     '12'          (1, 13) (1, 15)
    OP         '%'           (1, 15) (1, 16)
    NUMBER     '0x12'        (1, 16) (1, 20)
    OP         '@'           (1, 20) (1, 21)
    NUMBER     '42'          (1, 21) (1, 23)
    """)

    def test_unary(self):
        # Unary
        self.check_tokenize("~1 ^ 1 & 1 |1 ^ -1", """\
    OP         '~'           (1, 0) (1, 1)
    NUMBER     '1'           (1, 1) (1, 2)
    OP         '^'           (1, 3) (1, 4)
    NUMBER     '1'           (1, 5) (1, 6)
    OP         '&'           (1, 7) (1, 8)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '|'           (1, 11) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         '^'           (1, 14) (1, 15)
    OP         '-'           (1, 16) (1, 17)
    NUMBER     '1'           (1, 17) (1, 18)
    """)
        self.check_tokenize("-1*1/1+1*1//1 - ---1**1", """\
    OP         '-'           (1, 0) (1, 1)
    NUMBER     '1'           (1, 1) (1, 2)
    OP         '*'           (1, 2) (1, 3)
    NUMBER     '1'           (1, 3) (1, 4)
    OP         '/'           (1, 4) (1, 5)
    NUMBER     '1'           (1, 5) (1, 6)
    OP         '+'           (1, 6) (1, 7)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '*'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '//'          (1, 10) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         '-'           (1, 14) (1, 15)
    OP         '-'           (1, 16) (1, 17)
    OP         '-'           (1, 17) (1, 18)
    OP         '-'           (1, 18) (1, 19)
    NUMBER     '1'           (1, 19) (1, 20)
    OP         '**'          (1, 20) (1, 22)
    NUMBER     '1'           (1, 22) (1, 23)
    """)

    def test_selector(self):
        # Selector
        self.check_tokenize("import sys, time\nx = sys.modules['time'].time()", """\
    NAME       'import'      (1, 0) (1, 6)
    NAME       'sys'         (1, 7) (1, 10)
    OP         ','           (1, 10) (1, 11)
    NAME       'time'        (1, 12) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    NAME       'x'           (2, 0) (2, 1)
    OP         '='           (2, 2) (2, 3)
    NAME       'sys'         (2, 4) (2, 7)
    OP         '.'           (2, 7) (2, 8)
    NAME       'modules'     (2, 8) (2, 15)
    OP         '['           (2, 15) (2, 16)
    STRING     "'time'"      (2, 16) (2, 22)
    OP         ']'           (2, 22) (2, 23)
    OP         '.'           (2, 23) (2, 24)
    NAME       'time'        (2, 24) (2, 28)
    OP         '('           (2, 28) (2, 29)
    OP         ')'           (2, 29) (2, 30)
    """)

    def test_method(self):
        # Methods
        self.check_tokenize("@staticmethod\ndef foo(x,y): pass", """\
    OP         '@'           (1, 0) (1, 1)
    NAME       'staticmethod' (1, 1) (1, 13)
    NEWLINE    '\\n'          (1, 13) (1, 14)
    NAME       'def'         (2, 0) (2, 3)
    NAME       'foo'         (2, 4) (2, 7)
    OP         '('           (2, 7) (2, 8)
    NAME       'x'           (2, 8) (2, 9)
    OP         ','           (2, 9) (2, 10)
    NAME       'y'           (2, 10) (2, 11)
    OP         ')'           (2, 11) (2, 12)
    OP         ':'           (2, 12) (2, 13)
    NAME       'pass'        (2, 14) (2, 18)
    """)

    def test_tabs(self):
        # Evil tabs
        self.check_tokenize("def f():\n"
                            "\tif x\n"
                            "        \tpass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'f'           (1, 4) (1, 5)
    OP         '('           (1, 5) (1, 6)
    OP         ')'           (1, 6) (1, 7)
    OP         ':'           (1, 7) (1, 8)
    NEWLINE    '\\n'          (1, 8) (1, 9)
    INDENT     '\\t'          (2, 0) (2, 1)
    NAME       'if'          (2, 1) (2, 3)
    NAME       'x'           (2, 4) (2, 5)
    NEWLINE    '\\n'          (2, 5) (2, 6)
    INDENT     '        \\t'  (3, 0) (3, 9)
    NAME       'pass'        (3, 9) (3, 13)
    DEDENT     ''            (4, 0) (4, 0)
    DEDENT     ''            (4, 0) (4, 0)
    """)

    def test_non_ascii_identifiers(self):
        # Non-ascii identifiers
        self.check_tokenize("Örter = 'places'\ngrün = 'green'", """\
    NAME       'Örter'       (1, 0) (1, 5)
    OP         '='           (1, 6) (1, 7)
    STRING     "'places'"    (1, 8) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    NAME       'grün'        (2, 0) (2, 4)
    OP         '='           (2, 5) (2, 6)
    STRING     "'green'"     (2, 7) (2, 14)
    """)

    def test_unicode(self):
        # Legacy unicode literals:
        self.check_tokenize("Örter = u'places'\ngrün = U'green'", """\
    NAME       'Örter'       (1, 0) (1, 5)
    OP         '='           (1, 6) (1, 7)
    STRING     "u'places'"   (1, 8) (1, 17)
    NEWLINE    '\\n'          (1, 17) (1, 18)
    NAME       'grün'        (2, 0) (2, 4)
    OP         '='           (2, 5) (2, 6)
    STRING     "U'green'"    (2, 7) (2, 15)
    """)

    def test_async(self):
        # Async/await extension:
        self.check_tokenize("async = 1", """\
    NAME       'async'       (1, 0) (1, 5)
    OP         '='           (1, 6) (1, 7)
    NUMBER     '1'           (1, 8) (1, 9)
    """)

        self.check_tokenize("a = (async = 1)", """\
    NAME       'a'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    OP         '('           (1, 4) (1, 5)
    NAME       'async'       (1, 5) (1, 10)
    OP         '='           (1, 11) (1, 12)
    NUMBER     '1'           (1, 13) (1, 14)
    OP         ')'           (1, 14) (1, 15)
    """)

        self.check_tokenize("async()", """\
    NAME       'async'       (1, 0) (1, 5)
    OP         '('           (1, 5) (1, 6)
    OP         ')'           (1, 6) (1, 7)
    """)

        self.check_tokenize("class async(Bar):pass", """\
    NAME       'class'       (1, 0) (1, 5)
    NAME       'async'       (1, 6) (1, 11)
    OP         '('           (1, 11) (1, 12)
    NAME       'Bar'         (1, 12) (1, 15)
    OP         ')'           (1, 15) (1, 16)
    OP         ':'           (1, 16) (1, 17)
    NAME       'pass'        (1, 17) (1, 21)
    """)

        self.check_tokenize("class async:pass", """\
    NAME       'class'       (1, 0) (1, 5)
    NAME       'async'       (1, 6) (1, 11)
    OP         ':'           (1, 11) (1, 12)
    NAME       'pass'        (1, 12) (1, 16)
    """)

        self.check_tokenize("await = 1", """\
    NAME       'await'       (1, 0) (1, 5)
    OP         '='           (1, 6) (1, 7)
    NUMBER     '1'           (1, 8) (1, 9)
    """)

        self.check_tokenize("foo.async", """\
    NAME       'foo'         (1, 0) (1, 3)
    OP         '.'           (1, 3) (1, 4)
    NAME       'async'       (1, 4) (1, 9)
    """)

        self.check_tokenize("async for a in b: pass", """\
    NAME       'async'       (1, 0) (1, 5)
    NAME       'for'         (1, 6) (1, 9)
    NAME       'a'           (1, 10) (1, 11)
    NAME       'in'          (1, 12) (1, 14)
    NAME       'b'           (1, 15) (1, 16)
    OP         ':'           (1, 16) (1, 17)
    NAME       'pass'        (1, 18) (1, 22)
    """)

        self.check_tokenize("async with a as b: pass", """\
    NAME       'async'       (1, 0) (1, 5)
    NAME       'with'        (1, 6) (1, 10)
    NAME       'a'           (1, 11) (1, 12)
    NAME       'as'          (1, 13) (1, 15)
    NAME       'b'           (1, 16) (1, 17)
    OP         ':'           (1, 17) (1, 18)
    NAME       'pass'        (1, 19) (1, 23)
    """)

        self.check_tokenize("async.foo", """\
    NAME       'async'       (1, 0) (1, 5)
    OP         '.'           (1, 5) (1, 6)
    NAME       'foo'         (1, 6) (1, 9)
    """)

        self.check_tokenize("async", """\
    NAME       'async'       (1, 0) (1, 5)
    """)

        self.check_tokenize("async\n#comment\nawait", """\
    NAME       'async'       (1, 0) (1, 5)
    NEWLINE    '\\n'          (1, 5) (1, 6)
    COMMENT    '#comment'    (2, 0) (2, 8)
    NL         '\\n'          (2, 8) (2, 9)
    NAME       'await'       (3, 0) (3, 5)
    """)

        self.check_tokenize("async\n...\nawait", """\
    NAME       'async'       (1, 0) (1, 5)
    NEWLINE    '\\n'          (1, 5) (1, 6)
    OP         '...'         (2, 0) (2, 3)
    NEWLINE    '\\n'          (2, 3) (2, 4)
    NAME       'await'       (3, 0) (3, 5)
    """)

        self.check_tokenize("async\nawait", """\
    NAME       'async'       (1, 0) (1, 5)
    NEWLINE    '\\n'          (1, 5) (1, 6)
    NAME       'await'       (2, 0) (2, 5)
    """)

        self.check_tokenize("foo.async + 1", """\
    NAME       'foo'         (1, 0) (1, 3)
    OP         '.'           (1, 3) (1, 4)
    NAME       'async'       (1, 4) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    NUMBER     '1'           (1, 12) (1, 13)
    """)

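        # Inside an "async def" block the tokenizer emits dedicated ASYNC
        # and AWAIT tokens instead of plain NAMEs, as the tables below show.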
        self.check_tokenize("async def foo(): pass", """\
    ASYNC      'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'foo'         (1, 10) (1, 13)
    OP         '('           (1, 13) (1, 14)
    OP         ')'           (1, 14) (1, 15)
    OP         ':'           (1, 15) (1, 16)
    NAME       'pass'        (1, 17) (1, 21)
    """)

        self.check_tokenize('''\
async def foo():
  def foo(await):
    await = 1
  if 1:
    await
async += 1
''', """\
    ASYNC      'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'foo'         (1, 10) (1, 13)
    OP         '('           (1, 13) (1, 14)
    OP         ')'           (1, 14) (1, 15)
    OP         ':'           (1, 15) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    INDENT     '  '          (2, 0) (2, 2)
    NAME       'def'         (2, 2) (2, 5)
    NAME       'foo'         (2, 6) (2, 9)
    OP         '('           (2, 9) (2, 10)
    AWAIT      'await'       (2, 10) (2, 15)
    OP         ')'           (2, 15) (2, 16)
    OP         ':'           (2, 16) (2, 17)
    NEWLINE    '\\n'          (2, 17) (2, 18)
    INDENT     '    '        (3, 0) (3, 4)
    AWAIT      'await'       (3, 4) (3, 9)
    OP         '='           (3, 10) (3, 11)
    NUMBER     '1'           (3, 12) (3, 13)
    NEWLINE    '\\n'          (3, 13) (3, 14)
    DEDENT     ''            (4, 2) (4, 2)
    NAME       'if'          (4, 2) (4, 4)
    NUMBER     '1'           (4, 5) (4, 6)
    OP         ':'           (4, 6) (4, 7)
    NEWLINE    '\\n'          (4, 7) (4, 8)
    INDENT     '    '        (5, 0) (5, 4)
    AWAIT      'await'       (5, 4) (5, 9)
    NEWLINE    '\\n'          (5, 9) (5, 10)
    DEDENT     ''            (6, 0) (6, 0)
    DEDENT     ''            (6, 0) (6, 0)
    NAME       'async'       (6, 0) (6, 5)
    OP         '+='          (6, 6) (6, 8)
    NUMBER     '1'           (6, 9) (6, 10)
    NEWLINE    '\\n'          (6, 10) (6, 11)
    """)

        self.check_tokenize('''\
async def foo():
  async for i in 1: pass''', """\
    ASYNC      'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'foo'         (1, 10) (1, 13)
    OP         '('           (1, 13) (1, 14)
    OP         ')'           (1, 14) (1, 15)
    OP         ':'           (1, 15) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    INDENT     '  '          (2, 0) (2, 2)
    ASYNC      'async'       (2, 2) (2, 7)
    NAME       'for'         (2, 8) (2, 11)
    NAME       'i'           (2, 12) (2, 13)
    NAME       'in'          (2, 14) (2, 16)
    NUMBER     '1'           (2, 17) (2, 18)
    OP         ':'           (2, 18) (2, 19)
    NAME       'pass'        (2, 20) (2, 24)
    DEDENT     ''            (3, 0) (3, 0)
    """)

        self.check_tokenize('''async def foo(async): await''', """\
    ASYNC      'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'foo'         (1, 10) (1, 13)
    OP         '('           (1, 13) (1, 14)
    ASYNC      'async'       (1, 14) (1, 19)
    OP         ')'           (1, 19) (1, 20)
    OP         ':'           (1, 20) (1, 21)
    AWAIT      'await'       (1, 22) (1, 27)
    """)

        self.check_tokenize('''\
def f():

  def baz(): pass
  async def bar(): pass

  await = 2''', """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'f'           (1, 4) (1, 5)
    OP         '('           (1, 5) (1, 6)
    OP         ')'           (1, 6) (1, 7)
    OP         ':'           (1, 7) (1, 8)
    NEWLINE    '\\n'          (1, 8) (1, 9)
    NL         '\\n'          (2, 0) (2, 1)
    INDENT     '  '          (3, 0) (3, 2)
    NAME       'def'         (3, 2) (3, 5)
    NAME       'baz'         (3, 6) (3, 9)
    OP         '('           (3, 9) (3, 10)
    OP         ')'           (3, 10) (3, 11)
    OP         ':'           (3, 11) (3, 12)
    NAME       'pass'        (3, 13) (3, 17)
    NEWLINE    '\\n'          (3, 17) (3, 18)
    ASYNC      'async'       (4, 2) (4, 7)
    NAME       'def'         (4, 8) (4, 11)
    NAME       'bar'         (4, 12) (4, 15)
    OP         '('           (4, 15) (4, 16)
    OP         ')'           (4, 16) (4, 17)
    OP         ':'           (4, 17) (4, 18)
    NAME       'pass'        (4, 19) (4, 23)
    NEWLINE    '\\n'          (4, 23) (4, 24)
    NL         '\\n'          (5, 0) (5, 1)
    NAME       'await'       (6, 2) (6, 7)
    OP         '='           (6, 8) (6, 9)
    NUMBER     '2'           (6, 10) (6, 11)
    DEDENT     ''            (7, 0) (7, 0)
    """)

        self.check_tokenize('''\
async def f():

  def baz(): pass
  async def bar(): pass

  await = 2''', """\
    ASYNC      'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'f'           (1, 10) (1, 11)
    OP         '('           (1, 11) (1, 12)
    OP         ')'           (1, 12) (1, 13)
    OP         ':'           (1, 13) (1, 14)
    NEWLINE    '\\n'          (1, 14) (1, 15)
    NL         '\\n'          (2, 0) (2, 1)
    INDENT     '  '          (3, 0) (3, 2)
    NAME       'def'         (3, 2) (3, 5)
    NAME       'baz'         (3, 6) (3, 9)
    OP         '('           (3, 9) (3, 10)
    OP         ')'           (3, 10) (3, 11)
    OP         ':'           (3, 11) (3, 12)
    NAME       'pass'        (3, 13) (3, 17)
    NEWLINE    '\\n'          (3, 17) (3, 18)
    ASYNC      'async'       (4, 2) (4, 7)
    NAME       'def'         (4, 8) (4, 11)
    NAME       'bar'         (4, 12) (4, 15)
    OP         '('           (4, 15) (4, 16)
    OP         ')'           (4, 16) (4, 17)
    OP         ':'           (4, 17) (4, 18)
    NAME       'pass'        (4, 19) (4, 23)
    NEWLINE    '\\n'          (4, 23) (4, 24)
    NL         '\\n'          (5, 0) (5, 1)
    AWAIT      'await'       (6, 2) (6, 7)
    OP         '='           (6, 8) (6, 9)
    NUMBER     '2'           (6, 10) (6, 11)
    DEDENT     ''            (7, 0) (7, 0)
    """)


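# Example from the tokenize documentation: rewrite float literals as calls
# to Decimal by editing the token stream (exercised by TestMisc.test_decistmt).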
def decistmt(s):
    result = []
    g = tokenize(BytesIO(s.encode('utf-8')).readline)  # tokenize the string
    for toknum, tokval, _, _, _ in g:
        if toknum == NUMBER and '.' in tokval:  # replace NUMBER tokens
            result.extend([
                (NAME, 'Decimal'),
                (OP, '('),
                (STRING, repr(tokval)),
                (OP, ')')
            ])
        else:
            result.append((toknum, tokval))
    return untokenize(result).decode('utf-8')

class TestMisc(TestCase):

    def test_decistmt(self):
        # Substitute Decimals for floats in a string of statements.
        # This is an example from the docs.

        from decimal import Decimal
        s = '+21.3e-5*-.1234/81.7'
        self.assertEqual(decistmt(s),
                         "+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')")

        # The format of the exponent is inherited from the platform C library.
        # Known cases are "e-007" (Windows) and "e-07" (not Windows). Since
        # we're only showing 11 digits, and the 12th isn't close to 5, the
        # rest of the output should be platform-independent.
        self.assertRegex(repr(eval(s)), '-3.2171603427[0-9]*e-0+7')

        # Output from calculations with Decimal should be identical across all
        # platforms.
        self.assertEqual(eval(decistmt(s)),
                         Decimal('-3.217160342717258261933904529E-7'))


class TestTokenizerAdheresToPep0263(TestCase):
    """
    Test that tokenizer adheres to the coding behaviour stipulated in PEP 0263.
    """

    def _testFile(self, filename):
        path = os.path.join(os.path.dirname(__file__), filename)
        TestRoundtrip.check_roundtrip(self, open(path, 'rb'))

    def test_utf8_coding_cookie_and_no_utf8_bom(self):
        f = 'tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt'
        self._testFile(f)

    def test_latin1_coding_cookie_and_utf8_bom(self):
        """
        As per PEP 0263, if a file starts with a utf-8 BOM signature, the only
        allowed encoding for the comment is 'utf-8'. The text file used in
        this test starts with a BOM signature, but specifies latin1 as the
        coding, so verify that a SyntaxError is raised, which matches the
        behaviour of the interpreter when it encounters a similar condition.
        """
        f = 'tokenize_tests-latin1-coding-cookie-and-utf8-bom-sig.txt'
        self.assertRaises(SyntaxError, self._testFile, f)

    def test_no_coding_cookie_and_utf8_bom(self):
        f = 'tokenize_tests-no-coding-cookie-and-utf8-bom-sig-only.txt'
        self._testFile(f)

    def test_utf8_coding_cookie_and_utf8_bom(self):
        f = 'tokenize_tests-utf8-coding-cookie-and-utf8-bom-sig.txt'
        self._testFile(f)

    def test_bad_coding_cookie(self):
        self.assertRaises(SyntaxError, self._testFile, 'bad_coding.py')
        self.assertRaises(SyntaxError, self._testFile, 'bad_coding2.py')


class Test_Tokenize(TestCase):

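    # _tokenize is the internal generator behind tokenize(); unlike the
    # public function it takes the encoding directly, which these tests use.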
    def test__tokenize_decodes_with_specified_encoding(self):
        literal = '"ЉЊЈЁЂ"'
        line = literal.encode('utf-8')
        first = False
        def readline():
            nonlocal first
            if not first:
                first = True
                return line
            else:
                return b''

        # skip the initial encoding token and the end token
        tokens = list(_tokenize(readline, encoding='utf-8'))[1:-1]
        expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
        self.assertEqual(tokens, expected_tokens,
                         "bytes not decoded with encoding")

    def test__tokenize_does_not_decode_with_encoding_none(self):
        literal = '"ЉЊЈЁЂ"'
        first = False
        def readline():
            nonlocal first
            if not first:
                first = True
                return literal
            else:
                return b''

        # skip the end token
        tokens = list(_tokenize(readline, encoding=None))[:-1]
        expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
        self.assertEqual(tokens, expected_tokens,
                         "string not tokenized when encoding is None")


class TestDetectEncoding(TestCase):

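    # Helper: serve the given byte lines one readline() call at a time,
    # mimicking the readline method of a binary file object.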
    def get_readline(self, lines):
        index = 0
        def readline():
            nonlocal index
            if index == len(lines):
                raise StopIteration
            line = lines[index]
            index += 1
            return line
        return readline

    def test_no_bom_no_encoding_cookie(self):
        lines = (
            b'# something\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, list(lines[:2]))

    def test_bom_no_cookie(self):
        lines = (
            b'\xef\xbb\xbf# something\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines,
                         [b'# something\n', b'print(something)\n'])

    def test_cookie_first_line_no_bom(self):
        lines = (
            b'# -*- coding: latin-1 -*-\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'iso-8859-1')
        self.assertEqual(consumed_lines, [b'# -*- coding: latin-1 -*-\n'])

    def test_matched_bom_and_cookie_first_line(self):
        lines = (
            b'\xef\xbb\xbf# coding=utf-8\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines, [b'# coding=utf-8\n'])

    def test_mismatched_bom_and_cookie_first_line_raises_syntaxerror(self):
        lines = (
            b'\xef\xbb\xbf# vim: set fileencoding=ascii :\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        readline = self.get_readline(lines)
        self.assertRaises(SyntaxError, detect_encoding, readline)

    def test_cookie_second_line_no_bom(self):
        lines = (
            b'#! something\n',
            b'# vim: set fileencoding=ascii :\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'ascii')
        expected = [b'#! something\n', b'# vim: set fileencoding=ascii :\n']
        self.assertEqual(consumed_lines, expected)

    def test_matched_bom_and_cookie_second_line(self):
        lines = (
            b'\xef\xbb\xbf#! something\n',
            b'f# coding=utf-8\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines,
                         [b'#! something\n', b'f# coding=utf-8\n'])

    def test_mismatched_bom_and_cookie_second_line_raises_syntaxerror(self):
        lines = (
            b'\xef\xbb\xbf#! something\n',
            b'# vim: set fileencoding=ascii :\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        readline = self.get_readline(lines)
        self.assertRaises(SyntaxError, detect_encoding, readline)

    def test_cookie_second_line_noncommented_first_line(self):
        lines = (
            b"print('\xc2\xa3')\n",
            b'# vim: set fileencoding=iso8859-15 :\n',
            b"print('\xe2\x82\xac')\n"
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8')
        expected = [b"print('\xc2\xa3')\n"]
        self.assertEqual(consumed_lines, expected)

    def test_cookie_second_line_commented_first_line(self):
        lines = (
            b"#print('\xc2\xa3')\n",
            b'# vim: set fileencoding=iso8859-15 :\n',
            b"print('\xe2\x82\xac')\n"
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'iso8859-15')
        expected = [b"#print('\xc2\xa3')\n", b'# vim: set fileencoding=iso8859-15 :\n']
        self.assertEqual(consumed_lines, expected)

    def test_cookie_second_line_empty_first_line(self):
        lines = (
            b'\n',
            b'# vim: set fileencoding=iso8859-15 :\n',
            b"print('\xe2\x82\xac')\n"
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'iso8859-15')
        expected = [b'\n', b'# vim: set fileencoding=iso8859-15 :\n']
        self.assertEqual(consumed_lines, expected)

    def test_latin1_normalization(self):
        # See get_normal_name() in tokenizer.c.
        encodings = ("latin-1", "iso-8859-1", "iso-latin-1", "latin-1-unix",
                     "iso-8859-1-unix", "iso-latin-1-mac")
        for encoding in encodings:
            for rep in ("-", "_"):
                enc = encoding.replace("-", rep)
                lines = (b"#!/usr/bin/python\n",
                         b"# coding: " + enc.encode("ascii") + b"\n",
                         b"print(things)\n",
                         b"do_something += 4\n")
                rl = self.get_readline(lines)
                found, consumed_lines = detect_encoding(rl)
                self.assertEqual(found, "iso-8859-1")

    def test_syntaxerror_latin1(self):
        # Issue 14629: need to raise SyntaxError if the first
        # line(s) have non-UTF-8 characters
        lines = (
            b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S
            )
        readline = self.get_readline(lines)
        self.assertRaises(SyntaxError, detect_encoding, readline)


    def test_utf8_normalization(self):
        # See get_normal_name() in tokenizer.c.
        encodings = ("utf-8", "utf-8-mac", "utf-8-unix")
        for encoding in encodings:
            for rep in ("-", "_"):
                enc = encoding.replace("-", rep)
                lines = (b"#!/usr/bin/python\n",
                         b"# coding: " + enc.encode("ascii") + b"\n",
                         b"1 + 3\n")
                rl = self.get_readline(lines)
                found, consumed_lines = detect_encoding(rl)
                self.assertEqual(found, "utf-8")

    def test_short_files(self):
        readline = self.get_readline((b'print(something)\n',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, [b'print(something)\n'])

        encoding, consumed_lines = detect_encoding(self.get_readline(()))
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, [])

        readline = self.get_readline((b'\xef\xbb\xbfprint(something)\n',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines, [b'print(something)\n'])

        readline = self.get_readline((b'\xef\xbb\xbf',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines, [])

        readline = self.get_readline((b'# coding: bad\n',))
        self.assertRaises(SyntaxError, detect_encoding, readline)

    def test_false_encoding(self):
        # Issue 18873: "Encoding" detected in non-comment lines
        readline = self.get_readline((b'print("#coding=fake")',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, [b'print("#coding=fake")'])

    def test_open(self):
        filename = support.TESTFN + '.py'
        self.addCleanup(support.unlink, filename)

        # test coding cookie
        for encoding in ('iso-8859-15', 'utf-8'):
            with open(filename, 'w', encoding=encoding) as fp:
                print("# coding: %s" % encoding, file=fp)
                print("print('euro:\u20ac')", file=fp)
            with tokenize_open(filename) as fp:
                self.assertEqual(fp.encoding, encoding)
                self.assertEqual(fp.mode, 'r')

        # test BOM (no coding cookie)
        with open(filename, 'w', encoding='utf-8-sig') as fp:
            print("print('euro:\u20ac')", file=fp)
        with tokenize_open(filename) as fp:
            self.assertEqual(fp.encoding, 'utf-8-sig')
            self.assertEqual(fp.mode, 'r')

    def test_filename_in_exception(self):
        # When possible, include the file name in the exception.
        path = 'some_file_path'
        lines = (
            b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S
            )
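        # A minimal file-like object: just a name attribute and readline().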
        class Bunk:
            def __init__(self, lines, path):
                self.name = path
                self._lines = lines
                self._index = 0

            def readline(self):
                if self._index == len(lines):
                    raise StopIteration
                line = lines[self._index]
                self._index += 1
                return line

        with self.assertRaises(SyntaxError):
            ins = Bunk(lines, path)
            # Make sure lacking a name isn't an issue.
            del ins.name
            detect_encoding(ins.readline)
        with self.assertRaisesRegex(SyntaxError, '.*{}'.format(path)):
            ins = Bunk(lines, path)
            detect_encoding(ins.readline)

    def test_open_error(self):
        # Issue #23840: open() must close the binary file on error
        m = BytesIO(b'#coding:xxx')
        with mock.patch('tokenize._builtin_open', return_value=m):
            self.assertRaises(SyntaxError, tokenize_open, 'foobar')
        self.assertTrue(m.closed)


class TestTokenize(TestCase):

    def test_tokenize(self):
        import tokenize as tokenize_module
        encoding = object()
        encoding_used = None
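        # Stub out detect_encoding() and _tokenize() so we can check that
        # tokenize() replays the consumed lines and forwards the encoding.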
        def mock_detect_encoding(readline):
            return encoding, [b'first', b'second']

        def mock__tokenize(readline, encoding):
            nonlocal encoding_used
            encoding_used = encoding
            out = []
            while True:
                next_line = readline()
                if next_line:
                    out.append(next_line)
                    continue
                return out

        counter = 0
        def mock_readline():
            nonlocal counter
            counter += 1
            if counter == 5:
                return b''
            return str(counter).encode()

        orig_detect_encoding = tokenize_module.detect_encoding
        orig__tokenize = tokenize_module._tokenize
        tokenize_module.detect_encoding = mock_detect_encoding
        tokenize_module._tokenize = mock__tokenize
        try:
            results = tokenize(mock_readline)
            self.assertEqual(list(results),
                             [b'first', b'second', b'1', b'2', b'3', b'4'])
        finally:
            tokenize_module.detect_encoding = orig_detect_encoding
            tokenize_module._tokenize = orig__tokenize

        self.assertTrue(encoding_used, encoding)

    def test_oneline_defs(self):
        buf = []
        for i in range(500):
            buf.append('def i{i}(): return {i}'.format(i=i))
        buf.append('OK')
        buf = '\n'.join(buf)

        # Test that 500 consequent, one-line defs is OK
        toks = list(tokenize(BytesIO(buf.encode('utf-8')).readline))
        self.assertEqual(toks[-2].string, 'OK')  # [-1] is always ENDMARKER

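    # Tokenize opstr and verify the exact_type of every token between the
    # leading ENCODING token and the trailing ENDMARKER.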
Meador Inge00c7f852012-01-19 00:44:45 -06001342 def assertExactTypeEqual(self, opstr, *optypes):
1343 tokens = list(tokenize(BytesIO(opstr.encode('utf-8')).readline))
1344 num_optypes = len(optypes)
1345 self.assertEqual(len(tokens), 2 + num_optypes)
1346 self.assertEqual(token.tok_name[tokens[0].exact_type],
1347 token.tok_name[ENCODING])
1348 for i in range(num_optypes):
1349 self.assertEqual(token.tok_name[tokens[i + 1].exact_type],
1350 token.tok_name[optypes[i]])
1351 self.assertEqual(token.tok_name[tokens[1 + num_optypes].exact_type],
1352 token.tok_name[token.ENDMARKER])
1353

    def test_exact_type(self):
        self.assertExactTypeEqual('()', token.LPAR, token.RPAR)
        self.assertExactTypeEqual('[]', token.LSQB, token.RSQB)
        self.assertExactTypeEqual(':', token.COLON)
        self.assertExactTypeEqual(',', token.COMMA)
        self.assertExactTypeEqual(';', token.SEMI)
        self.assertExactTypeEqual('+', token.PLUS)
        self.assertExactTypeEqual('-', token.MINUS)
        self.assertExactTypeEqual('*', token.STAR)
        self.assertExactTypeEqual('/', token.SLASH)
        self.assertExactTypeEqual('|', token.VBAR)
        self.assertExactTypeEqual('&', token.AMPER)
        self.assertExactTypeEqual('<', token.LESS)
        self.assertExactTypeEqual('>', token.GREATER)
        self.assertExactTypeEqual('=', token.EQUAL)
        self.assertExactTypeEqual('.', token.DOT)
        self.assertExactTypeEqual('%', token.PERCENT)
        self.assertExactTypeEqual('{}', token.LBRACE, token.RBRACE)
        self.assertExactTypeEqual('==', token.EQEQUAL)
        self.assertExactTypeEqual('!=', token.NOTEQUAL)
        self.assertExactTypeEqual('<=', token.LESSEQUAL)
        self.assertExactTypeEqual('>=', token.GREATEREQUAL)
        self.assertExactTypeEqual('~', token.TILDE)
        self.assertExactTypeEqual('^', token.CIRCUMFLEX)
        self.assertExactTypeEqual('<<', token.LEFTSHIFT)
        self.assertExactTypeEqual('>>', token.RIGHTSHIFT)
        self.assertExactTypeEqual('**', token.DOUBLESTAR)
        self.assertExactTypeEqual('+=', token.PLUSEQUAL)
        self.assertExactTypeEqual('-=', token.MINEQUAL)
        self.assertExactTypeEqual('*=', token.STAREQUAL)
        self.assertExactTypeEqual('/=', token.SLASHEQUAL)
        self.assertExactTypeEqual('%=', token.PERCENTEQUAL)
        self.assertExactTypeEqual('&=', token.AMPEREQUAL)
        self.assertExactTypeEqual('|=', token.VBAREQUAL)
        self.assertExactTypeEqual('^=', token.CIRCUMFLEXEQUAL)
        self.assertExactTypeEqual('<<=', token.LEFTSHIFTEQUAL)
        self.assertExactTypeEqual('>>=', token.RIGHTSHIFTEQUAL)
        self.assertExactTypeEqual('**=', token.DOUBLESTAREQUAL)
        self.assertExactTypeEqual('//', token.DOUBLESLASH)
        self.assertExactTypeEqual('//=', token.DOUBLESLASHEQUAL)
        self.assertExactTypeEqual('...', token.ELLIPSIS)
        self.assertExactTypeEqual('->', token.RARROW)
        self.assertExactTypeEqual('@', token.AT)
        self.assertExactTypeEqual('@=', token.ATEQUAL)

        self.assertExactTypeEqual('a**2+b**2==c**2',
                                  NAME, token.DOUBLESTAR, NUMBER,
                                  token.PLUS,
                                  NAME, token.DOUBLESTAR, NUMBER,
                                  token.EQEQUAL,
                                  NAME, token.DOUBLESTAR, NUMBER)
        self.assertExactTypeEqual('{1, 2, 3}',
                                  token.LBRACE,
                                  token.NUMBER, token.COMMA,
                                  token.NUMBER, token.COMMA,
                                  token.NUMBER,
                                  token.RBRACE)
        self.assertExactTypeEqual('^(x & 0x1)',
                                  token.CIRCUMFLEX,
                                  token.LPAR,
                                  token.NAME, token.AMPER, token.NUMBER,
                                  token.RPAR)
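
    # A small sketch (illustrative addition): all of the operators above
    # share the generic type OP; exact_type is the field that tells them
    # apart.
    def test_exact_type_vs_type_sketch(self):
        toks = list(tokenize(BytesIO(b'1+2\n').readline))
        plus = toks[2]
        self.assertEqual(plus.type, OP)
        self.assertEqual(plus.string, '+')
        self.assertEqual(plus.exact_type, token.PLUS)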

    def test_pathological_trailing_whitespace(self):
        # See http://bugs.python.org/issue16152
        self.assertExactTypeEqual('@ ', token.AT)


class UntokenizeTest(TestCase):

    def test_bad_input_order(self):
        # raise if the start row precedes the previous end row
        u = Untokenizer()
        u.prev_row = 2
        u.prev_col = 2
        with self.assertRaises(ValueError) as cm:
            u.add_whitespace((1,3))
        self.assertEqual(cm.exception.args[0],
                'start (1,3) precedes previous end (2,2)')
        # raise if the start column precedes the previous end column on the
        # same row
        self.assertRaises(ValueError, u.add_whitespace, (2,1))
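
    # A sketch of the non-error path (illustrative addition): when the
    # requested start is ahead of the previous end on the same row,
    # add_whitespace() pads with spaces.
    def test_add_whitespace_pads_sketch(self):
        u = Untokenizer()
        u.prev_row = 1
        u.prev_col = 1
        u.add_whitespace((1, 4))
        self.assertEqual(u.tokens, ['   '])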

    def test_backslash_continuation(self):
        # The problem is that <whitespace>\<newline> leaves no token
        u = Untokenizer()
        u.prev_row = 1
        u.prev_col = 1
        u.tokens = []
        u.add_whitespace((2, 0))
        self.assertEqual(u.tokens, ['\\\n'])
        u.prev_row = 2
        u.add_whitespace((4, 4))
        self.assertEqual(u.tokens, ['\\\n', '\\\n\\\n', '    '])
        TestRoundtrip.check_roundtrip(self, 'a\n b\n c\n \\\n c\n')

    def test_iter_compat(self):
        u = Untokenizer()
        tok = (NAME, 'Hello')
        tokens = [(ENCODING, 'utf-8'), tok]
        u.compat(tok, iter([]))
        self.assertEqual(u.tokens, ["Hello "])
        u = Untokenizer()
        self.assertEqual(u.untokenize(iter([tok])), 'Hello ')
        u = Untokenizer()
        self.assertEqual(u.untokenize(iter(tokens)), 'Hello ')
        self.assertEqual(u.encoding, 'utf-8')
        self.assertEqual(untokenize(iter(tokens)), b'Hello ')
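
    # A sketch contrasting the two input modes (illustrative addition):
    # 5-tuples carry positions, so untokenize() can restore the original
    # spacing exactly; bare 2-tuples go through compat() and only promise
    # a token-level roundtrip.
    def test_two_tuple_vs_five_tuple_sketch(self):
        source = b'x  =  1\n'
        tokens5 = list(tokenize(BytesIO(source).readline))
        self.assertEqual(untokenize(tokens5), source)
        tokens2 = [t[:2] for t in tokens5]
        regen = untokenize(tokens2)
        self.assertEqual([t[:2] for t in tokenize(BytesIO(regen).readline)],
                         tokens2)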


class TestRoundtrip(TestCase):

    def check_roundtrip(self, f):
        """
        Test roundtrip for `untokenize`. `f` is an open file or a string.
        The source code in f is tokenized to both 5- and 2-tuples.
        Both sequences are converted back to source code via
        tokenize.untokenize(), and the latter tokenized again to 2-tuples.
        The test fails if any of the three 2-tuple tokenizations differ.

        When untokenize bugs are fixed, untokenize with 5-tuples should
        reproduce code that does not contain a backslash continuation
        following spaces.  A proper test should verify this.
        """
        # Get source code and original tokenizations
        if isinstance(f, str):
            code = f.encode('utf-8')
        else:
            code = f.read()
            f.close()
        readline = iter(code.splitlines(keepends=True)).__next__
        tokens5 = list(tokenize(readline))
        tokens2 = [tok[:2] for tok in tokens5]
        # Reproduce tokens2 from pairs
        bytes_from2 = untokenize(tokens2)
        readline2 = iter(bytes_from2.splitlines(keepends=True)).__next__
        tokens2_from2 = [tok[:2] for tok in tokenize(readline2)]
        self.assertEqual(tokens2_from2, tokens2)
        # Reproduce tokens2 from 5-tuples
        bytes_from5 = untokenize(tokens5)
        readline5 = iter(bytes_from5.splitlines(keepends=True)).__next__
        tokens2_from5 = [tok[:2] for tok in tokenize(readline5)]
        self.assertEqual(tokens2_from5, tokens2)

    def test_roundtrip(self):
        # There are some standard formatting practices that are easy to get right.

        self.check_roundtrip("if x == 1:\n"
                             "    print(x)\n")
        self.check_roundtrip("# This is a comment\n"
                             "# This also")

        # Some people use different formatting conventions, which makes
        # untokenize a little trickier.  Note that this test involves trailing
        # whitespace after the colon; the trailing blank is significant.

        self.check_roundtrip("if x == 1 : \n"
                             "  print(x)\n")
        fn = support.findfile("tokenize_tests.txt")
        with open(fn, 'rb') as f:
            self.check_roundtrip(f)
        self.check_roundtrip("if x == 1:\n"
                             "    # A comment by itself.\n"
                             "    print(x) # Comment here, too.\n"
                             "    # Another comment.\n"
                             "after_if = True\n")
        self.check_roundtrip("if (x # The comments need to go in the right place\n"
                             "    == 1):\n"
                             "    print('x==1')\n")
        self.check_roundtrip("class Test: # A comment here\n"
                             "  # A comment with weird indent\n"
                             "  after_com = 5\n"
                             "  def x(m): return m*5 # a one liner\n"
                             "  def y(m): # A whitespace after the colon\n"
                             "     return y*4 # 3-space indent\n")

        # Some error-handling code
        self.check_roundtrip("try: import somemodule\n"
                             "except ImportError: # comment\n"
                             "    print('Can not import') # comment2\n"
                             "else: print('Loaded')\n")

    def test_continuation(self):
        # Balancing continuation
        self.check_roundtrip("a = (3,4, \n"
                             "5,6)\n"
                             "y = [3, 4,\n"
                             "5]\n"
                             "z = {'a': 5,\n"
                             "'b':15, 'c':True}\n"
                             "x = len(y) + 5 - a[\n"
                             "3] - a[2]\n"
                             "+ len(z) - z[\n"
                             "'b']\n")

    def test_backslash_continuation(self):
        # Backslash means line continuation, except for comments
        self.check_roundtrip("x=1+\\\n"
                             "1\n"
                             "# This is a comment\\\n"
                             "# This also\n")
        self.check_roundtrip("# Comment \\\n"
                             "x = 0")

    def test_string_concatenation(self):
        # Two string literals on the same line
        self.check_roundtrip("'' ''")

    def test_random_files(self):
        # Test roundtrip on random python modules.
        # pass the '-ucpu' option to process the full directory.

        import glob, random
        fn = support.findfile("tokenize_tests.txt")
        tempdir = os.path.dirname(fn) or os.curdir
        testfiles = glob.glob(os.path.join(tempdir, "test*.py"))

        # Tokenize is broken on test_pep3131.py because regular expressions
        # are broken on the obscure unicode identifiers in it. *sigh*
        # With roundtrip extended to test the 5-tuple mode of untokenize,
        # 7 more testfiles fail.  Remove them also until the failure is diagnosed.

        testfiles.remove(os.path.join(tempdir, "test_unicode_identifiers.py"))
        for f in ('buffer', 'builtin', 'fileio', 'inspect', 'os', 'platform', 'sys'):
            testfiles.remove(os.path.join(tempdir, "test_%s.py" % f))

        if not support.is_resource_enabled("cpu"):
            testfiles = random.sample(testfiles, 10)

        for testfile in testfiles:
            with open(testfile, 'rb') as f:
                with self.subTest(file=testfile):
                    self.check_roundtrip(f)

    def roundtrip(self, code):
        if isinstance(code, str):
            code = code.encode('utf-8')
        return untokenize(tokenize(BytesIO(code).readline)).decode('utf-8')

    def test_indentation_semantics_retained(self):
        """
        Ensure that although whitespace might be mutated in a roundtrip,
        the semantic meaning of the indentation remains consistent.
        """
        code = "if False:\n\tx=3\n\tx=3\n"
        codelines = self.roundtrip(code).split('\n')
        self.assertEqual(codelines[1], codelines[2])
        self.check_roundtrip(code)
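
    # A sketch of the same property at the compile level (illustrative
    # addition): whatever happens to the whitespace, the roundtripped
    # source must still be valid Python with the same block structure.
    def test_roundtrip_output_compiles_sketch(self):
        code = "if False:\n\tx=3\n\tx=3\n"
        compile(self.roundtrip(code), '<roundtrip>', 'exec')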


if __name__ == "__main__":
    import unittest
    unittest.main()