from test import support
from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
                     STRING, ENDMARKER, ENCODING, tok_name, detect_encoding,
                     open as tokenize_open, Untokenizer, generate_tokens,
                     NEWLINE)
from io import BytesIO
import unittest
from unittest import TestCase, mock
from test.test_grammar import (VALID_UNDERSCORE_LITERALS,
                               INVALID_UNDERSCORE_LITERALS)
import os
import token


# Converts a source string into a list of textual representations
# of the tokens such as:
# `    NAME       'if'          (1, 0) (1, 2)`
# to make writing tests easier.
def stringify_tokens_from_source(token_generator, source_string):
    result = []
    num_lines = len(source_string.splitlines())
    missing_trailing_nl = source_string[-1] not in '\r\n'

    for type, token, start, end, line in token_generator:
        if type == ENDMARKER:
            break
        # Ignore the newline on the last line if the input lacks one
        if missing_trailing_nl and type == NEWLINE and end[0] == num_lines:
            continue
        type = tok_name[type]
        result.append(f"    {type:10} {token!r:13} {start} {end}")

    return result

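# A minimal usage sketch of the helper above (it assumes nothing beyond this
# module's imports); the implicit trailing NEWLINE is dropped here because
# the source lacks a trailing newline:
#
#     f = BytesIO("1 + 1".encode('utf-8'))
#     stringify_tokens_from_source(tokenize(f.readline), "1 + 1")
#     # ["    ENCODING   'utf-8'       (0, 0) (0, 0)",
#     #  "    NUMBER     '1'           (1, 0) (1, 1)",
#     #  "    OP         '+'           (1, 2) (1, 3)",
#     #  "    NUMBER     '1'           (1, 4) (1, 5)"]
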
class TokenizeTest(TestCase):
    # Tests for the tokenize module.

    # The tests can be really simple. Given a small fragment of source
    # code, print out a table with tokens. The ENDMARKER, ENCODING and
    # final NEWLINE are omitted for brevity.

    def check_tokenize(self, s, expected):
        # Format the tokens in s as a table.
        # The ENDMARKER and final NEWLINE are omitted.
        f = BytesIO(s.encode('utf-8'))
        result = stringify_tokens_from_source(tokenize(f.readline), s)

        self.assertEqual(result,
                         ["    ENCODING   'utf-8'       (0, 0) (0, 0)"] +
                         expected.rstrip().splitlines())

    def test_implicit_newline(self):
        # Make sure that the tokenizer puts in an implicit NEWLINE
        # when the input lacks a trailing newline.
        f = BytesIO("x".encode('utf-8'))
        tokens = list(tokenize(f.readline))
        self.assertEqual(tokens[-2].type, NEWLINE)
        self.assertEqual(tokens[-1].type, ENDMARKER)

    def test_basic(self):
        self.check_tokenize("1 + 1", """\
    NUMBER     '1'           (1, 0) (1, 1)
    OP         '+'           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    """)
        self.check_tokenize("if False:\n"
                            "    # NL\n"
                            "    \n"
                            "    True = False # NEWLINE\n", """\
    NAME       'if'          (1, 0) (1, 2)
    NAME       'False'       (1, 3) (1, 8)
    OP         ':'           (1, 8) (1, 9)
    NEWLINE    '\\n'          (1, 9) (1, 10)
    COMMENT    '# NL'        (2, 4) (2, 8)
    NL         '\\n'          (2, 8) (2, 9)
    NL         '\\n'          (3, 4) (3, 5)
    INDENT     '    '        (4, 0) (4, 4)
    NAME       'True'        (4, 4) (4, 8)
    OP         '='           (4, 9) (4, 10)
    NAME       'False'       (4, 11) (4, 16)
    COMMENT    '# NEWLINE'   (4, 17) (4, 26)
    NEWLINE    '\\n'          (4, 26) (4, 27)
    DEDENT     ''            (5, 0) (5, 0)
    """)
        indent_error_file = b"""\
def k(x):
    x += 2
  x += 5
"""
        readline = BytesIO(indent_error_file).readline
        with self.assertRaisesRegex(IndentationError,
                                    "unindent does not match any "
                                    "outer indentation level"):
            for tok in tokenize(readline):
                pass

    def test_int(self):
        # Ordinary integers and binary operators
        self.check_tokenize("0xff <= 255", """\
    NUMBER     '0xff'        (1, 0) (1, 4)
    OP         '<='          (1, 5) (1, 7)
    NUMBER     '255'         (1, 8) (1, 11)
    """)
        self.check_tokenize("0b10 <= 255", """\
    NUMBER     '0b10'        (1, 0) (1, 4)
    OP         '<='          (1, 5) (1, 7)
    NUMBER     '255'         (1, 8) (1, 11)
    """)
        self.check_tokenize("0o123 <= 0O123", """\
    NUMBER     '0o123'       (1, 0) (1, 5)
    OP         '<='          (1, 6) (1, 8)
    NUMBER     '0O123'       (1, 9) (1, 14)
    """)
        self.check_tokenize("1234567 > ~0x15", """\
    NUMBER     '1234567'     (1, 0) (1, 7)
    OP         '>'           (1, 8) (1, 9)
    OP         '~'           (1, 10) (1, 11)
    NUMBER     '0x15'        (1, 11) (1, 15)
    """)
        self.check_tokenize("2134568 != 1231515", """\
    NUMBER     '2134568'     (1, 0) (1, 7)
    OP         '!='          (1, 8) (1, 10)
    NUMBER     '1231515'     (1, 11) (1, 18)
    """)
        self.check_tokenize("(-124561-1) & 200000000", """\
    OP         '('           (1, 0) (1, 1)
    OP         '-'           (1, 1) (1, 2)
    NUMBER     '124561'      (1, 2) (1, 8)
    OP         '-'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         ')'           (1, 10) (1, 11)
    OP         '&'           (1, 12) (1, 13)
    NUMBER     '200000000'   (1, 14) (1, 23)
    """)
        self.check_tokenize("0xdeadbeef != -1", """\
    NUMBER     '0xdeadbeef'  (1, 0) (1, 10)
    OP         '!='          (1, 11) (1, 13)
    OP         '-'           (1, 14) (1, 15)
    NUMBER     '1'           (1, 15) (1, 16)
    """)
        self.check_tokenize("0xdeadc0de & 12345", """\
    NUMBER     '0xdeadc0de'  (1, 0) (1, 10)
    OP         '&'           (1, 11) (1, 12)
    NUMBER     '12345'       (1, 13) (1, 18)
    """)
        self.check_tokenize("0xFF & 0x15 | 1234", """\
    NUMBER     '0xFF'        (1, 0) (1, 4)
    OP         '&'           (1, 5) (1, 6)
    NUMBER     '0x15'        (1, 7) (1, 11)
    OP         '|'           (1, 12) (1, 13)
    NUMBER     '1234'        (1, 14) (1, 18)
    """)

    def test_long(self):
        # Long integers
        self.check_tokenize("x = 0", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '0'           (1, 4) (1, 5)
    """)
        self.check_tokenize("x = 0xfffffffffff", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '0xfffffffffff' (1, 4) (1, 17)
    """)
        self.check_tokenize("x = 123141242151251616110", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '123141242151251616110' (1, 4) (1, 25)
    """)
        self.check_tokenize("x = -15921590215012591", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    OP         '-'           (1, 4) (1, 5)
    NUMBER     '15921590215012591' (1, 5) (1, 22)
    """)

    def test_float(self):
        # Floating point numbers
        self.check_tokenize("x = 3.14159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3.14159'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = 314159.", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '314159.'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = .314159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '.314159'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = 3e14159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3e14159'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = 3E123", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3E123'       (1, 4) (1, 9)
    """)
        self.check_tokenize("x+y = 3e-1230", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '+'           (1, 1) (1, 2)
    NAME       'y'           (1, 2) (1, 3)
    OP         '='           (1, 4) (1, 5)
    NUMBER     '3e-1230'     (1, 6) (1, 13)
    """)
        self.check_tokenize("x = 3.14e159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3.14e159'    (1, 4) (1, 12)
    """)

    def test_underscore_literals(self):
        def number_token(s):
            f = BytesIO(s.encode('utf-8'))
            for toktype, token, start, end, line in tokenize(f.readline):
                if toktype == NUMBER:
                    return token
            return 'invalid token'
        for lit in VALID_UNDERSCORE_LITERALS:
            if '(' in lit:
                # this won't work with compound complex inputs
                continue
            self.assertEqual(number_token(lit), lit)
        for lit in INVALID_UNDERSCORE_LITERALS:
            self.assertNotEqual(number_token(lit), lit)
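        # Illustrative values only (the real lists come from test_grammar):
        # a valid literal such as '1_000' comes back from number_token()
        # unchanged, while for an invalid spelling such as '1__000' the
        # NUMBER token ends before the doubled underscore, so the round-trip
        # comparison fails as the assertion above expects.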

    def test_string(self):
        # String literals
        self.check_tokenize("x = ''; y = \"\"", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "''"          (1, 4) (1, 6)
    OP         ';'           (1, 6) (1, 7)
    NAME       'y'           (1, 8) (1, 9)
    OP         '='           (1, 10) (1, 11)
    STRING     '""'          (1, 12) (1, 14)
    """)
        self.check_tokenize("x = '\"'; y = \"'\"", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '\\'"\\''       (1, 4) (1, 7)
    OP         ';'           (1, 7) (1, 8)
    NAME       'y'           (1, 9) (1, 10)
    OP         '='           (1, 11) (1, 12)
    STRING     '"\\'"'        (1, 13) (1, 16)
    """)
        self.check_tokenize("x = \"doesn't \"shrink\", does it\"", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '"doesn\\'t "' (1, 4) (1, 14)
    NAME       'shrink'      (1, 14) (1, 20)
    STRING     '", does it"' (1, 20) (1, 31)
    """)
        self.check_tokenize("x = 'abc' + 'ABC'", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "'abc'"       (1, 4) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    STRING     "'ABC'"       (1, 12) (1, 17)
    """)
        self.check_tokenize('y = "ABC" + "ABC"', """\
    NAME       'y'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '"ABC"'       (1, 4) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    STRING     '"ABC"'       (1, 12) (1, 17)
    """)
        self.check_tokenize("x = r'abc' + r'ABC' + R'ABC' + R'ABC'", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "r'abc'"      (1, 4) (1, 10)
    OP         '+'           (1, 11) (1, 12)
    STRING     "r'ABC'"      (1, 13) (1, 19)
    OP         '+'           (1, 20) (1, 21)
    STRING     "R'ABC'"      (1, 22) (1, 28)
    OP         '+'           (1, 29) (1, 30)
    STRING     "R'ABC'"      (1, 31) (1, 37)
    """)
        self.check_tokenize('y = r"abc" + r"ABC" + R"ABC" + R"ABC"', """\
    NAME       'y'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     'r"abc"'      (1, 4) (1, 10)
    OP         '+'           (1, 11) (1, 12)
    STRING     'r"ABC"'      (1, 13) (1, 19)
    OP         '+'           (1, 20) (1, 21)
    STRING     'R"ABC"'      (1, 22) (1, 28)
    OP         '+'           (1, 29) (1, 30)
    STRING     'R"ABC"'      (1, 31) (1, 37)
    """)

        self.check_tokenize("u'abc' + U'abc'", """\
    STRING     "u'abc'"      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     "U'abc'"      (1, 9) (1, 15)
    """)
        self.check_tokenize('u"abc" + U"abc"', """\
    STRING     'u"abc"'      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     'U"abc"'      (1, 9) (1, 15)
    """)

        self.check_tokenize("b'abc' + B'abc'", """\
    STRING     "b'abc'"      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     "B'abc'"      (1, 9) (1, 15)
    """)
        self.check_tokenize('b"abc" + B"abc"', """\
    STRING     'b"abc"'      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     'B"abc"'      (1, 9) (1, 15)
    """)
        self.check_tokenize("br'abc' + bR'abc' + Br'abc' + BR'abc'", """\
    STRING     "br'abc'"     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     "bR'abc'"     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     "Br'abc'"     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     "BR'abc'"     (1, 30) (1, 37)
    """)
        self.check_tokenize('br"abc" + bR"abc" + Br"abc" + BR"abc"', """\
    STRING     'br"abc"'     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     'bR"abc"'     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     'Br"abc"'     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     'BR"abc"'     (1, 30) (1, 37)
    """)
        self.check_tokenize("rb'abc' + rB'abc' + Rb'abc' + RB'abc'", """\
    STRING     "rb'abc'"     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     "rB'abc'"     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     "Rb'abc'"     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     "RB'abc'"     (1, 30) (1, 37)
    """)
        self.check_tokenize('rb"abc" + rB"abc" + Rb"abc" + RB"abc"', """\
    STRING     'rb"abc"'     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     'rB"abc"'     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     'Rb"abc"'     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     'RB"abc"'     (1, 30) (1, 37)
    """)
        # Check 0, 1, and 2 character string prefixes.
        self.check_tokenize(r'"a\
de\
fg"', """\
    STRING     '"a\\\\\\nde\\\\\\nfg"\' (1, 0) (3, 3)
    """)
        self.check_tokenize(r'u"a\
de"', """\
    STRING     'u"a\\\\\\nde"\'  (1, 0) (2, 3)
    """)
        self.check_tokenize(r'rb"a\
d"', """\
    STRING     'rb"a\\\\\\nd"\'  (1, 0) (2, 2)
    """)
        self.check_tokenize(r'"""a\
b"""', """\
    STRING     '\"\""a\\\\\\nb\"\""' (1, 0) (2, 4)
    """)
        self.check_tokenize(r'u"""a\
b"""', """\
    STRING     'u\"\""a\\\\\\nb\"\""' (1, 0) (2, 4)
    """)
        self.check_tokenize(r'rb"""a\
b\
c"""', """\
    STRING     'rb"\""a\\\\\\nb\\\\\\nc"\""' (1, 0) (3, 4)
    """)
        self.check_tokenize('f"abc"', """\
    STRING     'f"abc"'      (1, 0) (1, 6)
    """)
        self.check_tokenize('fR"a{b}c"', """\
    STRING     'fR"a{b}c"'   (1, 0) (1, 9)
    """)
        self.check_tokenize('f"""abc"""', """\
    STRING     'f\"\"\"abc\"\"\"'  (1, 0) (1, 10)
    """)
        self.check_tokenize(r'f"abc\
def"', """\
    STRING     'f"abc\\\\\\ndef"' (1, 0) (2, 4)
    """)
        self.check_tokenize(r'Rf"abc\
def"', """\
    STRING     'Rf"abc\\\\\\ndef"' (1, 0) (2, 4)
    """)

    def test_function(self):
        self.check_tokenize("def d22(a, b, c=2, d=2, *k): pass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'd22'         (1, 4) (1, 7)
    OP         '('           (1, 7) (1, 8)
    NAME       'a'           (1, 8) (1, 9)
    OP         ','           (1, 9) (1, 10)
    NAME       'b'           (1, 11) (1, 12)
    OP         ','           (1, 12) (1, 13)
    NAME       'c'           (1, 14) (1, 15)
    OP         '='           (1, 15) (1, 16)
    NUMBER     '2'           (1, 16) (1, 17)
    OP         ','           (1, 17) (1, 18)
    NAME       'd'           (1, 19) (1, 20)
    OP         '='           (1, 20) (1, 21)
    NUMBER     '2'           (1, 21) (1, 22)
    OP         ','           (1, 22) (1, 23)
    OP         '*'           (1, 24) (1, 25)
    NAME       'k'           (1, 25) (1, 26)
    OP         ')'           (1, 26) (1, 27)
    OP         ':'           (1, 27) (1, 28)
    NAME       'pass'        (1, 29) (1, 33)
    """)
        self.check_tokenize("def d01v_(a=1, *k, **w): pass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'd01v_'       (1, 4) (1, 9)
    OP         '('           (1, 9) (1, 10)
    NAME       'a'           (1, 10) (1, 11)
    OP         '='           (1, 11) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         ','           (1, 13) (1, 14)
    OP         '*'           (1, 15) (1, 16)
    NAME       'k'           (1, 16) (1, 17)
    OP         ','           (1, 17) (1, 18)
    OP         '**'          (1, 19) (1, 21)
    NAME       'w'           (1, 21) (1, 22)
    OP         ')'           (1, 22) (1, 23)
    OP         ':'           (1, 23) (1, 24)
    NAME       'pass'        (1, 25) (1, 29)
    """)
        self.check_tokenize("def d23(a: str, b: int=3) -> int: pass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'd23'         (1, 4) (1, 7)
    OP         '('           (1, 7) (1, 8)
    NAME       'a'           (1, 8) (1, 9)
    OP         ':'           (1, 9) (1, 10)
    NAME       'str'         (1, 11) (1, 14)
    OP         ','           (1, 14) (1, 15)
    NAME       'b'           (1, 16) (1, 17)
    OP         ':'           (1, 17) (1, 18)
    NAME       'int'         (1, 19) (1, 22)
    OP         '='           (1, 22) (1, 23)
    NUMBER     '3'           (1, 23) (1, 24)
    OP         ')'           (1, 24) (1, 25)
    OP         '->'          (1, 26) (1, 28)
    NAME       'int'         (1, 29) (1, 32)
    OP         ':'           (1, 32) (1, 33)
    NAME       'pass'        (1, 34) (1, 38)
    """)

    def test_comparison(self):
        # Comparison
        self.check_tokenize("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != "
                            "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass", """\
    NAME       'if'          (1, 0) (1, 2)
    NUMBER     '1'           (1, 3) (1, 4)
    OP         '<'           (1, 5) (1, 6)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '>'           (1, 9) (1, 10)
    NUMBER     '1'           (1, 11) (1, 12)
    OP         '=='          (1, 13) (1, 15)
    NUMBER     '1'           (1, 16) (1, 17)
    OP         '>='          (1, 18) (1, 20)
    NUMBER     '5'           (1, 21) (1, 22)
    OP         '<='          (1, 23) (1, 25)
    NUMBER     '0x15'        (1, 26) (1, 30)
    OP         '<='          (1, 31) (1, 33)
    NUMBER     '0x12'        (1, 34) (1, 38)
    OP         '!='          (1, 39) (1, 41)
    NUMBER     '1'           (1, 42) (1, 43)
    NAME       'and'         (1, 44) (1, 47)
    NUMBER     '5'           (1, 48) (1, 49)
    NAME       'in'          (1, 50) (1, 52)
    NUMBER     '1'           (1, 53) (1, 54)
    NAME       'not'         (1, 55) (1, 58)
    NAME       'in'          (1, 59) (1, 61)
    NUMBER     '1'           (1, 62) (1, 63)
    NAME       'is'          (1, 64) (1, 66)
    NUMBER     '1'           (1, 67) (1, 68)
    NAME       'or'          (1, 69) (1, 71)
    NUMBER     '5'           (1, 72) (1, 73)
    NAME       'is'          (1, 74) (1, 76)
    NAME       'not'         (1, 77) (1, 80)
    NUMBER     '1'           (1, 81) (1, 82)
    OP         ':'           (1, 82) (1, 83)
    NAME       'pass'        (1, 84) (1, 88)
    """)

    def test_shift(self):
        # Shift
        self.check_tokenize("x = 1 << 1 >> 5", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '<<'          (1, 6) (1, 8)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '>>'          (1, 11) (1, 13)
    NUMBER     '5'           (1, 14) (1, 15)
    """)

    def test_additive(self):
        # Additive
        self.check_tokenize("x = 1 - y + 15 - 1 + 0x124 + z + a[5]", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '-'           (1, 6) (1, 7)
    NAME       'y'           (1, 8) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    NUMBER     '15'          (1, 12) (1, 14)
    OP         '-'           (1, 15) (1, 16)
    NUMBER     '1'           (1, 17) (1, 18)
    OP         '+'           (1, 19) (1, 20)
    NUMBER     '0x124'       (1, 21) (1, 26)
    OP         '+'           (1, 27) (1, 28)
    NAME       'z'           (1, 29) (1, 30)
    OP         '+'           (1, 31) (1, 32)
    NAME       'a'           (1, 33) (1, 34)
    OP         '['           (1, 34) (1, 35)
    NUMBER     '5'           (1, 35) (1, 36)
    OP         ']'           (1, 36) (1, 37)
    """)

    def test_multiplicative(self):
        # Multiplicative
        self.check_tokenize("x = 1//1*1/5*12%0x12@42", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '//'          (1, 5) (1, 7)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '*'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '/'           (1, 10) (1, 11)
    NUMBER     '5'           (1, 11) (1, 12)
    OP         '*'           (1, 12) (1, 13)
    NUMBER     '12'          (1, 13) (1, 15)
    OP         '%'           (1, 15) (1, 16)
    NUMBER     '0x12'        (1, 16) (1, 20)
    OP         '@'           (1, 20) (1, 21)
    NUMBER     '42'          (1, 21) (1, 23)
    """)

    def test_unary(self):
        # Unary
        self.check_tokenize("~1 ^ 1 & 1 |1 ^ -1", """\
    OP         '~'           (1, 0) (1, 1)
    NUMBER     '1'           (1, 1) (1, 2)
    OP         '^'           (1, 3) (1, 4)
    NUMBER     '1'           (1, 5) (1, 6)
    OP         '&'           (1, 7) (1, 8)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '|'           (1, 11) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         '^'           (1, 14) (1, 15)
    OP         '-'           (1, 16) (1, 17)
    NUMBER     '1'           (1, 17) (1, 18)
    """)
        self.check_tokenize("-1*1/1+1*1//1 - ---1**1", """\
    OP         '-'           (1, 0) (1, 1)
    NUMBER     '1'           (1, 1) (1, 2)
    OP         '*'           (1, 2) (1, 3)
    NUMBER     '1'           (1, 3) (1, 4)
    OP         '/'           (1, 4) (1, 5)
    NUMBER     '1'           (1, 5) (1, 6)
    OP         '+'           (1, 6) (1, 7)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '*'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '//'          (1, 10) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         '-'           (1, 14) (1, 15)
    OP         '-'           (1, 16) (1, 17)
    OP         '-'           (1, 17) (1, 18)
    OP         '-'           (1, 18) (1, 19)
    NUMBER     '1'           (1, 19) (1, 20)
    OP         '**'          (1, 20) (1, 22)
    NUMBER     '1'           (1, 22) (1, 23)
    """)

    def test_selector(self):
        # Selector
        self.check_tokenize("import sys, time\nx = sys.modules['time'].time()", """\
    NAME       'import'      (1, 0) (1, 6)
    NAME       'sys'         (1, 7) (1, 10)
    OP         ','           (1, 10) (1, 11)
    NAME       'time'        (1, 12) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    NAME       'x'           (2, 0) (2, 1)
    OP         '='           (2, 2) (2, 3)
    NAME       'sys'         (2, 4) (2, 7)
    OP         '.'           (2, 7) (2, 8)
    NAME       'modules'     (2, 8) (2, 15)
    OP         '['           (2, 15) (2, 16)
    STRING     "'time'"      (2, 16) (2, 22)
    OP         ']'           (2, 22) (2, 23)
    OP         '.'           (2, 23) (2, 24)
    NAME       'time'        (2, 24) (2, 28)
    OP         '('           (2, 28) (2, 29)
    OP         ')'           (2, 29) (2, 30)
    """)

    def test_method(self):
        # Methods
        self.check_tokenize("@staticmethod\ndef foo(x,y): pass", """\
    OP         '@'           (1, 0) (1, 1)
    NAME       'staticmethod' (1, 1) (1, 13)
    NEWLINE    '\\n'          (1, 13) (1, 14)
    NAME       'def'         (2, 0) (2, 3)
    NAME       'foo'         (2, 4) (2, 7)
    OP         '('           (2, 7) (2, 8)
    NAME       'x'           (2, 8) (2, 9)
    OP         ','           (2, 9) (2, 10)
    NAME       'y'           (2, 10) (2, 11)
    OP         ')'           (2, 11) (2, 12)
    OP         ':'           (2, 12) (2, 13)
    NAME       'pass'        (2, 14) (2, 18)
    """)

    def test_tabs(self):
        # Evil tabs
        self.check_tokenize("def f():\n"
                            "\tif x\n"
                            "        \tpass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'f'           (1, 4) (1, 5)
    OP         '('           (1, 5) (1, 6)
    OP         ')'           (1, 6) (1, 7)
    OP         ':'           (1, 7) (1, 8)
    NEWLINE    '\\n'          (1, 8) (1, 9)
    INDENT     '\\t'          (2, 0) (2, 1)
    NAME       'if'          (2, 1) (2, 3)
    NAME       'x'           (2, 4) (2, 5)
    NEWLINE    '\\n'          (2, 5) (2, 6)
    INDENT     '        \\t'  (3, 0) (3, 9)
    NAME       'pass'        (3, 9) (3, 13)
    DEDENT     ''            (4, 0) (4, 0)
    DEDENT     ''            (4, 0) (4, 0)
    """)

    def test_non_ascii_identifiers(self):
        # Non-ascii identifiers
        self.check_tokenize("Örter = 'places'\ngrün = 'green'", """\
    NAME       'Örter'       (1, 0) (1, 5)
    OP         '='           (1, 6) (1, 7)
    STRING     "'places'"    (1, 8) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    NAME       'grün'        (2, 0) (2, 4)
    OP         '='           (2, 5) (2, 6)
    STRING     "'green'"     (2, 7) (2, 14)
    """)

    def test_unicode(self):
        # Legacy unicode literals:
        self.check_tokenize("Örter = u'places'\ngrün = U'green'", """\
    NAME       'Örter'       (1, 0) (1, 5)
    OP         '='           (1, 6) (1, 7)
    STRING     "u'places'"   (1, 8) (1, 17)
    NEWLINE    '\\n'          (1, 17) (1, 18)
    NAME       'grün'        (2, 0) (2, 4)
    OP         '='           (2, 5) (2, 6)
    STRING     "U'green'"    (2, 7) (2, 15)
    """)

    def test_async(self):
        # Async/await extension:
        self.check_tokenize("async = 1", """\
    NAME       'async'       (1, 0) (1, 5)
    OP         '='           (1, 6) (1, 7)
    NUMBER     '1'           (1, 8) (1, 9)
    """)

        self.check_tokenize("a = (async = 1)", """\
    NAME       'a'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    OP         '('           (1, 4) (1, 5)
    NAME       'async'       (1, 5) (1, 10)
    OP         '='           (1, 11) (1, 12)
    NUMBER     '1'           (1, 13) (1, 14)
    OP         ')'           (1, 14) (1, 15)
    """)

        self.check_tokenize("async()", """\
    NAME       'async'       (1, 0) (1, 5)
    OP         '('           (1, 5) (1, 6)
    OP         ')'           (1, 6) (1, 7)
    """)

        self.check_tokenize("class async(Bar):pass", """\
    NAME       'class'       (1, 0) (1, 5)
    NAME       'async'       (1, 6) (1, 11)
    OP         '('           (1, 11) (1, 12)
    NAME       'Bar'         (1, 12) (1, 15)
    OP         ')'           (1, 15) (1, 16)
    OP         ':'           (1, 16) (1, 17)
    NAME       'pass'        (1, 17) (1, 21)
    """)

        self.check_tokenize("class async:pass", """\
    NAME       'class'       (1, 0) (1, 5)
    NAME       'async'       (1, 6) (1, 11)
    OP         ':'           (1, 11) (1, 12)
    NAME       'pass'        (1, 12) (1, 16)
    """)

        self.check_tokenize("await = 1", """\
    NAME       'await'       (1, 0) (1, 5)
    OP         '='           (1, 6) (1, 7)
    NUMBER     '1'           (1, 8) (1, 9)
    """)

        self.check_tokenize("foo.async", """\
    NAME       'foo'         (1, 0) (1, 3)
    OP         '.'           (1, 3) (1, 4)
    NAME       'async'       (1, 4) (1, 9)
    """)

        self.check_tokenize("async for a in b: pass", """\
    NAME       'async'       (1, 0) (1, 5)
    NAME       'for'         (1, 6) (1, 9)
    NAME       'a'           (1, 10) (1, 11)
    NAME       'in'          (1, 12) (1, 14)
    NAME       'b'           (1, 15) (1, 16)
    OP         ':'           (1, 16) (1, 17)
    NAME       'pass'        (1, 18) (1, 22)
    """)

        self.check_tokenize("async with a as b: pass", """\
    NAME       'async'       (1, 0) (1, 5)
    NAME       'with'        (1, 6) (1, 10)
    NAME       'a'           (1, 11) (1, 12)
    NAME       'as'          (1, 13) (1, 15)
    NAME       'b'           (1, 16) (1, 17)
    OP         ':'           (1, 17) (1, 18)
    NAME       'pass'        (1, 19) (1, 23)
    """)

        self.check_tokenize("async.foo", """\
    NAME       'async'       (1, 0) (1, 5)
    OP         '.'           (1, 5) (1, 6)
    NAME       'foo'         (1, 6) (1, 9)
    """)

        self.check_tokenize("async", """\
    NAME       'async'       (1, 0) (1, 5)
    """)

        self.check_tokenize("async\n#comment\nawait", """\
    NAME       'async'       (1, 0) (1, 5)
    NEWLINE    '\\n'          (1, 5) (1, 6)
    COMMENT    '#comment'    (2, 0) (2, 8)
    NL         '\\n'          (2, 8) (2, 9)
    NAME       'await'       (3, 0) (3, 5)
    """)

        self.check_tokenize("async\n...\nawait", """\
    NAME       'async'       (1, 0) (1, 5)
    NEWLINE    '\\n'          (1, 5) (1, 6)
    OP         '...'         (2, 0) (2, 3)
    NEWLINE    '\\n'          (2, 3) (2, 4)
    NAME       'await'       (3, 0) (3, 5)
    """)

        self.check_tokenize("async\nawait", """\
    NAME       'async'       (1, 0) (1, 5)
    NEWLINE    '\\n'          (1, 5) (1, 6)
    NAME       'await'       (2, 0) (2, 5)
    """)

        self.check_tokenize("foo.async + 1", """\
    NAME       'foo'         (1, 0) (1, 3)
    OP         '.'           (1, 3) (1, 4)
    NAME       'async'       (1, 4) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    NUMBER     '1'           (1, 12) (1, 13)
    """)

        self.check_tokenize("async def foo(): pass", """\
    NAME       'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'foo'         (1, 10) (1, 13)
    OP         '('           (1, 13) (1, 14)
    OP         ')'           (1, 14) (1, 15)
    OP         ':'           (1, 15) (1, 16)
    NAME       'pass'        (1, 17) (1, 21)
    """)

        self.check_tokenize('''\
async def foo():
  def foo(await):
    await = 1
  if 1:
    await
async += 1
''', """\
    NAME       'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'foo'         (1, 10) (1, 13)
    OP         '('           (1, 13) (1, 14)
    OP         ')'           (1, 14) (1, 15)
    OP         ':'           (1, 15) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    INDENT     '  '          (2, 0) (2, 2)
    NAME       'def'         (2, 2) (2, 5)
    NAME       'foo'         (2, 6) (2, 9)
    OP         '('           (2, 9) (2, 10)
    NAME       'await'       (2, 10) (2, 15)
    OP         ')'           (2, 15) (2, 16)
    OP         ':'           (2, 16) (2, 17)
    NEWLINE    '\\n'          (2, 17) (2, 18)
    INDENT     '    '        (3, 0) (3, 4)
    NAME       'await'       (3, 4) (3, 9)
    OP         '='           (3, 10) (3, 11)
    NUMBER     '1'           (3, 12) (3, 13)
    NEWLINE    '\\n'          (3, 13) (3, 14)
    DEDENT     ''            (4, 2) (4, 2)
    NAME       'if'          (4, 2) (4, 4)
    NUMBER     '1'           (4, 5) (4, 6)
    OP         ':'           (4, 6) (4, 7)
    NEWLINE    '\\n'          (4, 7) (4, 8)
    INDENT     '    '        (5, 0) (5, 4)
    NAME       'await'       (5, 4) (5, 9)
    NEWLINE    '\\n'          (5, 9) (5, 10)
    DEDENT     ''            (6, 0) (6, 0)
    DEDENT     ''            (6, 0) (6, 0)
    NAME       'async'       (6, 0) (6, 5)
    OP         '+='          (6, 6) (6, 8)
    NUMBER     '1'           (6, 9) (6, 10)
    NEWLINE    '\\n'          (6, 10) (6, 11)
    """)

        self.check_tokenize('''\
async def foo():
  async for i in 1: pass''', """\
    NAME       'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'foo'         (1, 10) (1, 13)
    OP         '('           (1, 13) (1, 14)
    OP         ')'           (1, 14) (1, 15)
    OP         ':'           (1, 15) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    INDENT     '  '          (2, 0) (2, 2)
    NAME       'async'       (2, 2) (2, 7)
    NAME       'for'         (2, 8) (2, 11)
    NAME       'i'           (2, 12) (2, 13)
    NAME       'in'          (2, 14) (2, 16)
    NUMBER     '1'           (2, 17) (2, 18)
    OP         ':'           (2, 18) (2, 19)
    NAME       'pass'        (2, 20) (2, 24)
    DEDENT     ''            (3, 0) (3, 0)
    """)

        self.check_tokenize('''async def foo(async): await''', """\
    NAME       'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'foo'         (1, 10) (1, 13)
    OP         '('           (1, 13) (1, 14)
    NAME       'async'       (1, 14) (1, 19)
    OP         ')'           (1, 19) (1, 20)
    OP         ':'           (1, 20) (1, 21)
    NAME       'await'       (1, 22) (1, 27)
    """)

        self.check_tokenize('''\
def f():

  def baz(): pass
  async def bar(): pass

  await = 2''', """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'f'           (1, 4) (1, 5)
    OP         '('           (1, 5) (1, 6)
    OP         ')'           (1, 6) (1, 7)
    OP         ':'           (1, 7) (1, 8)
    NEWLINE    '\\n'          (1, 8) (1, 9)
    NL         '\\n'          (2, 0) (2, 1)
    INDENT     '  '          (3, 0) (3, 2)
    NAME       'def'         (3, 2) (3, 5)
    NAME       'baz'         (3, 6) (3, 9)
    OP         '('           (3, 9) (3, 10)
    OP         ')'           (3, 10) (3, 11)
    OP         ':'           (3, 11) (3, 12)
    NAME       'pass'        (3, 13) (3, 17)
    NEWLINE    '\\n'          (3, 17) (3, 18)
    NAME       'async'       (4, 2) (4, 7)
    NAME       'def'         (4, 8) (4, 11)
    NAME       'bar'         (4, 12) (4, 15)
    OP         '('           (4, 15) (4, 16)
    OP         ')'           (4, 16) (4, 17)
    OP         ':'           (4, 17) (4, 18)
    NAME       'pass'        (4, 19) (4, 23)
    NEWLINE    '\\n'          (4, 23) (4, 24)
    NL         '\\n'          (5, 0) (5, 1)
    NAME       'await'       (6, 2) (6, 7)
    OP         '='           (6, 8) (6, 9)
    NUMBER     '2'           (6, 10) (6, 11)
    DEDENT     ''            (7, 0) (7, 0)
    """)

        self.check_tokenize('''\
async def f():

  def baz(): pass
  async def bar(): pass

  await = 2''', """\
    NAME       'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'f'           (1, 10) (1, 11)
    OP         '('           (1, 11) (1, 12)
    OP         ')'           (1, 12) (1, 13)
    OP         ':'           (1, 13) (1, 14)
    NEWLINE    '\\n'          (1, 14) (1, 15)
    NL         '\\n'          (2, 0) (2, 1)
    INDENT     '  '          (3, 0) (3, 2)
    NAME       'def'         (3, 2) (3, 5)
    NAME       'baz'         (3, 6) (3, 9)
    OP         '('           (3, 9) (3, 10)
    OP         ')'           (3, 10) (3, 11)
    OP         ':'           (3, 11) (3, 12)
    NAME       'pass'        (3, 13) (3, 17)
    NEWLINE    '\\n'          (3, 17) (3, 18)
    NAME       'async'       (4, 2) (4, 7)
    NAME       'def'         (4, 8) (4, 11)
    NAME       'bar'         (4, 12) (4, 15)
    OP         '('           (4, 15) (4, 16)
    OP         ')'           (4, 16) (4, 17)
    OP         ':'           (4, 17) (4, 18)
    NAME       'pass'        (4, 19) (4, 23)
    NEWLINE    '\\n'          (4, 23) (4, 24)
    NL         '\\n'          (5, 0) (5, 1)
    NAME       'await'       (6, 2) (6, 7)
    OP         '='           (6, 8) (6, 9)
    NUMBER     '2'           (6, 10) (6, 11)
    DEDENT     ''            (7, 0) (7, 0)
    """)


def decistmt(s):
    result = []
    g = tokenize(BytesIO(s.encode('utf-8')).readline)  # tokenize the string
    for toknum, tokval, _, _, _ in g:
        if toknum == NUMBER and '.' in tokval:  # replace NUMBER tokens
            result.extend([
                (NAME, 'Decimal'),
                (OP, '('),
                (STRING, repr(tokval)),
                (OP, ')')
            ])
        else:
            result.append((toknum, tokval))
    return untokenize(result).decode('utf-8')

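# A usage sketch of decistmt() (the same example test_decistmt checks below):
# float literals are wrapped in Decimal constructor calls while every other
# token passes through untouched, e.g.
#
#     decistmt('+21.3e-5*-.1234/81.7')
#     # "+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')"
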
class TestMisc(TestCase):

    def test_decistmt(self):
        # Substitute Decimals for floats in a string of statements.
        # This is an example from the docs.

        from decimal import Decimal
        s = '+21.3e-5*-.1234/81.7'
        self.assertEqual(decistmt(s),
                         "+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')")

        # The format of the exponent is inherited from the platform C library.
        # Known cases are "e-007" (Windows) and "e-07" (not Windows). Since
        # we're only showing 11 digits, and the 12th isn't close to 5, the
        # rest of the output should be platform-independent.
        self.assertRegex(repr(eval(s)), '-3.2171603427[0-9]*e-0+7')

        # Output from calculations with Decimal should be identical across all
        # platforms.
        self.assertEqual(eval(decistmt(s)),
                         Decimal('-3.217160342717258261933904529E-7'))


class TestTokenizerAdheresToPep0263(TestCase):
    """
    Test that the tokenizer adheres to the coding behaviour stipulated in
    PEP 0263.
    """

    def _testFile(self, filename):
        path = os.path.join(os.path.dirname(__file__), filename)
        TestRoundtrip.check_roundtrip(self, open(path, 'rb'))

    def test_utf8_coding_cookie_and_no_utf8_bom(self):
        f = 'tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt'
        self._testFile(f)

    def test_latin1_coding_cookie_and_utf8_bom(self):
        """
        As per PEP 0263, if a file starts with a utf-8 BOM signature, the only
        allowed encoding for the comment is 'utf-8'. The text file used in
        this test starts with a BOM signature, but specifies latin1 as the
        coding, so verify that a SyntaxError is raised, which matches the
        behaviour of the interpreter when it encounters a similar condition.
        """
        f = 'tokenize_tests-latin1-coding-cookie-and-utf8-bom-sig.txt'
        self.assertRaises(SyntaxError, self._testFile, f)

    def test_no_coding_cookie_and_utf8_bom(self):
        f = 'tokenize_tests-no-coding-cookie-and-utf8-bom-sig-only.txt'
        self._testFile(f)

    def test_utf8_coding_cookie_and_utf8_bom(self):
        f = 'tokenize_tests-utf8-coding-cookie-and-utf8-bom-sig.txt'
        self._testFile(f)

    def test_bad_coding_cookie(self):
        self.assertRaises(SyntaxError, self._testFile, 'bad_coding.py')
        self.assertRaises(SyntaxError, self._testFile, 'bad_coding2.py')


class Test_Tokenize(TestCase):

    def test__tokenize_decodes_with_specified_encoding(self):
        literal = '"ЉЊЈЁЂ"'
        line = literal.encode('utf-8')
        first = False
        def readline():
            nonlocal first
            if not first:
                first = True
                return line
            else:
                return b''

        # skip the initial encoding token and the end tokens
        tokens = list(_tokenize(readline, encoding='utf-8'))[1:-2]
        expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
        self.assertEqual(tokens, expected_tokens,
                         "bytes not decoded with encoding")

    def test__tokenize_does_not_decode_with_encoding_none(self):
        literal = '"ЉЊЈЁЂ"'
        first = False
        def readline():
            nonlocal first
            if not first:
                first = True
                return literal
            else:
                return b''

        # skip the end tokens
        tokens = list(_tokenize(readline, encoding=None))[:-2]
        expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
        self.assertEqual(tokens, expected_tokens,
                         "string not tokenized when encoding is None")


1061class TestDetectEncoding(TestCase):
1062
1063 def get_readline(self, lines):
1064 index = 0
1065 def readline():
1066 nonlocal index
1067 if index == len(lines):
1068 raise StopIteration
1069 line = lines[index]
1070 index += 1
1071 return line
1072 return readline
1073
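    # A sketch of the detect_encoding() contract the tests below exercise
    # (using only names already imported in this module): it returns the
    # detected encoding plus the raw lines it had to consume to decide, e.g.
    #
    #     readline = self.get_readline((b'# coding: latin-1\n', b'x = 1\n'))
    #     detect_encoding(readline)
    #     # ('iso-8859-1', [b'# coding: latin-1\n'])
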
    def test_no_bom_no_encoding_cookie(self):
        lines = (
            b'# something\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, list(lines[:2]))

    def test_bom_no_cookie(self):
        lines = (
            b'\xef\xbb\xbf# something\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines,
                         [b'# something\n', b'print(something)\n'])

    def test_cookie_first_line_no_bom(self):
        lines = (
            b'# -*- coding: latin-1 -*-\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'iso-8859-1')
        self.assertEqual(consumed_lines, [b'# -*- coding: latin-1 -*-\n'])

    def test_matched_bom_and_cookie_first_line(self):
        lines = (
            b'\xef\xbb\xbf# coding=utf-8\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines, [b'# coding=utf-8\n'])

    def test_mismatched_bom_and_cookie_first_line_raises_syntaxerror(self):
        lines = (
            b'\xef\xbb\xbf# vim: set fileencoding=ascii :\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        readline = self.get_readline(lines)
        self.assertRaises(SyntaxError, detect_encoding, readline)

    def test_cookie_second_line_no_bom(self):
        lines = (
            b'#! something\n',
            b'# vim: set fileencoding=ascii :\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'ascii')
        expected = [b'#! something\n', b'# vim: set fileencoding=ascii :\n']
        self.assertEqual(consumed_lines, expected)

    def test_matched_bom_and_cookie_second_line(self):
        lines = (
            b'\xef\xbb\xbf#! something\n',
            b'f# coding=utf-8\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines,
                         [b'#! something\n', b'f# coding=utf-8\n'])

    def test_mismatched_bom_and_cookie_second_line_raises_syntaxerror(self):
        lines = (
            b'\xef\xbb\xbf#! something\n',
            b'# vim: set fileencoding=ascii :\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        readline = self.get_readline(lines)
        self.assertRaises(SyntaxError, detect_encoding, readline)

    def test_cookie_second_line_noncommented_first_line(self):
        lines = (
            b"print('\xc2\xa3')\n",
            b'# vim: set fileencoding=iso8859-15 :\n',
            b"print('\xe2\x82\xac')\n"
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8')
        expected = [b"print('\xc2\xa3')\n"]
        self.assertEqual(consumed_lines, expected)

    def test_cookie_second_line_commented_first_line(self):
        lines = (
            b"#print('\xc2\xa3')\n",
            b'# vim: set fileencoding=iso8859-15 :\n',
            b"print('\xe2\x82\xac')\n"
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'iso8859-15')
        expected = [b"#print('\xc2\xa3')\n", b'# vim: set fileencoding=iso8859-15 :\n']
        self.assertEqual(consumed_lines, expected)

    def test_cookie_second_line_empty_first_line(self):
        lines = (
            b'\n',
            b'# vim: set fileencoding=iso8859-15 :\n',
            b"print('\xe2\x82\xac')\n"
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'iso8859-15')
        expected = [b'\n', b'# vim: set fileencoding=iso8859-15 :\n']
        self.assertEqual(consumed_lines, expected)

    def test_latin1_normalization(self):
        # See get_normal_name() in tokenizer.c.
        encodings = ("latin-1", "iso-8859-1", "iso-latin-1", "latin-1-unix",
                     "iso-8859-1-unix", "iso-latin-1-mac")
        for encoding in encodings:
            for rep in ("-", "_"):
                enc = encoding.replace("-", rep)
                lines = (b"#!/usr/bin/python\n",
                         b"# coding: " + enc.encode("ascii") + b"\n",
                         b"print(things)\n",
                         b"do_something += 4\n")
                rl = self.get_readline(lines)
                found, consumed_lines = detect_encoding(rl)
                self.assertEqual(found, "iso-8859-1")

    def test_syntaxerror_latin1(self):
        # Issue 14629: need to raise SyntaxError if the first
        # line(s) have non-UTF-8 characters
        lines = (
            b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S
        )
        readline = self.get_readline(lines)
        self.assertRaises(SyntaxError, detect_encoding, readline)

    def test_utf8_normalization(self):
        # See get_normal_name() in tokenizer.c.
        encodings = ("utf-8", "utf-8-mac", "utf-8-unix")
        for encoding in encodings:
            for rep in ("-", "_"):
                enc = encoding.replace("-", rep)
                lines = (b"#!/usr/bin/python\n",
                         b"# coding: " + enc.encode("ascii") + b"\n",
                         b"1 + 3\n")
                rl = self.get_readline(lines)
                found, consumed_lines = detect_encoding(rl)
                self.assertEqual(found, "utf-8")

    def test_short_files(self):
        readline = self.get_readline((b'print(something)\n',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, [b'print(something)\n'])

        encoding, consumed_lines = detect_encoding(self.get_readline(()))
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, [])

        readline = self.get_readline((b'\xef\xbb\xbfprint(something)\n',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines, [b'print(something)\n'])

        readline = self.get_readline((b'\xef\xbb\xbf',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines, [])

        readline = self.get_readline((b'# coding: bad\n',))
        self.assertRaises(SyntaxError, detect_encoding, readline)

    def test_false_encoding(self):
        # Issue 18873: "Encoding" detected in non-comment lines
        readline = self.get_readline((b'print("#coding=fake")',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, [b'print("#coding=fake")'])

    def test_open(self):
        filename = support.TESTFN + '.py'
        self.addCleanup(support.unlink, filename)

        # test coding cookie
        for encoding in ('iso-8859-15', 'utf-8'):
            with open(filename, 'w', encoding=encoding) as fp:
                print("# coding: %s" % encoding, file=fp)
                print("print('euro:\u20ac')", file=fp)
            with tokenize_open(filename) as fp:
                self.assertEqual(fp.encoding, encoding)
                self.assertEqual(fp.mode, 'r')

        # test BOM (no coding cookie)
        with open(filename, 'w', encoding='utf-8-sig') as fp:
            print("print('euro:\u20ac')", file=fp)
        with tokenize_open(filename) as fp:
            self.assertEqual(fp.encoding, 'utf-8-sig')
            self.assertEqual(fp.mode, 'r')

    def test_filename_in_exception(self):
        # When possible, include the file name in the exception.
        path = 'some_file_path'
        lines = (
            b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S
        )
        class Bunk:
            def __init__(self, lines, path):
                self.name = path
                self._lines = lines
                self._index = 0

            def readline(self):
                if self._index == len(lines):
                    raise StopIteration
                line = lines[self._index]
                self._index += 1
                return line

        with self.assertRaises(SyntaxError):
            ins = Bunk(lines, path)
            # Make sure lacking a name isn't an issue.
            del ins.name
            detect_encoding(ins.readline)
        with self.assertRaisesRegex(SyntaxError, '.*{}'.format(path)):
            ins = Bunk(lines, path)
            detect_encoding(ins.readline)

    def test_open_error(self):
        # Issue #23840: open() must close the binary file on error
        m = BytesIO(b'#coding:xxx')
        with mock.patch('tokenize._builtin_open', return_value=m):
            self.assertRaises(SyntaxError, tokenize_open, 'foobar')
        self.assertTrue(m.closed)


class TestTokenize(TestCase):

    def test_tokenize(self):
        import tokenize as tokenize_module
        encoding = object()
        encoding_used = None
        def mock_detect_encoding(readline):
            return encoding, [b'first', b'second']

        def mock__tokenize(readline, encoding):
            nonlocal encoding_used
            encoding_used = encoding
            out = []
            while True:
                next_line = readline()
                if next_line:
                    out.append(next_line)
                    continue
                return out

        counter = 0
        def mock_readline():
            nonlocal counter
            counter += 1
            if counter == 5:
                return b''
            return str(counter).encode()

        orig_detect_encoding = tokenize_module.detect_encoding
        orig__tokenize = tokenize_module._tokenize
        tokenize_module.detect_encoding = mock_detect_encoding
        tokenize_module._tokenize = mock__tokenize
        try:
            results = tokenize(mock_readline)
            self.assertEqual(list(results),
                             [b'first', b'second', b'1', b'2', b'3', b'4'])
        finally:
            tokenize_module.detect_encoding = orig_detect_encoding
            tokenize_module._tokenize = orig__tokenize

        self.assertEqual(encoding_used, encoding)

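    # Sketch (not part of the original suite): generate_tokens() is the
    # str-based counterpart of tokenize(); it takes a readline returning
    # strings, skips encoding detection, and emits no ENCODING token, so
    # the first token here is the NAME itself.
    def test_generate_tokens_example(self):
        toks = list(generate_tokens(iter(['x = 1\n']).__next__))
        self.assertEqual(toks[0].type, NAME)
        self.assertEqual(toks[0].string, 'x')
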
    def test_oneline_defs(self):
        buf = []
        for i in range(500):
            buf.append('def i{i}(): return {i}'.format(i=i))
        buf.append('OK')
        buf = '\n'.join(buf)

        # Test that 500 consecutive one-line defs are OK
        toks = list(tokenize(BytesIO(buf.encode('utf-8')).readline))
        self.assertEqual(toks[-3].string, 'OK')  # [-1] is always ENDMARKER
                                                 # [-2] is always NEWLINE

    def assertExactTypeEqual(self, opstr, *optypes):
        tokens = list(tokenize(BytesIO(opstr.encode('utf-8')).readline))
        num_optypes = len(optypes)
        self.assertEqual(len(tokens), 3 + num_optypes)
        self.assertEqual(tok_name[tokens[0].exact_type],
                         tok_name[ENCODING])
        for i in range(num_optypes):
            self.assertEqual(tok_name[tokens[i + 1].exact_type],
                             tok_name[optypes[i]])
        self.assertEqual(tok_name[tokens[1 + num_optypes].exact_type],
                         tok_name[token.NEWLINE])
        self.assertEqual(tok_name[tokens[2 + num_optypes].exact_type],
                         tok_name[token.ENDMARKER])

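    # For context (sketch, not in the original suite): operators come back
    # from tokenize() as generic OP tokens; the specific operator is only
    # visible through the .exact_type attribute checked by the helper above.
    def test_exact_type_vs_type_example(self):
        toks = list(tokenize(BytesIO(b'+').readline))
        self.assertEqual(toks[1].type, token.OP)
        self.assertEqual(toks[1].exact_type, token.PLUS)
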
    def test_exact_type(self):
        self.assertExactTypeEqual('()', token.LPAR, token.RPAR)
        self.assertExactTypeEqual('[]', token.LSQB, token.RSQB)
        self.assertExactTypeEqual(':', token.COLON)
        self.assertExactTypeEqual(',', token.COMMA)
        self.assertExactTypeEqual(';', token.SEMI)
        self.assertExactTypeEqual('+', token.PLUS)
        self.assertExactTypeEqual('-', token.MINUS)
        self.assertExactTypeEqual('*', token.STAR)
        self.assertExactTypeEqual('/', token.SLASH)
        self.assertExactTypeEqual('|', token.VBAR)
        self.assertExactTypeEqual('&', token.AMPER)
        self.assertExactTypeEqual('<', token.LESS)
        self.assertExactTypeEqual('>', token.GREATER)
        self.assertExactTypeEqual('=', token.EQUAL)
        self.assertExactTypeEqual('.', token.DOT)
        self.assertExactTypeEqual('%', token.PERCENT)
        self.assertExactTypeEqual('{}', token.LBRACE, token.RBRACE)
        self.assertExactTypeEqual('==', token.EQEQUAL)
        self.assertExactTypeEqual('!=', token.NOTEQUAL)
        self.assertExactTypeEqual('<=', token.LESSEQUAL)
        self.assertExactTypeEqual('>=', token.GREATEREQUAL)
        self.assertExactTypeEqual('~', token.TILDE)
        self.assertExactTypeEqual('^', token.CIRCUMFLEX)
        self.assertExactTypeEqual('<<', token.LEFTSHIFT)
        self.assertExactTypeEqual('>>', token.RIGHTSHIFT)
        self.assertExactTypeEqual('**', token.DOUBLESTAR)
        self.assertExactTypeEqual('+=', token.PLUSEQUAL)
        self.assertExactTypeEqual('-=', token.MINEQUAL)
        self.assertExactTypeEqual('*=', token.STAREQUAL)
        self.assertExactTypeEqual('/=', token.SLASHEQUAL)
        self.assertExactTypeEqual('%=', token.PERCENTEQUAL)
        self.assertExactTypeEqual('&=', token.AMPEREQUAL)
        self.assertExactTypeEqual('|=', token.VBAREQUAL)
        self.assertExactTypeEqual('^=', token.CIRCUMFLEXEQUAL)
        self.assertExactTypeEqual('<<=', token.LEFTSHIFTEQUAL)
        self.assertExactTypeEqual('>>=', token.RIGHTSHIFTEQUAL)
        self.assertExactTypeEqual('**=', token.DOUBLESTAREQUAL)
        self.assertExactTypeEqual('//', token.DOUBLESLASH)
        self.assertExactTypeEqual('//=', token.DOUBLESLASHEQUAL)
        self.assertExactTypeEqual('...', token.ELLIPSIS)
        self.assertExactTypeEqual('->', token.RARROW)
        self.assertExactTypeEqual('@', token.AT)
        self.assertExactTypeEqual('@=', token.ATEQUAL)

        self.assertExactTypeEqual('a**2+b**2==c**2',
                                  NAME, token.DOUBLESTAR, NUMBER,
                                  token.PLUS,
                                  NAME, token.DOUBLESTAR, NUMBER,
                                  token.EQEQUAL,
                                  NAME, token.DOUBLESTAR, NUMBER)
        self.assertExactTypeEqual('{1, 2, 3}',
                                  token.LBRACE,
                                  token.NUMBER, token.COMMA,
                                  token.NUMBER, token.COMMA,
                                  token.NUMBER,
                                  token.RBRACE)
        self.assertExactTypeEqual('^(x & 0x1)',
                                  token.CIRCUMFLEX,
                                  token.LPAR,
                                  token.NAME, token.AMPER, token.NUMBER,
                                  token.RPAR)

    def test_pathological_trailing_whitespace(self):
        # See http://bugs.python.org/issue16152
        self.assertExactTypeEqual('@ ', token.AT)


class UntokenizeTest(TestCase):

    def test_bad_input_order(self):
        # raise if start is on an earlier row than the previous end
        u = Untokenizer()
        u.prev_row = 2
        u.prev_col = 2
        with self.assertRaises(ValueError) as cm:
            u.add_whitespace((1,3))
        self.assertEqual(cm.exception.args[0],
                'start (1,3) precedes previous end (2,2)')
        # raise if start is on the same row but an earlier column
        self.assertRaises(ValueError, u.add_whitespace, (2,1))

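    # Sketch (not part of the original suite): with a valid start position,
    # add_whitespace() pads from (prev_row, prev_col) with spaces within
    # the current row.
    def test_add_whitespace_padding_example(self):
        u = Untokenizer()
        u.prev_row = 1
        u.prev_col = 1
        u.tokens = []
        u.add_whitespace((1, 4))
        self.assertEqual(u.tokens, ['   '])  # three spaces: columns 1..3
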
    def test_backslash_continuation(self):
        # The problem is that <whitespace>\<newline> leaves no token
        u = Untokenizer()
        u.prev_row = 1
        u.prev_col = 1
        u.tokens = []
        u.add_whitespace((2, 0))
        self.assertEqual(u.tokens, ['\\\n'])
        u.prev_row = 2
        u.add_whitespace((4, 4))
        # Two continuation lines, then four columns of padding.
        self.assertEqual(u.tokens, ['\\\n', '\\\n\\\n', '    '])
        TestRoundtrip.check_roundtrip(self, 'a\n  b\n    c\n  \\\n  c\n')

    def test_iter_compat(self):
        u = Untokenizer()
        token = (NAME, 'Hello')
        tokens = [(ENCODING, 'utf-8'), token]
        u.compat(token, iter([]))
        self.assertEqual(u.tokens, ["Hello "])
        u = Untokenizer()
        self.assertEqual(u.untokenize(iter([token])), 'Hello ')
        u = Untokenizer()
        self.assertEqual(u.untokenize(iter(tokens)), 'Hello ')
        self.assertEqual(u.encoding, 'utf-8')
        self.assertEqual(untokenize(iter(tokens)), b'Hello ')


class TestRoundtrip(TestCase):

    def check_roundtrip(self, f):
        """
        Test roundtrip for `untokenize`.  `f` is an open file or a string.
        The source code in f is tokenized to both 5- and 2-tuples.
        Both sequences are converted back to source code via
        tokenize.untokenize(), and each result is tokenized again to
        2-tuples.  The test fails if either re-tokenization does not match
        the original 2-tuple sequence.

        When untokenize bugs are fixed, untokenize with 5-tuples should
        reproduce code that does not contain a backslash continuation
        following spaces.  A proper test should test this.
        """
        # Get source code and original tokenizations
        if isinstance(f, str):
            code = f.encode('utf-8')
        else:
            code = f.read()
            f.close()
        readline = iter(code.splitlines(keepends=True)).__next__
        tokens5 = list(tokenize(readline))
        tokens2 = [tok[:2] for tok in tokens5]
        # Reproduce tokens2 from pairs
        bytes_from2 = untokenize(tokens2)
        readline2 = iter(bytes_from2.splitlines(keepends=True)).__next__
        tokens2_from2 = [tok[:2] for tok in tokenize(readline2)]
        self.assertEqual(tokens2_from2, tokens2)
        # Reproduce tokens2 from 5-tuples
        bytes_from5 = untokenize(tokens5)
        readline5 = iter(bytes_from5.splitlines(keepends=True)).__next__
        tokens2_from5 = [tok[:2] for tok in tokenize(readline5)]
        self.assertEqual(tokens2_from5, tokens2)

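    # Minimal illustration (sketch, not part of the original suite) of the
    # two untokenize() input modes exercised above: full 5-tuples carry
    # positions and can reproduce simple sources exactly, while 2-tuples
    # only guarantee a token-equivalent result.
    def test_untokenize_modes_example(self):
        source = b'x = 1\n'
        tokens5 = list(tokenize(BytesIO(source).readline))
        self.assertEqual(untokenize(tokens5), source)
        tokens2 = [tok[:2] for tok in tokens5]
        result = untokenize(tokens2)
        self.assertEqual(
            [tok[:2] for tok in tokenize(BytesIO(result).readline)], tokens2)
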
    def test_roundtrip(self):
        # There are some standard formatting practices that are easy to get right.

        self.check_roundtrip("if x == 1:\n"
                             "    print(x)\n")
        self.check_roundtrip("# This is a comment\n"
                             "# This also\n")

        # Some people use different formatting conventions, which makes
        # untokenize a little trickier.  Note that this test involves trailing
        # whitespace after the colon.
        self.check_roundtrip("if x == 1 : \n"
                             "  print(x)\n")

        fn = support.findfile("tokenize_tests.txt")
        with open(fn, 'rb') as f:
            self.check_roundtrip(f)
        self.check_roundtrip("if x == 1:\n"
                             "    # A comment by itself.\n"
                             "    print(x)  # Comment here, too.\n"
                             "    # Another comment.\n"
                             "after_if = True\n")
        self.check_roundtrip("if (x  # The comments need to go in the right place\n"
                             "    == 1):\n"
                             "    print('x==1')\n")
        self.check_roundtrip("class Test: # A comment here\n"
                             "  # A comment with weird indent\n"
                             "  after_com = 5\n"
                             "  def x(m): return m*5  # a one liner\n"
                             "  def  y(m):  # A whitespace after the colon\n"
                             "     return y*4  # 3-space indent\n")

        # Some error-handling code
        self.check_roundtrip("try: import somemodule\n"
                             "except ImportError: # comment\n"
                             "    print('Can not import' # comment2\n)"
                             "else:   print('Loaded')\n")

    def test_continuation(self):
        # Balancing continuation
        self.check_roundtrip("a = (3,4, \n"
                             "5,6)\n"
                             "y = [3, 4,\n"
                             "5]\n"
                             "z = {'a': 5,\n"
                             "'b':15, 'c':True}\n"
                             "x = len(y) + 5 - a[\n"
                             "3] - a[2]\n"
                             "+ len(z) - z[\n"
                             "'b']\n")

    def test_backslash_continuation(self):
        # Backslash means line continuation, except for comments
        self.check_roundtrip("x=1+\\\n"
                             "1\n"
                             "# This is a comment\\\n"
                             "# This also\n")
        self.check_roundtrip("# Comment \\\n"
                             "x = 0")

    def test_string_concatenation(self):
        # Two string literals on the same line
        self.check_roundtrip("'' ''")

    def test_random_files(self):
        # Test roundtrip on random python modules.
        # pass the '-ucpu' option to process the full directory.

        import glob, random
        fn = support.findfile("tokenize_tests.txt")
        tempdir = os.path.dirname(fn) or os.curdir
        testfiles = glob.glob(os.path.join(tempdir, "test*.py"))

        # Tokenize is broken on test_pep3131.py because regular expressions are
        # broken on the obscure unicode identifiers in it. *sigh*
        # With roundtrip extended to test the 5-tuple mode of untokenize,
        # 7 more testfiles fail.  Remove them also until the failure is diagnosed.

        testfiles.remove(os.path.join(tempdir, "test_unicode_identifiers.py"))
        for f in ('buffer', 'builtin', 'fileio', 'inspect', 'os', 'platform', 'sys'):
            testfiles.remove(os.path.join(tempdir, "test_%s.py") % f)

        if not support.is_resource_enabled("cpu"):
            testfiles = random.sample(testfiles, 10)

        for testfile in testfiles:
            with open(testfile, 'rb') as f:
                with self.subTest(file=testfile):
                    self.check_roundtrip(f)


    def roundtrip(self, code):
        if isinstance(code, str):
            code = code.encode('utf-8')
        return untokenize(tokenize(BytesIO(code).readline)).decode('utf-8')

    def test_indentation_semantics_retained(self):
        """
        Ensure that although whitespace might be mutated in a roundtrip,
        the semantic meaning of the indentation remains consistent.
        """
        code = "if False:\n\tx=3\n\tx=3\n"
        codelines = self.roundtrip(code).split('\n')
        self.assertEqual(codelines[1], codelines[2])
        self.check_roundtrip(code)


if __name__ == "__main__":
    unittest.main()